// -----------------------------------------------------------------------------
// Tobacco bar planned-missingness study: multiple imputation (MI) example.
//
// Workflow:
//   1. Open a log, change to the example directory, load addict_demo3.dta.
//   2. Declare the MI structure and register imputed / regular variables.
//   3. Dry-run the chained-equations specification (default and as-listed order).
//   4. Check burn-in with a single-chain trace plot          (timer 1).
//   5. Check convergence across three chains                 (timer 2).
//   6. Generate the 75 imputations used for analysis          (timer 3).
//   7. Post-imputation diagnostics with -midiagplots-         (timers 4-5).
//   8. Pooled regression, joint tests, and -mibeta-           (timer 6).
//
// NOTE(review): -midiagplots- and -mibeta- appear to be community-contributed
// commands rather than official Stata; confirm they are installed before
// running this file.
// -----------------------------------------------------------------------------

set more off
set logtype text, perm
capture log close
log using Tobacco_Bar_Planned_Missingness_Study_MI.log, replace
version 13

// Machine-specific working directory; edit for your own installation.
cd "C:\Users\tneilands\Box Sync\My Documents\CAPS\Methods Core\Presentations\Missing Data 2015\Part 2\Example 1"

use addict_demo3.dta, clear

// Multiple imputation of previous FIML example

// Note: ntwrksmk had the most missing data (49%), so I initially set M = 50
// imputed data sets, following Paul Allison's rule of thumb for setting M.
// Then I increase M to 75 after reviewing the FMI output from
// -mi estimate-

mi set flong
mi register imputed smokdays srsmokr extrindx ntwrksmk race lgbtcat
mi register regular age male
mi describe

// The dry run - with default ordering of chained equations
mi impute chained (pmm) smokdays ///
    (logit, iter(50)) srsmokr ///
    (ologit, iter(50) ascontinuous) extrindx ///
    (regress) ntwrksmk ///
    (mlogit, iter(50)) race lgbtcat = age male, ///
    augment dryrun

// The dry run - with ordering of chained equations based on the listing below
mi impute chained (pmm) smokdays ///
    (logit, iter(50)) srsmokr ///
    (ologit, iter(50) ascontinuous) extrindx ///
    (regress) ntwrksmk ///
    (mlogit, iter(50)) race lgbtcat = age male, ///
    augment dryrun orderasis

// Evaluate number of burn-in iterations (here I used the variable with the most
// missing data, ntwrksmk. In an actual application, one should check each
// variable).
// Syntax drawn from the -mi impute chained- documentation.
timer on 1
preserve
mi impute chained (pmm) smokdays ///
    (logit, iter(50)) srsmokr ///
    (ologit, iter(50) ascontinuous) extrindx ///
    (regress) ntwrksmk ///
    (mlogit, iter(50)) race lgbtcat = age male, ///
    rseed(1359) augment chainonly chaindots burnin(100) ///
    savetrace(impstats, replace) orderasis
use impstats, clear
// Best-effort removal of a stale PDF: -capture- keeps the do-file running if
// the file does not exist, -noisily- still shows the message.
capture noisily erase TracePlot_1.pdf
tsset iter
tsline ntwrksmk_mean, name(gr1, replace) nodraw
tsline ntwrksmk_sd, name(gr2, replace) nodraw
graph combine gr1 gr2, ///
    title(Trace plots of summaries of imputed values) ///
    row(2) name(TracePlot_1, replace)
graph export TracePlot_1.pdf, replace
restore
timer off 1

// Evaluate multiple chain convergence.
// Syntax drawn from the -mi impute chained- documentation.
timer on 2
preserve
set more off
mi impute chained (pmm) smokdays ///
    (logit, iter(50)) srsmokr ///
    (ologit, iter(50) ascontinuous) extrindx ///
    (regress) ntwrksmk ///
    (mlogit, iter(50)) race lgbtcat = age male, ///
    rseed(1359) chaindots augment add(3) burnin(100) savetrace(impstats2, replace) orderasis
use impstats2, clear
// One row per iteration, one column per chain (m) for each summary statistic.
reshape wide *mean *sd, i(iter) j(m)
// Best-effort removal of a stale PDF (see the note in the burn-in section).
capture noisily erase TracePlot_2.pdf
tsset iter
// NOTE(review): yline(25.24) is a hard-coded reference value, presumably the
// observed mean of ntwrksmk - confirm against the data.
tsline ntwrksmk_mean1 ntwrksmk_mean2 ntwrksmk_mean3, ///
    ytitle(Mean of Network Smoking) yline(25.24) ///
    legend(rows(1) label(1 "Chain 1") label(2 "Chain 2") label(3 "Chain 3")) ///
    title(Overlaid trace plots of summaries of imputed values) ///
    name(TracePlot_2, replace)
graph save TracePlot_2, replace
graph export TracePlot_2.pdf, replace
restore
timer off 2

// Generate actual imputations used in the analysis - 75 imputed data sets
timer on 3
// Start again from the original (m = 0) data and re-declare the MI structure.
mi extract 0, clear
mi set flong
mi register imputed smokdays srsmokr extrindx ntwrksmk race lgbtcat
mi register regular age male
mi impute chained (pmm) smokdays ///
    (logit, iter(50)) srsmokr ///
    (ologit, iter(50) ascontinuous) extrindx ///
    (regress) ntwrksmk ///
    (mlogit, iter(50)) race lgbtcat = age male, ///
    chaindots dots add(75) rseed(1349) augment orderasis
timer off 3

// Compress and save imputed data to disk for later use
compress
save addict_demo_imputed.dta, replace

// Load imputed data
use addict_demo_imputed.dta, clear

// Perform post-imputation diagnostics
timer on 4
// The default -midiagplots- is to look at all the variables with any missings,
// and get information about the first imputation.
// tabulate categorical vars; graph kdensity for continuous vars
// Warning: do not do this for more than a single imputation!
// Imputed are: smokdays srsmokr extrindx ntwrksmk race lgbtcat
// Of those the categorical vars are: srsmokr race lgbtcat; tables
// The continuous vars are: smokdays extrindx ntwrksmk; kd

// The following is an example only; it displays information for imputation 9
midiagplots, m(9) saving(g1,replace) nodraw
graph combine g1_9_smokdays.gph g1_9_ntwrksmk.gph g1_9_extrindx.gph, ///
    name(MIDiagPlot_9, replace)
graph save MIDiagPlot_9.gph, replace
graph export MIDiagPlot_9.pdf, replace
timer off 4

// The following is for continuous vars, with a specified plottype,
// with selected imputations, and displaying the combined graph only
timer on 5
midiagplots ntwrksmk, m(15(15)75) plottype(kdensity, kernel(epan2)) combine
graph save ntwrksmk_15_75_kd, replace
graph export ntwrksmk_15_75_kd.pdf, replace

midiagplots extrindx, m(15(15)75) plottype(kdensity, kernel(epan2)) combine
graph save extrindx_15_75_kd, replace
graph export extrindx_15_75_kd.pdf, replace

midiagplots smokdays, m(15(15)75) plottype(kdensity, kernel(epan2)) combine
graph save smokdays_15_75_kd, replace
graph export smokdays_15_75_kd.pdf, replace
timer off 5

// Perform analysis phase
timer on 6
mi estimate: regress smokdays age i.race male i.lgbtcat srsmokr ntwrksmk extrindx
// Joint tests of the race and lgbtcat indicator sets
mi test 2.race 3.race 4.race 5.race
mi test 2.lgbtcat 3.lgbtcat 4.lgbtcat

// Obtain standardized coefficients and r-squares
mibeta smokdays age i.race male i.lgbtcat srsmokr ntwrksmk extrindx, fisherz
timer off 6

// Report elapsed time for each numbered section, then reset the timers
timer list
timer clear

log close
exit