stata备忘录
时间:2024-04-24 20:25:30 来源:网络cs 作者:康由 栏目:卖家故事 阅读:
阅读本书更多章节>>>>
Stata备忘录
1. 画图
(1)时间趋势图
* Dual-axis time trend: per is drawn against axis 1 and tjj (dashed) against
* axis 2, with a vertical reference line at 2011.
label variable year "年份"
label variable per  "制造业增加值比重[左轴]"
label variable tjj  "工业增加值比重[右轴]"

#delimit ;
twoway
    connected per year, yaxis(1) color(black)
 || connected tjj year, yaxis(2) color(black) lpattern(dash)
 ||,
    graphregion(color(white))
    xlabel(2003(2)2019)
    ytitle("世界银行制造业增加值比重(%)", axis(1) height(5))
    ytitle("国家统计局工业增加值比重(%)", axis(2) height(5))
    note(数据来源:World Bank Open Data、国家统计局)
    xline(2011)
;
#delimit cr
等价命令
* Two connected series over season with explicit marker styling:
* value1819 is dashed with circle markers, value2020 is solid with squares.
#delimit ;
twoway
    connected value1819 season,
        lcolor(black) lpattern(dash)  msymbol(O) mlcolor(gs5) mfcolor(gs12)
 || connected value2020 season,
        lcolor(black) lpattern(solid) msymbol(S) mlcolor(gs5) mfcolor(gs12)
 ||,
    graphregion(color(white))
    legend(label(1 "18-19年平均") label(2 "2020年"))
    xlabel(1 "第一季度" 2 "第二季度" 3 "第三季度" 4 "第四季度", labsize(small))
;
#delimit cr
* Bars of mR1 on axis 2 (titled as revenue in million USD) combined with a
* dashed connected line of percent_R on axis 1 (titled as a share), then saved
* as a .gph file.
*
* Notes on this version:
*   - the colour names "balck" and "blace" were typos for "black";
*   - legend keys are numbered in plot order, so key 1 is the bar (revenue
*     amount) and key 2 is the line (share of the global market); the two
*     legend labels were attached to the wrong keys and are swapped here.
label var year "年份"

tw bar mR1 year, yaxis(2) bc(black) sort barwidth(0.9) fintensity(inten0) ///
        ylabel(0(2000)6000, axis(2)) ///
        xlabel(2014(1)2021) ///
    || connect percent_R year, yaxis(1) lc(black) lp(dash) mc(black) ///
        ylabel(0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%", axis(1)) ///
    ||, ///
    graphregion(color(white)) ///
    bgcolor(white) ///
    title("中国数字内容企业(游戏)收入金额及占全球市场比重", c(black) size(*0.8)) ///
    ytitle("占比(%)", axis(1) height(7)) ///
    ytitle("收入额(百万美元)", axis(2) height(5)) ///
    legend(label(1 "中国数字内容企业(游戏)收入金额") label(2 "中国数字内容企业(游戏)收入占全球市场比重")) ///
    legend(size(small) col(1)) ///
    note("数据源自:app annie")

graph save "Graph" "$path\output\playdata_1_percent_and_value_of_Chinese_Apps_Export.gph", replace
* Year coefficients of lnV estimated separately for three BEC groups and
* overlaid in a single coefplot.
* NOTE(review): the first -use- is immediately superseded by the second one.
use hs_adj_year_PQV_2000_2015.dta, clear
use hs_adj_year_PQV_2000_2015_cregime10, clear
merge m:1 hs_adj using equipment
replace BEC = 4 if BEC == 1 & equipment != 1
destring hs_adj, replace

* One regression per BEC group; year 2006 is excluded from every sample.
reghdfe lnV i.year if year != 2006 & BEC == 2, absorb(hs)
estimates store result_accessories

reghdfe lnV i.year if year != 2006 & BEC == 4, absorb(hs)
estimates store result_equipment

reghdfe lnV i.year if year != 2006 & BEC == 0, absorb(hs)
estimates store result_noncapital

* Small horizontal offsets keep the three series from overlapping.
coefplot ///
    (result_accessories, c(l) label("accessories") lp(dash)  lc(black) mc(black) ms(smcircle_hollow) offset(-0.07)) ///
    (result_equipment,   c(l) label("equipment")   lp(solid) lc(black) mc(black) ms(smcircle_hollow)) ///
    (result_noncapital,  c(l) label("noncapital")  lp(dot)   lc(black) mc(black) ms(smcircle_hollow) offset(0.07)) ///
    , vertical drop(_cons) byopts(xrescale) ///
    xlabel(1 "2001" 3 "2003" 5 "2005" 6 "2007" 8 "2009" 10 "2011" 12 "2013" 14 "2015") ///
    graphregion(color(white)) ///
    legend(size(small) col(3))
字体大小 option
字体大小option | description |
---|---|
zero | no size whatsoever, vanishingly small |
minuscule | smallest |
quarter_tiny | |
third_tiny | |
half_tiny | |
tiny | |
vsmall | |
small | |
medsmall | |
medium | |
medlarge | |
large | |
vlarge | |
huge | |
vhuge | largest |
tenth | one-tenth the size of the graph |
quarter | one-fourth the size of the graph |
third | one-third the size of the graph |
half | one-half the size of the graph |
full | text the size of the graph |
size | any size you want |
节点样式 eg: msymbol(O) mlcolor(gs5) mfcolor(gs12)
symbolstyle | Synonym(if any) | Description |
---|---|---|
circle | O | solid |
diamond | D | solid |
triangle | T | solid |
square | S | solid |
plus | + | |
X | X | |
arrowf | A | filled arrow head |
arrow | a | |
pipe | \| | |
V | V | |
smcircle | o | solid |
smdiamond | d | solid |
smsquare | s | solid |
smtriangle | t | solid |
smplus | | |
smx | x | |
smv | v | |
circle_hollow | Oh | hollow |
diamond_hollow | Dh | hollow |
triangle_hollow | Th | hollow |
square_hollow | Sh | hollow |
smcircle_hollow | oh | hollow |
smdiamond_hollow | dh | hollow |
smtriangle_hollow | th | hollow |
smsquare_hollow | sh | hollow |
point | p | a small dot |
none | i | a symbol that is invisible |
线样式
linepatternstyle | Description |
---|---|
solid | solid line |
dash | dashed line |
dot | dotted line |
dash_dot | |
shortdash | |
shortdash_dot | |
longdash | |
longdash_dot | |
blank | invisible line |
formula | e.g., -. or --.. etc. |
A formula is composed of any combination of | |
l | solid line |
_ | (underscore) a long dash |
- | (hyphen) a medium dash |
. | short dash (almost a dot) |
# | small amount of blank space |
颜色
black | edkblue | gs12 | lime | orange |
---|---|---|---|---|
blue | eggshell | gs13 | ltblue | orange_red |
bluishgray | eltblue | gs14 | ltbluishgray | pink |
bluishgray8 | eltgreen | gs15 | ltbluishgray8 | purple |
brown | emerald | gs16 | ltkhaki | red |
chocolate | emidblue | gs2 | magenta | sand |
cranberry | erose | gs3 | maroon | sandb |
cyan | forest_green | gs4 | midblue | sienna |
dimgray | gold | gs5 | midgreen | stone |
dkgreen | gray | gs6 | mint | sunflowerlime |
dknavy | green | gs7 | navy | teal |
dkorange | gs0 | gs8 | navy8 | white |
ebblue | gs1 | gs9 | none | yellow |
ebg | gs10 | khaki | olive | |
edkbg | gs11 | lavender | olive_teal |
(2)柱状图
* Bar chart of cn_wzje_80 and cn_wzje_81 over sec, restricted to wzlx == 0;
* the negative bargap makes the two bars of each group overlap.
graph bar cn_wzje_80 cn_wzje_81 if wzlx == 0, ///
    over(sec) bargap(-30) ///
    ytitle("吸引外资金额") ///
    legend(label(1 "08年前") label(2 "08年后")) ///
    title("各行业吸引外资2008年前后对比") ///
    subtitle("中国中西部服务业") ///
    note("中国中西部服务业")
(3)散点图
* Labelled scatter of delta_v2_v3 against delta_v1_v2, restricted to
* [-0.3, 2.3] on both variables, with the line y = x and dashed zero lines.
* Only the y = x line appears in the legend.
#delimit ;
twoway
    scatter delta_v2_v3 delta_v1_v2
        if  delta_v1_v2 >= -0.3 & delta_v1_v2 <= 2.3
          & delta_v2_v3 >= -0.3 & delta_v2_v3 <= 2.3,
        mlabel(hy4) mlcolor(black) mlabcolor(black) msymbol(x) mlabsize(tiny)
 || function y=x, range(-0.3 2.3)
 ||,
    xlabel(-0.3(0.5)2.3) ylabel(-0.3(0.5)2.3)
    graphregion(color(white))
    xline(0, lpattern(dash) lcolor(gs10))
    yline(0, lpattern(dash) lcolor(gs10))
    legend(ring(0) position(5) order(2 "45°线"))
    ytitle("11-15时段的增速")
    xtitle("07-11时段的增速")
;
#delimit cr
* Scatter of wzje_CE0308 against wzje_CE1419 labelled by sec, with the line
* y = x over [0, 0.11] as the only legend entry.
#delimit ;
twoway
    scatter wzje_CE0308 wzje_CE1419, mlabel(sec) mlabvposition(sec)
 || function y=x, range(0 0.11)
 ||,
    title("中国东部地区03-08对比中国东部地区14-19[金额]")
    ytitle("中国东部03-08")
    xtitle("中国东部14-19")
    legend(ring(0) position(5) order(2 "45°线"))
    graphregion(color(white))
;
#delimit cr
* Scatter of c_AS against c_WD labelled by cic03, with a linear fit that is
* the only legend entry.
#delimit ;
twoway
    scatter c_AS c_WD, mlabel(cic03)
 || lfit c_AS c_WD
 ||,
    title("东盟增速放缓 vs 世界增速放缓")
    ytitle("东盟")
    xtitle("世界")
    legend(ring(0) position(5) order(2 "拟合"))
    graphregion(color(white))
;
#delimit cr
(4)bgshade
* Connected lines of lamda22 over ks (6 to 11) for treat == 1 and treat == 0,
* passed to the user-written -bgshade- through its twoway() option.
* NOTE(review): shading is presumably driven by uu9 via shaders() — confirm
* against the bgshade help file.
#delimit ;
bgshade ks, shaders(uu9)
    twoway(connected lamda22 ks if treat==1&ks>=6&ks<=11 ||
           connected lamda22 ks if treat==0&ks>=6&ks<=11 ,
           xlabel(6(1)11)
           title("新冠疫情冲击下企业平均收入变化趋势"))
;
#delimit cr
(5)coefplot
* Vertical coefficient plot of the active estimation results with 90%
* confidence intervals, a zero reference line and relabelled x positions.
#delimit ;
coefplot,
    levels(90) vertical
    lcolor(black) mcolor(black) msymbol(circle_hollow)
    ytitle(估计系数, size(small))
    ylabel(, labsize(small) angle(horizontal) nogrid)
    yline(0, lwidth(vthin) lpattern(solid) lcolor(black))
    xtitle(事件发生时间, size(small))
    title("(B)企业缴税的平行趋势检验")
    xlabel(0 "." 1 "2019s2" 2 "2019s3" 3 "2019s4" 4 "2020s1" 5 "2020s2" 6 "2020s3" 7 "2020s4")
;
#delimit cr
* Year effects on lnQ and lnV for elec == 1 and elec == 0, each stored under
* its own name; only the two lnQ models are plotted below.
reghdfe lnQ i.Year if elec == 1, absorb(i.citycode) vce(robust)
estimates store elec_Q_1

reghdfe lnV i.Year if elec == 1, absorb(i.citycode) vce(robust)
estimates store elec_V_1

reghdfe lnQ i.Year if elec == 0, absorb(i.citycode) vce(robust)
estimates store elec_Q_0

reghdfe lnV i.Year if elec == 0, absorb(i.citycode) vce(robust)
estimates store elec_V_0

* addplot() joins the point estimates of each model with a dashed line.
#delimit ;
coefplot
    (elec_Q_1, label("半导体电子元件相关企业进口数量")   offset(0.05)  pstyle(p3))
    (elec_Q_0, label("非半导体电子元件相关企业进口数量") offset(-0.05) pstyle(p4)),
    vertical drop(_cons) xline(0)
    graphregion(color(white))
    yline(0)
    addplot(line @b @at, lp(dash) lwidth(*0.5))
    legend(label(1 "半导体电子元件相关企业进口数量") label(2 "非半导体电子元件相关企业进口数量"))
;
#delimit cr
(6)画系数和置信区间
* Point estimates (coef) with capped spikes between ci_lower and ci_upper,
* plotted against week; the graph is kept in memory as Coef_all_I.
#delimit ;
twoway
    (scatter coef week)
    (rcap ci_lower ci_upper week,
        lcolor(black) mcolor(black)
        lwidth(vthin) lpattern(dash)
        msymbol(circle_hollow)
        legend(label(2 "99% CI"))),
    yline(0)
    xtitle("")
    graphregion(fcolor(white))
    title("第X周的系数", size(medium))
    name("Coef_all_I", replace)
;
#delimit cr
(7)画直方图
一般使用kdensity
* Frequency histogram of year between 1400 and 2010 with 200 bins, thin
* vertical lines at 1950 and 1980, and a text label placed west of each line.
#delimit ;
histogram year if inrange(year, 1400, 2010),
    frequency bin(200)
    ylabel(0(500)2500)
    xtitle("Year")
    xline(1950 1980, lwidth(thin))
    text(1500 1950 "Year=1950", placement(w))
    text(2000 1980 "Year=1980", placement(w))
;
#delimit cr
(8)画桑基图
* Two Sankey diagrams of flows from city_code to iso3_j, one with widths from
* total Revenue and one from total Downloads, each saved as a .gph file.
cd $path\appdata
use Data_games.dta, clear
merge m:1 ParentCompanyName using "$path\data\company_city"
keep if _merge == 3
drop _merge

* Flow endpoints and their totals; one row is kept per (from, to) pair.
generate from = city_code
encode iso3_j, generate(to)
bysort from to: egen tR = total(Revenue)
bysort from to: egen tD = total(Downloads)
duplicates drop from to, force

* x0 / x1 are the horizontal positions of the source and destination layers.
generate x0 = 1
generate x1 = 2
tostring city_code, generate(city2)
drop if dest == "CHN"

#delimit ;
sankey_plot x0 from x1 to,
    width0(tR) extra
    xlabel(1 "Source" 2 "Destination", nogrid labsize(small))
    colorpalette(economist, opacity(30))
    label0(city) label1(iso3_j)
    labsize(*0.6) labcolor(black)
    graphregion(color(white)) gap(0.1)
    title("地级市层面Apps出海流向(按收入额)", color(black) size(*0.8))
;
graph save "Graph" "$path\output\sankey_R_0228.gph", replace
;
sankey_plot x0 from x1 to,
    width0(tD) extra
    xlabel(1 "Source" 2 "Destination", nogrid labsize(small))
    colorpalette(economist, opacity(30))
    label0(city) label1(iso3_j)
    labsize(*0.6) labcolor(black)
    graphregion(color(white)) gap(0.1)
    title("地级市层面Apps出海流向(按下载量)", color(black) size(*0.8))
;
graph save "Graph" "$path\output\sankey_D_0228.gph", replace
;
#delimit cr
(9)气泡图
* Bubble chart: hollow circles of mv against T_gap_05_00 weighted by N, drawn
* separately for BEC 0, 4 and 2; rows with a zero gap or zero N are excluded.
#delimit ;
twoway
    scatter mv T_gap_05_00 [fweight=N]
        if BEC == 0 & T_gap_05_00 != 0 & N != 0,
        msymbol(Oh) mcolor(ebblue%40)
 || scatter mv T_gap_05_00 [fweight=N]
        if BEC == 4 & T_gap_05_00 != 0 & N != 0,
        msymbol(Oh) mcolor(orange_red%40)
 || scatter mv T_gap_05_00 [fweight=N]
        if BEC == 2 & T_gap_05_00 != 0 & N != 0,
        msymbol(Oh) mcolor(green%20)
 ||,
    legend(label(1 "非资本品") label(2 "equipment") label(3 "accessories"))
;
#delimit cr
2. 处理数据
(1)拓展expand数据
例如:当前数据中 有9523、9524、9525、9526、9527、9528共计6个样本,现在想把这6个样本根据freq进行扩充,变为9523、9524_1、9524_2、9525_1、9525_2、9526_1、9526_2、9527_1、9527_2、9528_1、9528_2这11个样本
freq | count | value |
---|---|---|
1 | 9523 | 4845.1143 |
2 | 9524 | 969.66498 |
2 | 9525 | 129.53349 |
2 | 9526 | 71284.508 |
2 | 9527 | 1038.127 |
2 | 9528 | 445877.09 |
count是id的唯一识别码,expandcl函数可以生成freq行相同的样本,并生成一个新的id识别码freq_count
* Build a cluster identifier from (id, hs02_6), expand the clusters by freq
* with expandcl, then discard the identifier that expandcl generated.
egen count = group(id hs02_6)
expandcl freq, cluster(count) generate(freq_count)
drop freq_count
(2)时间数据
* Build a daily date R from its components and derive calendar parts from it.
gen R      = mdy(month_r, day_r, year_r)
gen week_r = week(R)
* day_r already exists (it is an argument of mdy() above), so
* -gen day_r = day(R)- would stop with "variable day_r already defined".
* The day of month recovered from R is stored under a new name instead.
gen dom_r  = day(R)
gen dow_r  = dow(R)              // day of the week
gen doy_r  = doy(R)              // day within the year
gen yw_r   = yw(year_r, week_r)  // weekly date built from year and week
gen ed     = yw - yw_r
* yw(), ym(), yq() and yh() build weekly, monthly, quarterly and half-yearly
* dates respectively.

* Difference in days between two dates stored as "YMD" strings.
gen period_kb = date(date_u, "YMD") - date(date_kb, "YMD")
(3)常见函数
* Quick reference of frequently used functions. These are illustrative
* one-liners, not a script to run top to bottom (y is reused as the target).
* Relative to the original list: -rowmim- was a typo for -rowmin-, and the
* notes on rowsd() and std() are corrected.
int(x)                       // truncate: keep only the part before the decimal point
round(x)                     // round to the nearest integer
round(x, .01)                // round to two decimal places
gen y = sum(x)               // running (cumulative) sum down the column
egen y = sum(x)              // column total
egen y = rsum(x y z)         // row sum x + y + z
egen y = rowmean(x y z)      // row mean of x, y, z
egen y = rowsd(x y z)        // row standard deviation of x, y, z (not the variance)
egen y = rowmin(x y z)       // row minimum of x, y, z
egen y = rowmax(x y z)       // row maximum of x, y, z
egen y = mean(x)             // column mean
egen y = median(x)           // column median
egen y = std(x)              // standardized x (mean 0, sd 1), not the coefficient of variation
bysort x (y): gen z = y[1]   // within each x group sorted by y, z = first value of y
(4)缩尾处理
* Manual winsorizing at the 1st and 99th percentiles: for every variable in
* the range, <var>_w starts as a copy and is capped at p1 / p99.
* The upper cap skips missing values (`v' < .).
foreach v of varlist DexpoAS4-DlnexpoWD2 {
    generate `v'_w = `v'
    quietly summarize `v', detail
    replace `v'_w = r(p99) if `v' > r(p99) & `v' < .
    replace `v'_w = r(p1)  if `v' < r(p1)
}

* User-written alternative applied in place to wage with cuts at 1 and 99;
* the trim option requests trimming rather than winsorizing.
winsor2 wage, replace cuts(1 99) trim
summary 一个变量之后,可以返回的结果有
r(N) //number of observations
r(mean) //mean
r(skewness) //skewness (detail only)
r(min) //minimum
r(max) //maximum
r(sum_w) //sum of the weights
r(p1) //1st percentile (detail only)
r(p5) //5th percentile (detail only)
r(p10) //10th percentile (detail only)
r(p25) //25th percentile (detail only)
r(p50) //50th percentile (detail only)
r(p75) //75th percentile (detail only)
r(p90) //90th percentile (detail only)
r(p95) //95th percentile (detail only)
r(p99) //99th percentile (detail only)
r(Var) //variance
r(kurtosis) //kurtosis (detail only)
r(sum) //sum of variable
r(sd) //standard deviation
(5)创建文件夹
在project路径下生成一个workspace文件夹
再生成子文件夹储存数据(data)、控制变量(controlvars )、临时数据(tempdata)、回归结果(outreg)
* Create the workspace folder, then its sub-folders data, controlvars,
* tempdata and outreg (efolder is a user-written command).
efolder, cd(D:\stata15\project\workspace)
* cd() needs its own closing parenthesis; without it sub() was swallowed into
* the path argument and the parentheses were unbalanced.
efolder, cd(D:\stata15\project\workspace) sub(data controlvars tempdata outreg)
(6)分组处理数据(bysort的替代方案)
* Replace the data in memory with the mean of drqianfen (death rate) for each
* combination of highzupu50 (genealogy indicator) and year.
collapse (mean) drqianfen, ///
    by(highzupu50 year)
(7)定义无缺失的样本
* rsample is 1 when none of the listed variables is missing and 0 otherwise
* (missing() with several arguments is true if any argument is missing).
generate rsample = !missing(avggrain_fyr, nograin_fyr, urban_fyr, ///
                            dis_bj_fyr, dis_pc_fyr, migrants_fyr, ///
                            rice_fyr, minor_fyr, edu_fyr)
(8)定义Dummy的新替代式(时间range)
* Cohort dummies from yob: inrange(x, a, b) is 1 when a <= x <= b and 0
* otherwise, so pre is 1 for 1825 <= yob <= 1899; mid and post work alike.
gen pre  = inrange(yob, 1825, 1899)
* mid starts at 1900 (not 1899) so that yob == 1899 is not flagged in both
* pre and mid; the ranges are now disjoint like mid (…1919) and post (1920…).
gen mid  = inrange(yob, 1900, 1919)
gen post = inrange(yob, 1920, 1960)
(9)快速替换
* Map the five treatment years onto the codes 1 to 5 in place.
recode treatyear ///
    (1969 = 1) ///
    (1979 = 2) ///
    (1989 = 3) ///
    (1999 = 4) ///
    (2009 = 5)
3. 处理字符
(1)替换字符
* Remove every space from the string variable (the final "." means all
* occurrences are replaced).
replace 候选人姓名 = subinstr(候选人姓名, " ", "", .)
(2)捕捉字符中的某些特征
* Keep only observations whose city matches the wildcard pattern.
keep if strmatch(city, "*山东*")

* Flag (temp = 1, missing otherwise) observations whose reporteriso3 matches
* the pattern "A*".
generate temp = 1 if strmatch(reporteriso3, "A*")
(3)提取字符,检索特定字符
* Take 4 characters of enddate starting at position 1 and store them in year.
generate year = substr(enddate, 1, 4)

* strpos(s1, s2) returns the position of s2 within s1, or 0 when s2 is not
* found in s1; y records whether s2 was found.
generate y = strpos(s1, s2) != 0
4. 输出结果
(1)常规输出
* Append the current estimates to a Word table, adding rows that record the
* country and year fixed effects.
outreg2 using "E:\mfg\outreg\r2", ///
    word append ///
    addtext(CountryFE, YES,YearFE, YES)
(2)iv回归输出第一阶段
* Fixed-effects IV regression of p_a_w with DexpoCN4_w instrumented by
* dexpo44, followed by the first stage re-estimated with xtreg on the IV
* estimation sample; the table written by outreg2 keeps only dexpo44.
#delimit ;
eststo: xtivreg p_a_w (DexpoCN4_w = dexpo44)
    i.t c.expr0#t c.Lshare0#t
    c.lnGDP0#t c.lngdp0#t,
    fe first vce(cluster c)
;
eststo: xtreg DexpoCN4_w dexpo44
    i.t c.expr0#t c.Lshare0#t
    c.lnGDP0#t c.lngdp0#t
    if e(sample)==1,
    fe
;
#delimit cr

cd $path\outreg
outreg2 using "table3", word replace addtext(CityFE, YES,YearFE, YES) keep(dexpo44)
(3)变量描述性统计
* Number of observations, mean, standard deviation, minimum and maximum of
* inv and the other listed variables, with the statistics as columns.
#delimit ;
tabstat
    inv loginv log_levies
    logpopl logincome logasset hhsize landpc logmigration logtax logtransfer share_admin
    postcont postopen secret_ballot proxy_voting moving_ballot,
    statistics(N mean sd min max) columns(statistics)
;
#delimit cr
5.矩阵保存结果
* Collect the season coefficients (levels 2 to 4, base level 1) from one
* regression per year into a 3 x 3 matrix: rows are seasons 2-4, columns are
* the years 2018, 2019 and 2020. The matrix is then copied into variables.
matrix T1 = J(3, 3, .)

local col = 0
forvalues yr = 2018/2020 {
    local ++col
    reghdfe temp ib1.season if year == `yr' & treat == 1, noabsorb
    forvalues row = 1/3 {
        local lvl = `row' + 1
        matrix T1[`row', `col'] = _b[`lvl'.season]
    }
}

svmat T1
6.导入数据(全字符串)
* For each year 2000-2003: read the CSV from the origindata folder with every
* column imported as a string, then save it as data<year>.dta one level up.
forvalues yr = 2000/2003 {
    cd E:\Data\EPS工企海关匹配库\origindata
    import delimited "工企+海关(`yr').csv", stringcols(_all) clear
    cd E:\Data\EPS工企海关匹配库
    save data`yr'.dta, replace
}
7.循环
clear all
set obs 1000

**# forvalues: process a single variable
* id identifies the person; there are 50 people with 20 observations each,
* one per year.
generate id = .
forvalues p = 1/50 {
    * Rows of person p: 1-20 for the first person, 21-40 for the second, ...
    local first = (`p' - 1) * 20 + 1
    local last  = (`p' - 1) * 20 + 20
    replace id = `p' in `first'/`last'
}
* Give every person a time index that starts at 2001.
bysort id: generate T = _n + 2000

**# forvalues: process several variables
forvalues k = 1/5 {
    generate value`k' = .
    capture generate e = rnormal()
    replace value`k' = e * 10 + `k'
    capture drop e
}
* The loop above is equivalent to writing each variable out by hand:
generate value6 = .
capture generate e = rnormal()
replace value6 = e * 10 + 6
capture drop e

generate value7 = .
capture generate e = rnormal()
replace value7 = e * 10 + 7
capture drop e

generate value8 = .
capture generate e = rnormal()
replace value8 = e * 10 + 8
capture drop e

generate value9 = .
capture generate e = rnormal()
replace value9 = e * 10 + 9
capture drop e

generate value10 = .
capture generate e = rnormal()
replace value10 = e * 10 + 10
capture drop e

**# while: process a single range of periods
* Multiply value1-value10 by 0.1 for the selected periods.
* NOTE(review): the original note describes periods 11 to 20, but the loop
* covers T = 2010, ..., 2019 — confirm which range is intended.
local yr = 2010
while `yr' < 2020 {
    forvalues k = 1/10 {
        replace value`k' = value`k' * 0.1 if T == `yr'
    }
    local ++yr
}

**# foreach: process a list of variables
* Rescale each variable to [0, 1] with its min and max, then plot its density.
foreach x in value1 value2 value3 value4 value5 {
    summarize `x', detail
    replace `x' = (`x' - r(min)) / (r(max) - r(min))
    kdensity `x'
}
* The loop above is equivalent to writing each variable out by hand:
summarize value6, detail
replace value6 = (value6 - r(min)) / (r(max) - r(min))
kdensity value6

summarize value7, detail
replace value7 = (value7 - r(min)) / (r(max) - r(min))
kdensity value7

summarize value8, detail
replace value8 = (value8 - r(min)) / (r(max) - r(min))
kdensity value8

summarize value9, detail
replace value9 = (value9 - r(min)) / (r(max) - r(min))
kdensity value9

summarize value10, detail
replace value10 = (value10 - r(min)) / (r(max) - r(min))
kdensity value10
8.将第一行作为 label / varname
* Use the first row as variable labels.
labone, nrow(1)

* Use the first row as variable names (the first row is not kept).
nrow

* Use the first row as variable names (the first row is kept).
nrow, keep
阅读本书更多章节>>>>
本文链接:https://www.kjpai.cn/gushi/2024-04-24/161964.html,文章来源:网络cs,作者:康由,版权归作者所有,如需转载请注明来源和作者,否则将追究法律责任!
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
下一篇:返回列表