Method 34 · quantile

分位数回归

看均值之外的门槛、分位数和局部变化

分位数回归的 Markdown 风格教程：基于共用 CSMAR 风格案例生成实际代码、结果表和案例图。

一、分位数回归是什么？

这页是分位数回归的方法文档。所有表格和图都由 marketing/method_case_assets/generate_assets.py 从同一份 csmar_innovation_realistic.csv 生成，避免用占位图充当教程。重点是展示局部、分位数、门槛或非参数关系，避免把平均效应写成唯一结论。

二、先看这个案例的结论

分位数 0.10：dfi_index 系数 = 0.3934，标准误 = 0.0353。
分位数 0.25：dfi_index 系数 = 0.4928，标准误 = 0.0524。
分位数 0.50：dfi_index 系数 = 0.6282，标准误 = 0.0556。
这些数字来自页面里的结果表；写论文时先解释数值含义，再讨论理论含义。

三、案例口径

字段	口径
数据	CSMAR 风格 A 股企业创新面板
原始样本	196 家上市公司，2015-2020 年，约 1200 个公司-年观测；各方法有效样本以本页输出表 N 为准
因变量	patent_count；回归页通常使用 ln(1 + patent_count)，但本页代码直接以 patent_count 原值作为因变量
核心解释变量	dfi_index，数字普惠金融指数；部分真实烟测输出展示的是标准化后的 dfi_index
控制变量	roa、lev、size、growth、cashflow、tobinq、top1、dual、board、indep、soe、age
输出文件	quantile_results.csv
角色要求	dv、iv
依赖包	无强制的 Stata 社区包要求；若已安装 grqreg，代码会额外导出分位数系数图 quantile_coef_plot.png

四、实际代码

下面是本页对应的最小可复现 Stata 代码。生产环境里 empirical-wizard 会在此基础上处理变量映射、输出校验、失败诊断和报告装配。

* Job setup: open the text log, register the job/output directory and the data
* path, make the shared ado directory visible, and load the case CSV with the
* original variable-name casing preserved.
log using "/root/workspace/empirical-wizard/workspace/2f608d77/analysis.log", replace text
global JOB_DIR "/root/workspace/empirical-wizard/workspace/2f608d77"
set more off
adopath + "/root/ado/plus"
global DATA_PATH "/root/workspace/empirical-wizard/workspace/test_e2e/csmar_innovation.csv"
import delimited "$DATA_PATH", clear case(preserve)
* Fall back to the current directory only when JOB_DIR is actually empty.
* `confirm` has no `global` subcommand, so the previous
* `capture confirm global JOB_DIR` always produced a nonzero return code and
* unconditionally reset JOB_DIR to ".", sending outputs to the working
* directory instead of the job folder.
if `"$JOB_DIR"' == "" global JOB_DIR "."
* Remove rows that are exact duplicates across every column; duplicated rows
* would inflate N and can make xtset fail.
quietly duplicates drop

* Resolve the panel identifier. A numeric stkcd is used as-is; otherwise an
* encode into a temporary variable is attempted and used only if it succeeds.
local idvar ""
capture confirm variable stkcd
if _rc {
    di as text "面板ID变量不存在，跳过 xtset ID：stkcd"
}
else {
    capture confirm numeric variable stkcd
    if !_rc {
        local idvar "stkcd"
    }
    else {
        tempvar panel_code
        capture encode stkcd, gen(`panel_code')
        if !_rc local idvar "`panel_code'"
    }
}

* Resolve the time variable with the same rule applied to year.
local timevar ""
capture confirm variable year
if _rc {
    di as text "时间变量不存在，跳过 xtset time：year"
}
else {
    capture confirm numeric variable year
    if !_rc {
        local timevar "year"
    }
    else {
        tempvar period_code
        capture encode year, gen(`period_code')
        if !_rc local timevar "`period_code'"
    }
}

* Declare the panel structure only when both dimensions were resolved; a failed
* xtset is tolerated because the qreg calls below do not require it.
if "`idvar'" != "" {
    if "`timevar'" != "" {
        capture xtset `idvar' `timevar'
    }
}

* ── Quantile regression ──
* Fit one qreg per quantile and append one CSV row holding the dfi_index
* estimate. A failed fit still writes a row, filled with missing markers.
tempname csv
capture file close `csv'
file open `csv' using "$JOB_DIR/quantile_results.csv", write replace
file write `csv' "分位数,系数,标准误,t/z 值,p 值,95% CI 下限,95% CI 上限,N" _n

local controls "roa lev size growth cashflow tobinq top1 dual board indep soe age"
foreach tau in 0.10 0.25 0.50 0.75 0.90 {
    capture noisily qreg patent_count dfi_index `controls', quantile(`tau') vce(robust)
    if _rc {
        file write `csv' "`tau',.,.,.,.,.,.,." _n
        continue
    }
    * The test statistic is kept unformatted until the p-value has been computed.
    local stat = _b[dfi_index] / _se[dfi_index]
    local p_txt : display %9.4f 2*ttail(e(df_r), abs(`stat'))
    local stat_txt : display %7.3f `stat'
    local b_txt : display %9.4f _b[dfi_index]
    local se_txt : display %9.4f _se[dfi_index]
    * Interval bounds use the fixed 1.96 normal critical value.
    local lo_txt : display %9.4f _b[dfi_index] - 1.96 * _se[dfi_index]
    local hi_txt : display %9.4f _b[dfi_index] + 1.96 * _se[dfi_index]
    local nobs = e(N)
    file write `csv' "`tau',`b_txt',`se_txt',`stat_txt',`p_txt',`lo_txt',`hi_txt',`nobs'" _n
}

file close `csv'

* Optional coefficient-by-quantile plot via grqreg, drawn only when that command
* is installed. sqreg relies on bootstrap replications, so it is skipped above
* 100000 observations and run with fewer replications above 30000.
quietly count if !missing(patent_count)
local __sqreg_n = r(N)
local __sqreg_reps = 200
if `__sqreg_n' > 30000 local __sqreg_reps = 100
if `__sqreg_n' > 100000 {
    di as text "sqreg skipped: N=`__sqreg_n' too large for simultaneous-quantile bootstrap; the five separate qreg runs above already provide the per-quantile coefficients."
}
else {
    * Fix the random-number seed so the bootstrap results are identical across reruns.
    set seed 20240101
    capture noisily sqreg patent_count dfi_index roa lev size growth cashflow tobinq top1 dual board indep soe age, q(0.1 0.25 0.5 0.75 0.9) reps(`__sqreg_reps')
    local __sqreg_rc = _rc
    if `__sqreg_rc' {
        * Without a successful sqreg the estimates in memory belong to the last qreg
        * run, so plotting would silently describe a different model.
        di as text "sqreg failed (rc=`__sqreg_rc'); quantile coefficient plot skipped."
    }
    else {
        capture which grqreg
        if !_rc {
            capture noisily grqreg dfi_index, ci ols olsci
            * Export only when grqreg actually drew a graph; otherwise a stale graph
            * left in memory could be written under this file name.
            if !_rc {
                capture graph export "$JOB_DIR/quantile_coef_plot.png", replace width(1800)
            }
        }
    }
}

di "分位数回归完成 (5 个分位数)"
log close

五、实际输出表

这张表就是本方法页使用的案例输出文件，保存在 marketing/method_case_assets/quantile/result.csv。注意：上面代码写出的 quantile_results.csv 还包含“p 值”一列，下表未展示该列。

分位数	系数	标准误	t/z 值	95% CI 下限	95% CI 上限	N
0.10	0.3934	0.0353	11.152	0.3243	0.4626	720
0.25	0.4928	0.0524	9.398	0.3900	0.5956	720
0.50	0.6282	0.0556	11.296	0.5192	0.7373	720
0.75	0.5981	0.0442	13.525	0.5115	0.6848	720
0.90	0.5896	0.0563	10.471	0.4792	0.6999	720

六、案例图

这是一张由同一份案例数据生成的页面内诊断图。

七、论文里怎么写

本文在共用企业面板样本上报告分位数回归，核心输出见 quantile_results.csv。结果解释时同时关注样本口径、变量构造、系数方向、标准误和适用前提，避免只凭单个 p 值完成方法选择。

八、检查清单

确认本页使用的因变量、核心解释变量、控制变量与论文主模型一致。
先看表格里的样本口径，再看系数、p 值或诊断指标。
代码里的输出文件名要能对应网页展示的结果表。

返回方法库 · 打开 empirical-wizard