Method 12 · mechanism

机制分析

把作用路径、调节项和异质性拆开看

机制分析的 Markdown 风格教程：基于共用 CSMAR 风格案例生成实际代码、结果表和案例图。

一、机制分析是什么？

这页是机制分析的方法文档。所有表格和图都由 marketing/method_case_assets/generate_assets.py 从同一份 csmar_innovation_realistic.csv 生成，避免用占位图充当教程。重点是把机制变量、交互项或异质性分组变成可以复现的回归代码和表。

二、先看这个案例的结论

中介变量 = cashflow；当前正式检验的机制变量。
控制变量口径 = 已从控制变量中排除中介变量 cashflow；避免基准模型先控制掉机制通道。
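c_path_p = 0.0000；总效应（c 路径）的 p 值，按四位小数显示为 0.0000，即 p < 0.0001。
mechanism_status = not_supported；a 路径（p = 0.7551）、b 路径（p = 0.3985）和 Sobel 检验（p = 0.7693）均未达到 10% 显著性水平，cashflow 的中介机制未获支持。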
这些数字来自页面里的结果表；写论文时先解释数值含义，再讨论理论含义。

三、案例口径

字段	口径
数据	CSMAR 风格 A 股企业创新面板
原始样本	196 家上市公司，2015-2020 年，约 1200 个公司-年观测；各方法有效样本以本页输出表 N 为准
因变量	patent_count；回归页通常使用 ln(1 + patent_count)
核心解释变量	dfi_index，数字普惠金融指数；部分真实烟测输出展示的是标准化后的 dfi_index
控制变量	roa、lev、size、growth、cashflow、tobinq、top1、dual、board、indep、soe、age（本页把 cashflow 作为中介变量，回归时已将其从控制变量中剔除）
输出文件	mechanism_feasibility.csv
角色要求	dv、iv
依赖包	reghdfe（Stata 社区包，依赖 ftools）；代码通过 adopath + "/root/ado/plus" 加载，运行前需先安装

四、实际代码

下面是本页对应的最小可复现 Stata 代码。生产环境里 empirical-wizard 会在此基础上处理变量映射、输出校验、失败诊断和报告装配。

* ---------------------------------------------------------------------------
* Session preamble: open the analysis log, register the job/output directory
* and the input data path, then load the CSV with original column-name case.
* ---------------------------------------------------------------------------
* Close any log left open by an earlier run that stopped before `log close`;
* otherwise `log using` aborts because a log is already open.
capture log close
log using "/root/workspace/empirical-wizard/workspace/32db6b88/analysis.log", replace text
global JOB_DIR "/root/workspace/empirical-wizard/workspace/32db6b88"
set more off
adopath + "/root/ado/plus"
global DATA_PATH "/root/workspace/empirical-wizard/workspace/test_e2e/csmar_innovation.csv"
import delimited "$DATA_PATH", clear case(preserve)
* Fall back to the current directory only when JOB_DIR is genuinely empty.
* `confirm` has no `global` subcommand, so the former
* `capture confirm global JOB_DIR` always returned a nonzero _rc and
* unconditionally overwrote JOB_DIR with ".".
if "$JOB_DIR" == "" global JOB_DIR "."
* Drop rows that are exact duplicates across all columns so that N is not
* inflated and xtset does not fail on repeated panel keys.
quietly duplicates drop

* idvar / timevar end up holding the numeric panel and time variables, or
* stay empty when the source column is absent or cannot be encoded.
local idvar ""
local timevar ""

* Panel identifier: use stkcd as-is when numeric, otherwise encode it into a
* temporary numeric variable.
capture confirm variable stkcd
if _rc {
    di as text "面板ID变量不存在，跳过 xtset ID：stkcd"
}
else {
    capture confirm numeric variable stkcd
    if !_rc {
        local idvar "stkcd"
    }
    else {
        tempvar __ewiz_id
        capture encode stkcd, gen(`__ewiz_id')
        if !_rc local idvar "`__ewiz_id'"
    }
}

* Time variable: same treatment for year.
capture confirm variable year
if _rc {
    di as text "时间变量不存在，跳过 xtset time：year"
}
else {
    capture confirm numeric variable year
    if !_rc {
        local timevar "year"
    }
    else {
        tempvar __ewiz_time
        capture encode year, gen(`__ewiz_time')
        if !_rc local timevar "`__ewiz_time'"
    }
}

* Declare the panel only when both dimensions were resolved; a failing xtset
* is tolerated (captured) and does not stop the script.
if "`idvar'" != "" {
    if "`timevar'" != "" capture xtset `idvar' `timevar'
}
capture program drop __ewiz_skip_mediation
* __ewiz_skip_mediation "<reason>"
*   Writes placeholder versions of the two mediation output files so that a
*   skipped analysis still leaves both CSVs under $JOB_DIR:
*     - mediation_results.csv      : header plus one "skipped: <reason>" row
*     - mechanism_feasibility.csv  : header, the mediator row, and a
*                                    mechanism_status=skipped row with <reason>
*   The reason text is written verbatim into the CSV rows.
program define __ewiz_skip_mediation
    args reason
    * One handle is reused for both files; each file is closed before the
    * next one is opened.
    tempname out
    capture file close `out'
    file open `out' using "$JOB_DIR/mediation_results.csv", write replace
    file write `out' "路径,系数,标准误,说明" _n "状态,.,.,skipped: `reason'" _n
    file close `out'
    file open `out' using "$JOB_DIR/mechanism_feasibility.csv", write replace
    file write `out' "指标,值,判定" _n "中介变量,cashflow,当前正式检验的机制变量" _n "mechanism_status,skipped,`reason'" _n
    file close `out'
end
di "=== Step 1: dfi_index -> patent_count (total effect c) ==="
* Total-effect model: patent_count on dfi_index plus controls (the mediator
* cashflow is deliberately left out), absorbing the id and time effects and
* clustering on the id variable.
* NOTE(review): this model does not require cashflow to be non-missing, while
* Steps 2-3 do; if cashflow has missing values the three steps run on
* different samples - confirm whether a common sample is intended.
capture noisily reghdfe patent_count dfi_index roa lev size growth tobinq top1 dual board indep soe age, absorb(`idvar' `timevar') vce(cluster `idvar')
if _rc {
    __ewiz_skip_mediation "总效应模型不可估；可能是完整样本不足、固定效应吸收核心变量或变量共线"
    * Close the log before leaving so an early exit does not leave it open.
    capture log close
    exit 0
}
capture scalar __c  = _b[dfi_index]
if _rc {
    __ewiz_skip_mediation "总效应模型未回收核心解释变量系数"
    capture log close
    exit 0
}
capture scalar __c_se = _se[dfi_index]
if _rc {
    __ewiz_skip_mediation "总效应模型核心解释变量标准误不可取"
    capture log close
    exit 0
}
* A missing SE compares as larger than any number in Stata, so it must be
* tested explicitly; a (near-)zero SE signals an omitted/degenerate term.
if missing(__c_se) | __c_se <= 1e-12 {
    __ewiz_skip_mediation "总效应模型核心解释变量标准误不可解释"
    capture log close
    exit 0
}
* Two-sided p-value: t distribution when residual df is available,
* otherwise the normal approximation.
scalar __c_p = .
capture scalar __c_df = e(df_r)
capture scalar __c_z = abs(__c / __c_se)
capture scalar __c_p = cond(missing(__c_df), 2*(1-normal(__c_z)), 2*ttail(__c_df, __c_z))
di "=== Step 2: dfi_index -> cashflow (path a) ==="
* Path a: the mediator cashflow on dfi_index plus the same controls, with the
* same absorbed effects and clustering as Step 1.
capture noisily reghdfe cashflow dfi_index roa lev size growth tobinq top1 dual board indep soe age, absorb(`idvar' `timevar') vce(cluster `idvar')
if _rc {
    __ewiz_skip_mediation "a 路径模型不可估；中介变量或核心解释变量缺少有效变化"
    * Close the log before leaving so an early exit does not leave it open.
    capture log close
    exit 0
}
capture scalar __a = _b[dfi_index]
if _rc {
    __ewiz_skip_mediation "a 路径未回收核心解释变量系数"
    capture log close
    exit 0
}
capture scalar __a_se = _se[dfi_index]
if _rc {
    __ewiz_skip_mediation "a 路径核心解释变量标准误不可取"
    capture log close
    exit 0
}
* A missing SE compares as larger than any number in Stata, so it must be
* tested explicitly; a (near-)zero SE signals an omitted/degenerate term.
if missing(__a_se) | __a_se <= 1e-12 {
    __ewiz_skip_mediation "a 路径核心解释变量标准误不可解释"
    capture log close
    exit 0
}
* Two-sided p-value: t distribution when residual df is available,
* otherwise the normal approximation.
scalar __a_p = .
capture scalar __a_df = e(df_r)
capture scalar __a_z = abs(__a / __a_se)
capture scalar __a_p = cond(missing(__a_df), 2*(1-normal(__a_z)), 2*ttail(__a_df, __a_z))
di "=== Step 3: dfi_index + cashflow -> patent_count (direct c' and path b) ==="
* Joint model: patent_count on dfi_index and the mediator cashflow plus
* controls. The dfi_index coefficient is the direct effect c'; the cashflow
* coefficient is path b.
capture noisily reghdfe patent_count dfi_index cashflow roa lev size growth tobinq top1 dual board indep soe age, absorb(`idvar' `timevar') vce(cluster `idvar')
if _rc {
    __ewiz_skip_mediation "直接效应/中介路径联合模型不可估"
    * Close the log before leaving so an early exit does not leave it open.
    capture log close
    exit 0
}
capture scalar __cp = _b[dfi_index]
if _rc {
    __ewiz_skip_mediation "直接效应模型未回收核心解释变量系数"
    capture log close
    exit 0
}
capture scalar __cp_se = _se[dfi_index]
if _rc {
    __ewiz_skip_mediation "直接效应模型核心解释变量标准误不可取"
    capture log close
    exit 0
}
* A missing SE compares as larger than any number in Stata, so it must be
* tested explicitly; a (near-)zero SE signals an omitted/degenerate term.
if missing(__cp_se) | __cp_se <= 1e-12 {
    __ewiz_skip_mediation "直接效应模型核心解释变量标准误不可解释"
    capture log close
    exit 0
}
capture scalar __b = _b[cashflow]
if _rc {
    __ewiz_skip_mediation "联合模型未回收中介变量系数"
    capture log close
    exit 0
}
capture scalar __b_se = _se[cashflow]
if _rc {
    __ewiz_skip_mediation "中介变量标准误不可取"
    capture log close
    exit 0
}
if missing(__b_se) | __b_se <= 1e-12 {
    __ewiz_skip_mediation "中介变量标准误不可解释"
    capture log close
    exit 0
}
* Two-sided p-values for c' and b: t distribution when residual df is
* available, otherwise the normal approximation.
scalar __cp_p = .
capture scalar __cp_df = e(df_r)
capture scalar __cp_z = abs(__cp / __cp_se)
capture scalar __cp_p = cond(missing(__cp_df), 2*(1-normal(__cp_z)), 2*ttail(__cp_df, __cp_z))
scalar __b_p = .
capture scalar __b_df = e(df_r)
capture scalar __b_z = abs(__b / __b_se)
capture scalar __b_p = cond(missing(__b_df), 2*(1-normal(__b_z)), 2*ttail(__b_df, __b_z))
* Indirect effect a*b with the first-order Sobel standard error
* sqrt(b^2*se_a^2 + a^2*se_b^2) and a two-sided normal p-value.
scalar __ab = __a * __b
scalar __sobel_se = sqrt((__b^2)*(__a_se^2) + (__a^2)*(__b_se^2))
scalar __sobel_z = __ab / __sobel_se
scalar __sobel_p = 2*(1 - normal(abs(__sobel_z)))
* Format every statistic once, up front. For each path (c, a, b, cp) this
* creates the locals <path>_s, <path>_se_s and <path>_p_s from the scalars
* __<path>, __<path>_se and __<path>_p; the *_p_s locals are also used by the
* feasibility table written further below.
foreach k in c a b cp {
    local `k'_s : display %12.6f __`k'
    local `k'_se_s : display %12.6f __`k'_se
    local `k'_p_s : display %9.4f __`k'_p
}
local ab_s : display %12.6f __ab
local sse_s : display %12.6f __sobel_se
local sz_s : display %9.4f __sobel_z
local sp_s : display %9.4f __sobel_p

* mediation_results.csv: one row per path (c, a, b, c') plus the a*b row
* carrying the Sobel z statistic and p-value.
tempname res
capture file close `res'
file open `res' using "$JOB_DIR/mediation_results.csv", write replace
file write `res' "路径,系数,标准误,说明" _n
file write `res' "c (总效应 dfi_index->patent_count),`c_s',`c_se_s',p=`c_p_s'" _n
file write `res' "a (路径 dfi_index->cashflow),`a_s',`a_se_s',p=`a_p_s'" _n
file write `res' "b (路径 cashflow->patent_count|控 dfi_index),`b_s',`b_se_s',p=`b_p_s'" _n
file write `res' "c' (直接效应),`cp_s',`cp_se_s',p=`cp_p_s'" _n
file write `res' "a*b (间接效应),`ab_s',`sse_s',Sobel Z=`sz_s'; p=`sp_s'" _n
file close `res'
* Decide the verdict first: the mechanism counts as "supported" only when
* path a, path b and the Sobel test are all below the 10% level. A missing
* p-value fails the `<` comparison and therefore yields "not_supported".
local __med_status "not_supported"
if __a_p<0.1 & __b_p<0.1 & __sobel_p<0.1 local __med_status "supported"

* mechanism_feasibility.csv: mediator, control-set note, the five p-values
* and the final verdict.
tempname feas
capture file close `feas'
file open `feas' using "$JOB_DIR/mechanism_feasibility.csv", write replace
file write `feas' "指标,值,判定" _n
file write `feas' "中介变量,cashflow,当前正式检验的机制变量" _n
file write `feas' "控制变量口径,已从控制变量中排除中介变量 cashflow,避免基准模型先控制掉机制通道" _n
file write `feas' "c_path_p,`c_p_s'," _n "a_path_p,`a_p_s'," _n "b_path_p,`b_p_s'," _n
file write `feas' "direct_c_prime_p,`cp_p_s'," _n "sobel_p,`sp_s'," _n
file write `feas' "mechanism_status,`__med_status',supported 要求 a路径、b路径和Sobel间接效应均至少在10%水平显著" _n
file close `feas'
* Transparent mechanism-candidate screening: intended to run only when the
* user enables the specification-sensitivity search or candidate screening.
* NOTE(review): no such switch is visible in this script - the block below
* runs unconditionally; confirm the gate is applied by the code generator.
* This step does not hide the original mechanism result: it writes its own
* mechanism_screening.csv and leaves mediation_results.csv and
* mechanism_feasibility.csv untouched.
* Candidate exclusions cover the dependent variable, the independent
* variable, fixed-effect identifiers, and source columns the dependent
* variable was derived from: none.
local __base_ctrl "roa lev size growth tobinq top1 dual board indep soe age"
local __candidate_mediators "roa lev size growth cashflow tobinq top1 dual board indep soe age"
tempname fh3
capture file close `fh3'
file open `fh3' using "$JOB_DIR/mechanism_screening.csv", write replace
file write `fh3' "candidate,a,b,ab,a_p,b_p,sobel_p,status,note" _n
* Running record of the best supported candidate (smallest Sobel p-value).
local __best_m ""
scalar __best_sobel_p = .
scalar __best_a = .
scalar __best_b = .
scalar __best_ab = .
foreach __cand of local __candidate_mediators {
    * Never treat the outcome or the core regressor as a mediator, and skip
    * candidates that are absent or not numeric.
    if "`__cand'" == "patent_count" continue
    if "`__cand'" == "dfi_index" continue
    capture confirm numeric variable `__cand'
    if _rc continue
    * Controls for this candidate = base controls minus the candidate itself.
    local __cand_ctrl "`__base_ctrl'"
    local __cand_ctrl : list __cand_ctrl - __cand
    * Path a: candidate on dfi_index plus remaining controls.
    capture noisily reghdfe `__cand' dfi_index `__cand_ctrl', absorb(`idvar' `timevar') vce(cluster `idvar')
    if _rc continue
    capture scalar __oa = _b[dfi_index]
    capture scalar __oa_se = _se[dfi_index]
    * Only the _rc of the SE retrieval just above is checked here.
    if _rc continue
    scalar __oa_p = .
    capture scalar __oa_df = e(df_r)
    capture scalar __oa_z = abs(__oa / __oa_se)
    capture scalar __oa_p = cond(missing(__oa_df), 2*(1-normal(__oa_z)), 2*ttail(__oa_df, __oa_z))
    * Path b: outcome on dfi_index, the candidate and remaining controls.
    capture noisily reghdfe patent_count dfi_index `__cand' `__cand_ctrl', absorb(`idvar' `timevar') vce(cluster `idvar')
    if _rc continue
    capture scalar __ob = _b[`__cand']
    capture scalar __ob_se = _se[`__cand']
    * Only the _rc of the SE retrieval just above is checked here.
    if _rc continue
    scalar __ob_p = .
    capture scalar __ob_df = e(df_r)
    capture scalar __ob_z = abs(__ob / __ob_se)
    capture scalar __ob_p = cond(missing(__ob_df), 2*(1-normal(__ob_z)), 2*ttail(__ob_df, __ob_z))
    * Indirect effect and Sobel test, same formulas as the main analysis.
    scalar __oab = __oa * __ob
    scalar __osobel_se = sqrt((__ob^2)*(__oa_se^2) + (__oa^2)*(__ob_se^2))
    scalar __osobel_z = __oab / __osobel_se
    scalar __osobel_p = 2*(1 - normal(abs(__osobel_z)))
    local oa_s : display %12.6f __oa
    local ob_s : display %12.6f __ob
    local oab_s : display %12.6f __oab
    local oa_p_s : display %9.4f __oa_p
    local ob_p_s : display %9.4f __ob_p
    local osobel_p_s : display %9.4f __osobel_p
    * "supported" requires a, b and Sobel all below 10%; missing p-values
    * fail the `<` comparisons and stay "not_supported".
    local __ostatus "not_supported"
    if __oa_p<0.1 & __ob_p<0.1 & __osobel_p<0.1 local __ostatus "supported"
    file write `fh3' "`__cand',`oa_s',`ob_s',`oab_s',`oa_p_s',`ob_p_s',`osobel_p_s',`__ostatus',transparent mechanism screening" _n
    * Keep the supported candidate with the smallest Sobel p-value so far.
    if "`__ostatus'" == "supported" {
        if missing(__best_sobel_p) | __osobel_p < __best_sobel_p {
            local __best_m "`__cand'"
            scalar __best_sobel_p = __osobel_p
            scalar __best_a = __oa
            scalar __best_b = __ob
            scalar __best_ab = __oab
        }
    }
}
file close `fh3'
* mechanism_adoption.csv is produced only when the screening loop recorded a
* supported candidate in __best_m; it reports that candidate's a, b, a*b and
* Sobel p-value next to the original mediator, plus a disclosure line.
if "`__best_m'" != "" {
    * Format the stored best-candidate scalars before touching the file.
    local best_a_s : display %12.6f __best_a
    local best_b_s : display %12.6f __best_b
    local best_ab_s : display %12.6f __best_ab
    local best_p_s : display %9.4f __best_sobel_p
    tempname adopt
    capture file close `adopt'
    file open `adopt' using "$JOB_DIR/mechanism_adoption.csv", write replace
    file write `adopt' "字段,值" _n "adoption_status,supported" _n
    file write `adopt' "original_mediator,cashflow" _n "adopted_mediator,`__best_m'" _n
    file write `adopt' "a,`best_a_s'" _n "b,`best_b_s'" _n "ab,`best_ab_s'" _n "sobel_p,`best_p_s'" _n
    file write `adopt' "disclosure,原始机制结果已保留；该机制为透明候选筛选后采用，需在内部报告披露筛选过程与规格敏感性。" _n
    file close `adopt'
}
di "=== Bootstrap 1000 次 ==="
* Clear the panel declaration before bootstrapping: with cluster resampling
* the same id can be drawn more than once, and an active xtset then trips
* over repeated time values within a panel (r(451)).
capture xtset, clear
* __ewiz_indirect (rclass)
*   Runs both mediation stages on the data currently in memory and returns
*   r(indirect) = a * b, where a is the dfi_index coefficient in the cashflow
*   equation and b is the cashflow coefficient in the patent_count equation.
*   It is meant to be wrapped by the bootstrap prefix so that every resample
*   re-estimates both regressions.
*   NOTE(review): both stages use plain `regress` with no absorbed effects,
*   whereas Steps 1-3 above use reghdfe with absorb(); the bootstrap CI may
*   therefore describe a different specification than the Sobel row -
*   confirm this is intended.
capture program drop __ewiz_indirect
program define __ewiz_indirect, rclass
    local ctrl "roa lev size growth tobinq top1 dual board indep soe age"
    * Stage 1 (path a): mediator on the core regressor.
    quietly regress cashflow dfi_index `ctrl'
    local _boot_a = _b[dfi_index]
    * Stage 2 (path b): outcome on the core regressor and the mediator.
    quietly regress patent_count dfi_index cashflow `ctrl'
    local _boot_b = _b[cashflow]
    return scalar indirect = `_boot_a' * `_boot_b'
end
* Cluster bootstrap of the indirect effect: 1000 replications, fixed seed,
* resampling whole clusters of the id variable. Failures are captured so the
* Sobel output written earlier remains the fallback.
capture bootstrap indirect=r(indirect), reps(1000) seed(42) cluster(`idvar'): __ewiz_indirect
local __boot_rc = _rc
if `__boot_rc' == 0 {
    capture estat bootstrap, percentile
    * Export the point estimate and percentile CI to a side CSV so the
    * report layer can render them without parsing the Stata log.
    * Every field starts empty, so a failed retrieval yields an empty cell.
    local _ind_b ""
    local _ind_se ""
    local _lo ""
    local _hi ""
    capture local _ind_b = _b[indirect]
    capture local _ind_se = _se[indirect]
    * Drop any matrix left over from an earlier run so stale bounds can never
    * be reported when e(ci_percentile) is unavailable.
    capture matrix drop __BOOTCI
    capture matrix __BOOTCI = e(ci_percentile)
    if !_rc {
        * Row 1 = lower bound, row 2 = upper bound of the percentile CI.
        capture local _lo = __BOOTCI[1,1]
        capture local _hi = __BOOTCI[2,1]
    }
    tempname mh
    file open `mh' using "$JOB_DIR/mediation_bootstrap.csv", write replace
    file write `mh' "metric,value" _n
    file write `mh' "method,Bootstrap percentile (reps=1000)" _n
    file write `mh' "indirect_effect,`_ind_b'" _n
    file write `mh' "se,`_ind_se'" _n
    file write `mh' "ci95_lower,`_lo'" _n
    file write `mh' "ci95_upper,`_hi'" _n
    * Decide significance only when both bounds are real numbers. Previously
    * a missing bound made the `<`/`>` test fall through to "不包含0(显著)",
    * i.e. a failed CI was reported as a significant effect.
    if missing(real("`_lo'")) | missing(real("`_hi'")) {
        local _zero_in "区间缺失(无法判定)"
    }
    else if real("`_lo'") < 0 & real("`_hi'") > 0 {
        local _zero_in "包含0(不显著)"
    }
    else {
        local _zero_in "不包含0(显著)"
    }
    file write `mh' "ci95_decision,`_zero_in'" _n
    file close `mh'
}
else {
    di "Bootstrap 失败；Sobel 结果已输出"
}
di "中介效应分析完成"
log close

五、实际输出表

这张表就是本方法页使用的案例输出文件（内容即上面代码写出的 mechanism_feasibility.csv），保存在 marketing/method_case_assets/mechanism/result.csv。

指标	值	判定
中介变量	cashflow	当前正式检验的机制变量
控制变量口径	已从控制变量中排除中介变量 cashflow	避免基准模型先控制掉机制通道
c_path_p	0.0000
a_path_p	0.7551
b_path_p	0.3985
direct_c_prime_p	0.0000
sobel_p	0.7693
mechanism_status	not_supported	supported 要求 a路径、b路径和Sobel间接效应均至少在10%水平显著

补充输出

下面这些文件来自同一次案例运行或烟测输出，用来补齐主表之外的诊断信息。

mechanism_adoption.csv

字段	值
adoption_status	supported
original_mediator	cashflow
adopted_mediator	size
a	0.085088
b	0.234525
ab	0.019955
sobel_p	0.0540
disclosure	原始机制结果已保留；该机制为透明候选筛选后采用，需在内部报告披露筛选过程与规格敏感性。

mechanism_screening.csv

candidate	a	b	ab	a_p	b_p	sobel_p	status	note
roa	0.039198	0.305093	0.011959	0.3424	0.0000	0.3454	not_supported	transparent mechanism screening
lev	0.025637	-0.053351	-0.001368	0.5607	0.2854	0.6082	not_supported	transparent mechanism screening
size	0.085088	0.234525	0.019955	0.0398	0.0000	0.0540	supported	transparent mechanism screening
growth	-0.023456	-0.082243	0.001929	0.5596	0.1155	0.5831	not_supported	transparent mechanism screening
cashflow	0.012568	0.040729	0.000512	0.7551	0.3985	0.7693	not_supported	transparent mechanism screening
tobinq	0.020653	0.023506	0.000485	0.6293	0.6408	0.7366	not_supported	transparent mechanism screening
top1	-0.019938	-0.015554	0.000310	0.6265	0.7660	0.7991	not_supported	transparent mechanism screening
dual	-0.007486	-0.025575	0.000191	0.8627	0.5638	0.8682	not_supported	transparent mechanism screening
board	-0.021813	-0.037615	0.000821	0.6072	0.4610	0.6724	not_supported	transparent mechanism screening
indep	0.039247	-0.032588	-0.001279	0.3141	0.5175	0.5849	not_supported	transparent mechanism screening
soe	0.051570	0.002515	0.000130	0.1998	0.9631	0.9631	not_supported	transparent mechanism screening
age	-0.042025	-0.058351	0.002452	0.3720	0.1905	0.4589	not_supported	transparent mechanism screening

mediation_bootstrap.csv

metric	value
method	Bootstrap percentile (reps=1000)
indirect_effect	.0005309482223866
se	.0025693800719186
ci95_lower	-.0050351792015135
ci95_upper	.0060342594515532
ci95_decision	包含0(不显著)

mediation_results.csv

路径	系数	标准误	说明
c (总效应 dfi_index->patent_count)	0.566553	0.046841	p= 0.0000
a (路径 dfi_index->cashflow)	0.012568	0.040199	p= 0.7551
b (路径 cashflow->patent_count|控 dfi_index)	0.040729	0.048063	p= 0.3985
c' (直接效应)	0.566041	0.047035	p= 0.0000
a*b (间接效应)	0.000512	0.001745	Sobel Z= 0.2933; p= 0.7693

六、案例图

这是一张由同一份案例数据生成的页面内诊断图。

七、论文里怎么写

本文在共用企业面板样本上报告机制分析，核心输出见 mechanism_feasibility.csv。结果解释时同时关注样本口径、变量构造、系数方向、标准误和适用前提，避免只凭单个 p 值完成方法选择。

八、检查清单

确认本页使用的因变量、核心解释变量、控制变量与论文主模型一致。
先看表格里的样本口径，再看系数、p 值或诊断指标。
代码里的输出文件名要能对应网页展示的结果表。

返回方法库 · 打开 empirical-wizard