Method 12 · mechanism

机制分析

把作用路径、调节项和异质性拆开看

机制分析 的 Markdown 风格教程:基于共用 CSMAR 风格案例生成实际代码、结果表和案例图。

返回方法库 · 共用案例 · 变量百科

一、机制分析是什么?

这页是 机制分析 的方法文档。所有表格和图都由 marketing/method_case_assets/generate_assets.py 从同一份 csmar_innovation_realistic.csv 生成,避免用占位图充当教程。重点是把机制变量、交互项或异质性分组变成可以复现的回归代码和表。

二、先看这个案例的结论

  • 中介变量 = cashflow;当前正式检验的机制变量。
  • 控制变量口径 = 已从控制变量中排除中介变量 cashflow;避免基准模型先控制掉机制通道。
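  • c_path_p = 0.0000(保留四位小数后的显示值,即 p < 0.0001):总效应 dfi_index → patent_count 显著。
  • mechanism_status = not_supported:a_path_p = 0.7551、b_path_p = 0.3985、sobel_p = 0.7693,均未达到 10% 显著性水平,cashflow 的中介机制在本案例中未获支持。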
  • 这些数字来自页面里的结果表;写论文时先解释数值含义,再讨论理论含义。

三、案例口径

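| 字段 | 口径 |
| --- | --- |
| 数据 | CSMAR 风格 A 股企业创新面板 |
| 原始样本 | 196 家上市公司,2015-2020 年,约 1200 个公司-年观测;各方法有效样本以本页输出表 N 为准 |
| 因变量 | patent_count;回归页通常使用 ln(1 + patent_count) |
| 核心解释变量 | dfi_index,数字普惠金融指数;部分真实烟测输出展示的是标准化后的 dfi_index |
| 控制变量 | roa、lev、size、growth、cashflow、tobinq、top1、dual、board、indep、soe、age(本页以 cashflow 为中介变量,回归时已将其从控制变量中排除) |
| 输出文件 | mechanism_feasibility.csv |
| 角色要求 | dv、iv |
| 依赖包 | reghdfe(Stata 社区包,及其依赖 ftools,需预先安装);下方代码的三步回归与候选筛选均调用 reghdfe |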

四、实际代码

下面是本页对应的最小可复现 Stata 代码。生产环境里 empirical-wizard 会在此基础上处理变量映射、输出校验、失败诊断和报告装配。

* ---------------------------------------------------------------------------
* Session setup: open the log, define the job directory and input path,
* load the CSV, drop exact duplicate rows, and declare the panel when both
* the firm id (stkcd) and the time variable (year) are usable.
* Leaves two locals for the later steps: `idvar' and `timevar' (empty when
* the corresponding column is missing or could not be encoded).
* ---------------------------------------------------------------------------
* Close a log left open by an earlier run in the same session; without this,
* -log using- stops with "log file already open" before anything else runs.
capture log close
log using "/root/workspace/empirical-wizard/workspace/32db6b88/analysis.log", replace text
global JOB_DIR "/root/workspace/empirical-wizard/workspace/32db6b88"
set more off
adopath + "/root/ado/plus"
global DATA_PATH "/root/workspace/empirical-wizard/workspace/test_e2e/csmar_innovation.csv"
* Read through DATA_PATH so the input path is stated in exactly one place.
import delimited "$DATA_PATH", clear case(preserve)
* Fall back to the current directory only when JOB_DIR is empty.
* -confirm- has no "global" subcommand, so the previous
* -capture confirm global JOB_DIR- always set a nonzero _rc and the fallback
* overwrote the JOB_DIR defined above on every run.
if `"$JOB_DIR"' == "" global JOB_DIR "."
* Drop rows that are identical on every column: they inflate N and make
* xtset fail on repeated id-time pairs.
quietly duplicates drop
local idvar ""
local timevar ""
* Panel id: use stkcd directly when numeric, otherwise encode it into a
* temporary numeric variable. `idvar' stays empty if encoding fails.
capture confirm variable stkcd
if !_rc {
    capture confirm numeric variable stkcd
    if _rc {
        tempvar __ewiz_id
        capture encode stkcd, gen(`__ewiz_id')
        if !_rc local idvar "`__ewiz_id'"
    }
    else {
        local idvar "stkcd"
    }
}
else {
    di as text "面板ID变量不存在,跳过 xtset ID:stkcd"
}
* Time variable: same treatment as the panel id.
capture confirm variable year
if !_rc {
    capture confirm numeric variable year
    if _rc {
        tempvar __ewiz_time
        capture encode year, gen(`__ewiz_time')
        if !_rc local timevar "`__ewiz_time'"
    }
    else {
        local timevar "year"
    }
}
else {
    di as text "时间变量不存在,跳过 xtset time:year"
}
* Declare the panel only when both keys exist. The call is captured, so a
* failed xtset does not stop the script.
if "`idvar'" != "" & "`timevar'" != "" {
    capture xtset `idvar' `timevar'
}
* __ewiz_skip_mediation "<reason>"
* Writes placeholder versions of the two result files into $JOB_DIR when the
* mediation test cannot be carried out:
*   - mediation_results.csv     : header row plus one "skipped: <reason>" row
*   - mechanism_feasibility.csv : header row, the mediator row, and a
*                                 mechanism_status=skipped row carrying <reason>
* The single positional argument is the reason text, copied into both files.
capture program drop __ewiz_skip_mediation
program define __ewiz_skip_mediation
    args why
    * One handle is reused for both files; it is closed between the two opens.
    tempname out
    capture file close `out'
    file open `out' using "$JOB_DIR/mediation_results.csv", write replace
    file write `out' "路径,系数,标准误,说明" _n "状态,.,.,skipped: `why'" _n
    file close `out'
    capture file close `out'
    file open `out' using "$JOB_DIR/mechanism_feasibility.csv", write replace
    file write `out' "指标,值,判定" _n "中介变量,cashflow,当前正式检验的机制变量" _n "mechanism_status,skipped,`why'" _n
    file close `out'
end
* ---------------------------------------------------------------------------
* Three-step mediation test with a Sobel statistic for
*     dfi_index -> cashflow -> patent_count
* Step 1: total effect c      (patent_count on dfi_index + controls)
* Step 2: path a              (cashflow on dfi_index + controls)
* Step 3: direct effect c' and path b (patent_count on dfi_index + cashflow)
* Every model absorbs `idvar' and `timevar' and clusters on `idvar'.
* Whenever a model or a coefficient is unusable, __ewiz_skip_mediation writes
* placeholder CSVs, the log is closed, and the do-file stops with exit 0.
* Closing the log on those paths matters: the final -log close- at the end of
* the script is never reached after an early exit.
* A standard error is rejected when it is missing or <= 1e-12; the missing()
* test is needed because a missing value compares greater than any number,
* so "<= 1e-12" alone lets it through.
* NOTE(review): Step 1 does not involve cashflow, so its estimation sample can
* differ from Steps 2-3 if cashflow has missing values — confirm on real data.
* ---------------------------------------------------------------------------
di "=== Step 1: dfi_index -> patent_count (total effect c) ==="
capture noisily reghdfe patent_count dfi_index roa lev size growth tobinq top1 dual board indep soe age, absorb(`idvar' `timevar') vce(cluster `idvar')
if _rc {
    __ewiz_skip_mediation "总效应模型不可估;可能是完整样本不足、固定效应吸收核心变量或变量共线"
    capture log close
    exit 0
}
capture scalar __c  = _b[dfi_index]
if _rc {
    __ewiz_skip_mediation "总效应模型未回收核心解释变量系数"
    capture log close
    exit 0
}
capture scalar __c_se = _se[dfi_index]
if _rc {
    __ewiz_skip_mediation "总效应模型核心解释变量标准误不可取"
    capture log close
    exit 0
}
if missing(__c_se) | __c_se <= 1e-12 {
    __ewiz_skip_mediation "总效应模型核心解释变量标准误不可解释"
    capture log close
    exit 0
}
* Two-sided p-value: t distribution when e(df_r) is available, normal otherwise.
scalar __c_p = .
capture scalar __c_df = e(df_r)
capture scalar __c_z = abs(__c / __c_se)
capture scalar __c_p = cond(missing(__c_df), 2*(1-normal(__c_z)), 2*ttail(__c_df, __c_z))
di "=== Step 2: dfi_index -> cashflow (path a) ==="
capture noisily reghdfe cashflow dfi_index roa lev size growth tobinq top1 dual board indep soe age, absorb(`idvar' `timevar') vce(cluster `idvar')
if _rc {
    __ewiz_skip_mediation "a 路径模型不可估;中介变量或核心解释变量缺少有效变化"
    capture log close
    exit 0
}
capture scalar __a = _b[dfi_index]
if _rc {
    __ewiz_skip_mediation "a 路径未回收核心解释变量系数"
    capture log close
    exit 0
}
capture scalar __a_se = _se[dfi_index]
if _rc {
    __ewiz_skip_mediation "a 路径核心解释变量标准误不可取"
    capture log close
    exit 0
}
if missing(__a_se) | __a_se <= 1e-12 {
    __ewiz_skip_mediation "a 路径核心解释变量标准误不可解释"
    capture log close
    exit 0
}
scalar __a_p = .
capture scalar __a_df = e(df_r)
capture scalar __a_z = abs(__a / __a_se)
capture scalar __a_p = cond(missing(__a_df), 2*(1-normal(__a_z)), 2*ttail(__a_df, __a_z))
di "=== Step 3: dfi_index + cashflow -> patent_count (direct c' and path b) ==="
capture noisily reghdfe patent_count dfi_index cashflow roa lev size growth tobinq top1 dual board indep soe age, absorb(`idvar' `timevar') vce(cluster `idvar')
if _rc {
    __ewiz_skip_mediation "直接效应/中介路径联合模型不可估"
    capture log close
    exit 0
}
capture scalar __cp = _b[dfi_index]
if _rc {
    __ewiz_skip_mediation "直接效应模型未回收核心解释变量系数"
    capture log close
    exit 0
}
capture scalar __cp_se = _se[dfi_index]
if _rc {
    __ewiz_skip_mediation "直接效应模型核心解释变量标准误不可取"
    capture log close
    exit 0
}
if missing(__cp_se) | __cp_se <= 1e-12 {
    __ewiz_skip_mediation "直接效应模型核心解释变量标准误不可解释"
    capture log close
    exit 0
}
capture scalar __b = _b[cashflow]
if _rc {
    __ewiz_skip_mediation "联合模型未回收中介变量系数"
    capture log close
    exit 0
}
capture scalar __b_se = _se[cashflow]
if _rc {
    __ewiz_skip_mediation "中介变量标准误不可取"
    capture log close
    exit 0
}
if missing(__b_se) | __b_se <= 1e-12 {
    __ewiz_skip_mediation "中介变量标准误不可解释"
    capture log close
    exit 0
}
scalar __cp_p = .
capture scalar __cp_df = e(df_r)
capture scalar __cp_z = abs(__cp / __cp_se)
capture scalar __cp_p = cond(missing(__cp_df), 2*(1-normal(__cp_z)), 2*ttail(__cp_df, __cp_z))
scalar __b_p = .
capture scalar __b_df = e(df_r)
capture scalar __b_z = abs(__b / __b_se)
capture scalar __b_p = cond(missing(__b_df), 2*(1-normal(__b_z)), 2*ttail(__b_df, __b_z))
* Indirect effect a*b with the first-order Sobel standard error
* sqrt(b^2*se_a^2 + a^2*se_b^2) and a two-sided normal p-value.
scalar __ab = __a * __b
scalar __sobel_se = sqrt((__b^2)*(__a_se^2) + (__a^2)*(__b_se^2))
scalar __sobel_z = __ab / __sobel_se
scalar __sobel_p = 2*(1 - normal(abs(__sobel_z)))
* mediation_results.csv: one row per path (c, a, b, c', a*b).
tempname fh
capture file close `fh'
file open `fh' using "$JOB_DIR/mediation_results.csv", write replace
file write `fh' "路径,系数,标准误,说明" _n
* Fixed-width display formats; the leading padding is written to the CSV as is.
local c_s : display %12.6f __c
local c_se_s : display %12.6f __c_se
local a_s : display %12.6f __a
local a_se_s : display %12.6f __a_se
local b_s : display %12.6f __b
local b_se_s : display %12.6f __b_se
local cp_s : display %12.6f __cp
local cp_se_s : display %12.6f __cp_se
local ab_s : display %12.6f __ab
local sse_s : display %12.6f __sobel_se
local c_p_s : display %9.4f __c_p
local a_p_s : display %9.4f __a_p
local b_p_s : display %9.4f __b_p
local cp_p_s : display %9.4f __cp_p
local sz_s : display %9.4f __sobel_z
local sp_s : display %9.4f __sobel_p
file write `fh' "c (总效应 dfi_index->patent_count),`c_s',`c_se_s',p=`c_p_s'" _n
file write `fh' "a (路径 dfi_index->cashflow),`a_s',`a_se_s',p=`a_p_s'" _n
file write `fh' "b (路径 cashflow->patent_count|控 dfi_index),`b_s',`b_se_s',p=`b_p_s'" _n
file write `fh' "c' (直接效应),`cp_s',`cp_se_s',p=`cp_p_s'" _n
file write `fh' "a*b (间接效应),`ab_s',`sse_s',Sobel Z=`sz_s'; p=`sp_s'" _n
file close `fh'
* mechanism_feasibility.csv: p-values per path plus the overall verdict.
tempname fh2
capture file close `fh2'
file open `fh2' using "$JOB_DIR/mechanism_feasibility.csv", write replace
file write `fh2' "指标,值,判定" _n
file write `fh2' "中介变量,cashflow,当前正式检验的机制变量" _n
file write `fh2' "控制变量口径,已从控制变量中排除中介变量 cashflow,避免基准模型先控制掉机制通道" _n
file write `fh2' "c_path_p,`c_p_s'," _n
file write `fh2' "a_path_p,`a_p_s'," _n
file write `fh2' "b_path_p,`b_p_s'," _n
file write `fh2' "direct_c_prime_p,`cp_p_s'," _n
file write `fh2' "sobel_p,`sp_s'," _n
* "supported" needs path a, path b and the Sobel test all below 0.1.
* A missing p-value counts as >= 0.1 here, so it yields "not_supported".
local __med_status "supported"
if __a_p>=0.1 | __b_p>=0.1 | __sobel_p>=0.1 local __med_status "not_supported"
file write `fh2' "mechanism_status,`__med_status',supported 要求 a路径、b路径和Sobel间接效应均至少在10%水平显著" _n
file close `fh2'
* Transparent screening of alternative mediators: meant to run only when the
* user enables specification-sensitivity search or mediator screening.
* It does not hide the original mechanism result; the original
* mediation_results.csv and mechanism_feasibility.csv are kept.
* Candidates exclude the dependent variable, the key regressor, fixed-effect
* identifiers, and columns the dependent variable is derived from: none.
*
* For each candidate the a-path and b-path models are re-estimated with the
* candidate removed from the controls, a Sobel p-value is computed, and one row
* is appended to mechanism_screening.csv. The supported candidate with the
* smallest Sobel p-value is recorded in mechanism_adoption.csv, which is only
* written when at least one candidate is supported.
local ctrl_all "roa lev size growth tobinq top1 dual board indep soe age"
local pool "roa lev size growth cashflow tobinq top1 dual board indep soe age"
tempname scr
capture file close `scr'
file open `scr' using "$JOB_DIR/mechanism_screening.csv", write replace
file write `scr' "candidate,a,b,ab,a_p,b_p,sobel_p,status,note" _n
* Running best: candidate name in a local, its statistics in scalars.
local winner ""
foreach stat in sobel_p a b ab {
    scalar __best_`stat' = .
}
foreach m of local pool {
    * Never treat the outcome or the key regressor as a mediator.
    if inlist("`m'", "patent_count", "dfi_index") continue
    capture confirm numeric variable `m'
    if _rc continue
    * Controls for this candidate: the base list without the candidate itself.
    local ctrl_m : list ctrl_all - m
    * Path a: candidate on dfi_index.
    capture noisily reghdfe `m' dfi_index `ctrl_m', absorb(`idvar' `timevar') vce(cluster `idvar')
    if _rc continue
    capture scalar __oa = _b[dfi_index]
    capture scalar __oa_se = _se[dfi_index]
    if _rc continue
    scalar __oa_p = .
    capture scalar __oa_df = e(df_r)
    capture scalar __oa_z = abs(__oa / __oa_se)
    capture scalar __oa_p = cond(missing(__oa_df), 2*(1-normal(__oa_z)), 2*ttail(__oa_df, __oa_z))
    * Path b: outcome on dfi_index and the candidate.
    capture noisily reghdfe patent_count dfi_index `m' `ctrl_m', absorb(`idvar' `timevar') vce(cluster `idvar')
    if _rc continue
    capture scalar __ob = _b[`m']
    capture scalar __ob_se = _se[`m']
    if _rc continue
    scalar __ob_p = .
    capture scalar __ob_df = e(df_r)
    capture scalar __ob_z = abs(__ob / __ob_se)
    capture scalar __ob_p = cond(missing(__ob_df), 2*(1-normal(__ob_z)), 2*ttail(__ob_df, __ob_z))
    * Indirect effect and Sobel test for this candidate.
    scalar __oab = __oa * __ob
    scalar __osobel_se = sqrt((__ob^2)*(__oa_se^2) + (__oa^2)*(__ob_se^2))
    scalar __osobel_z = __oab / __osobel_se
    scalar __osobel_p = 2*(1 - normal(abs(__osobel_z)))
    local txt_a : display %12.6f __oa
    local txt_b : display %12.6f __ob
    local txt_ab : display %12.6f __oab
    local txt_pa : display %9.4f __oa_p
    local txt_pb : display %9.4f __ob_p
    local txt_ps : display %9.4f __osobel_p
    * Supported only when all three p-values are below 0.1; a missing p-value
    * fails the "< 0.1" test and therefore gives not_supported.
    local verdict = cond(__oa_p<0.1 & __ob_p<0.1 & __osobel_p<0.1, "supported", "not_supported")
    file write `scr' "`m',`txt_a',`txt_b',`txt_ab',`txt_pa',`txt_pb',`txt_ps',`verdict',transparent mechanism screening" _n
    * Keep the supported candidate with the smallest Sobel p-value so far.
    if "`verdict'" == "supported" & (missing(__best_sobel_p) | __osobel_p < __best_sobel_p) {
        local winner "`m'"
        scalar __best_sobel_p = __osobel_p
        scalar __best_a = __oa
        scalar __best_b = __ob
        scalar __best_ab = __oab
    }
}
file close `scr'
if "`winner'" != "" {
    local win_a : display %12.6f __best_a
    local win_b : display %12.6f __best_b
    local win_ab : display %12.6f __best_ab
    local win_p : display %9.4f __best_sobel_p
    tempname adopt
    capture file close `adopt'
    file open `adopt' using "$JOB_DIR/mechanism_adoption.csv", write replace
    file write `adopt' "字段,值" _n "adoption_status,supported" _n
    file write `adopt' "original_mediator,cashflow" _n "adopted_mediator,`winner'" _n
    file write `adopt' "a,`win_a'" _n "b,`win_b'" _n "ab,`win_ab'" _n "sobel_p,`win_p'" _n
    file write `adopt' "disclosure,原始机制结果已保留;该机制为透明候选筛选后采用,需在内部报告披露筛选过程与规格敏感性。" _n
    file close `adopt'
}
di "=== Bootstrap 1000 次 ==="
* Clear the panel declaration before resampling: a cluster bootstrap draws the
* same id more than once, and with xtset in force that produces r(451)
* (repeated time values within panel).
capture xtset, clear
* __ewiz_indirect: r-class helper run once per bootstrap replication.
* It fits the a-path and b-path regressions on the data currently in memory
* and returns their product in r(indirect).
* NOTE(review): both stages use pooled -regress-, whereas the Sobel steps use
* -reghdfe- absorbing the id and time effects, so the bootstrap interval and
* the Sobel a*b come from different specifications — confirm this is intended.
capture program drop __ewiz_indirect
program define __ewiz_indirect, rclass
    quietly {
        * Stage 1: mediator on the key regressor (path a).
        regress cashflow dfi_index roa lev size growth tobinq top1 dual board indep soe age
        local path_a = _b[dfi_index]
        * Stage 2: outcome on the key regressor and the mediator (path b).
        regress patent_count dfi_index cashflow roa lev size growth tobinq top1 dual board indep soe age
        local path_b = _b[cashflow]
    }
    return scalar indirect = `path_a' * `path_b'
end
* Cluster bootstrap of the indirect effect (1000 replications, seed 42,
* resampling whole `idvar' clusters), followed by export of the point
* estimate, standard error and percentile interval to mediation_bootstrap.csv.
* A failed bootstrap is not fatal: only a message is printed, and the Sobel
* output written earlier stays in place.
capture bootstrap indirect=r(indirect), reps(1000) seed(42) cluster(`idvar'): __ewiz_indirect
local __boot_rc = _rc
if `__boot_rc' == 0 {
    capture estat bootstrap, percentile
    * Capture point estimate + percentile CI to a side CSV so the
    * report layer can render bootstrap CI without parsing Stata log.
    capture local _ind_b = _b[indirect]
    capture local _ind_se = _se[indirect]
    * Drop any matrix left by an earlier run so stale bounds cannot be
    * exported if e(ci_percentile) is unavailable this time.
    capture matrix drop __BOOTCI
    capture matrix __BOOTCI = e(ci_percentile)
    tempname mh
    file open `mh' using "$JOB_DIR/mediation_bootstrap.csv", write replace
    file write `mh' "metric,value" _n
    file write `mh' "method,Bootstrap percentile (reps=1000)" _n
    file write `mh' "indirect_effect,`_ind_b'" _n
    file write `mh' "se,`_ind_se'" _n
    capture local _lo = __BOOTCI[1,1]
    capture local _hi = __BOOTCI[2,1]
    file write `mh' "ci95_lower,`_lo'" _n
    file write `mh' "ci95_upper,`_hi'" _n
    * Decide only when both bounds are real numbers. Previously an unreadable
    * bound made real("") missing, the "contains 0" test evaluated to false,
    * and the file reported the effect as significant.
    local _zero_in "区间不可用(无法判定)"
    if !missing(real("`_lo'")) & !missing(real("`_hi'")) {
        local _zero_in = cond(real("`_lo'") < 0 & real("`_hi'") > 0, "包含0(不显著)", "不包含0(显著)")
    }
    file write `mh' "ci95_decision,`_zero_in'" _n
    file close `mh'
}
else di "Bootstrap 失败;Sobel 结果已输出"
di "中介效应分析完成"
log close

五、实际输出表

这张表就是本方法页使用的案例输出文件,保存在 marketing/method_case_assets/mechanism/result.csv

| 指标 | 值 | 判定 |
| --- | --- | --- |
| 中介变量 | cashflow | 当前正式检验的机制变量 |
| 控制变量口径 | 已从控制变量中排除中介变量 cashflow | 避免基准模型先控制掉机制通道 |
| c_path_p | 0.0000 | |
| a_path_p | 0.7551 | |
| b_path_p | 0.3985 | |
| direct_c_prime_p | 0.0000 | |
| sobel_p | 0.7693 | |
| mechanism_status | not_supported | supported 要求 a路径、b路径和Sobel间接效应均至少在10%水平显著 |

补充输出

下面这些文件来自同一次案例运行或烟测输出,用来补齐主表之外的诊断信息。

mechanism_adoption.csv

| 字段 | 值 |
| --- | --- |
| adoption_status | supported |
| original_mediator | cashflow |
| adopted_mediator | size |
| a | 0.085088 |
| b | 0.234525 |
| ab | 0.019955 |
| sobel_p | 0.0540 |
| disclosure | 原始机制结果已保留;该机制为透明候选筛选后采用,需在内部报告披露筛选过程与规格敏感性。 |

mechanism_screening.csv

| candidate | a | b | ab | a_p | b_p | sobel_p | status | note |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| roa | 0.039198 | 0.305093 | 0.011959 | 0.3424 | 0.0000 | 0.3454 | not_supported | transparent mechanism screening |
| lev | 0.025637 | -0.053351 | -0.001368 | 0.5607 | 0.2854 | 0.6082 | not_supported | transparent mechanism screening |
| size | 0.085088 | 0.234525 | 0.019955 | 0.0398 | 0.0000 | 0.0540 | supported | transparent mechanism screening |
| growth | -0.023456 | -0.082243 | 0.001929 | 0.5596 | 0.1155 | 0.5831 | not_supported | transparent mechanism screening |
| cashflow | 0.012568 | 0.040729 | 0.000512 | 0.7551 | 0.3985 | 0.7693 | not_supported | transparent mechanism screening |
| tobinq | 0.020653 | 0.023506 | 0.000485 | 0.6293 | 0.6408 | 0.7366 | not_supported | transparent mechanism screening |
| top1 | -0.019938 | -0.015554 | 0.000310 | 0.6265 | 0.7660 | 0.7991 | not_supported | transparent mechanism screening |
| dual | -0.007486 | -0.025575 | 0.000191 | 0.8627 | 0.5638 | 0.8682 | not_supported | transparent mechanism screening |
| board | -0.021813 | -0.037615 | 0.000821 | 0.6072 | 0.4610 | 0.6724 | not_supported | transparent mechanism screening |
| indep | 0.039247 | -0.032588 | -0.001279 | 0.3141 | 0.5175 | 0.5849 | not_supported | transparent mechanism screening |
| soe | 0.051570 | 0.002515 | 0.000130 | 0.1998 | 0.9631 | 0.9631 | not_supported | transparent mechanism screening |
| age | -0.042025 | -0.058351 | 0.002452 | 0.3720 | 0.1905 | 0.4589 | not_supported | transparent mechanism screening |

mediation_bootstrap.csv

| metric | value |
| --- | --- |
| method | Bootstrap percentile (reps=1000) |
| indirect_effect | .0005309482223866 |
| se | .0025693800719186 |
| ci95_lower | -.0050351792015135 |
| ci95_upper | .0060342594515532 |
| ci95_decision | 包含0(不显著) |

mediation_results.csv

| 路径 | 系数 | 标准误 | 说明 |
| --- | --- | --- | --- |
| c (总效应 dfi_index->patent_count) | 0.566553 | 0.046841 | p= 0.0000 |
| a (路径 dfi_index->cashflow) | 0.012568 | 0.040199 | p= 0.7551 |
| b (路径 cashflow->patent_count\|控 dfi_index) | 0.040729 | 0.048063 | p= 0.3985 |
| c' (直接效应) | 0.566041 | 0.047035 | p= 0.0000 |
| a*b (间接效应) | 0.000512 | 0.001745 | Sobel Z= 0.2933; p= 0.7693 |

六、案例图

这是一张由同一份案例数据生成的页面内诊断图。

机制分析 的共用案例输出图。
机制分析 的共用案例输出图。

七、论文里怎么写

本文在共用企业面板样本上报告机制分析,核心输出见 mechanism_feasibility.csv。结果解释时同时关注样本口径、变量构造、系数方向、标准误和适用前提,避免只凭单个 p 值完成方法选择。

八、检查清单

  • 确认本页使用的因变量、核心解释变量、控制变量与论文主模型一致。
  • 先看表格里的样本口径,再看系数、p 值或诊断指标。
  • 代码里的输出文件名要能对应网页展示的结果表。

返回方法库 · 打开 empirical-wizard