我非常陌生,causal inference
所以我所做的就是使用库示例尽可能地接近我的潜在答案。我试图使用 causlaml 库来弄清楚它是如何工作的。我将样本分成control group
、1000x3
、y = target
和treatment group
与对照组相同的形状。但是在图书馆里面的例子中,形状是不同的。代码如下:
from causalml.inference.meta import LRSRegressor
from causalml.inference.meta import XGBTRegressor, MLPTRegressor
from causalml.inference.meta import BaseXRegressor
from causalml.inference.meta import BaseRRegressor
from xgboost import XGBRegressor
from causalml.dataset import synthetic_data
import numpy as np
import pandas as pd
###Prparing data
df = pd.read_csv('sample_data/configuration_900.out',sep = "\s+",header = None)
df.columns = ['x','y','s','theta','curvature','near']
MIDD = 500
# Function to find middle index
def find_midd(input_list): #middleIndex %2 != 0:
middleIndex = int(float(len(input_list))) /2
if middleIndex %2 == 0:
return middleIndex
else:
return middleIndex - 0.5
excess = df.iloc[:,-1]
middleindex = find_midd(excess)
middleindex = int(middleindex)
new_theta_up = df.iloc[:middleindex,3]
new_cur_up = df.iloc[:middleindex,4]
new_exc_up = df.iloc[:middleindex,5]
df_up = {'col1':new_theta_up,'col2':new_cur_up,'col3':new_exc_up}
df_up = pd.DataFrame(df_up)
df_up.columns = ['theta','Curvature','Target']
new_theta_down = df.iloc[middleindex:-1,3]
new_cur_down = df.iloc[middleindex:-1,4]
new_exc_down = df.iloc[middleindex:-1,5]
df_down = {'col1':new_theta_down,'col2':new_cur_down,'col3':new_exc_down}
df_down = pd.DataFrame(df_down)
df_down.columns = ['theta','Curvature','Target']
targ = excess[middleindex]
new_targ = np.repeat(targ,len(df_down))
### Designing the causal model
y = new_targ
X = df_up
y = new_targ
treatment = df_down
### The linear regression estimator
lr = LRSRegressor()
te, lb, ub = lr.estimate_ate(X, treatment, y)
print('Average Treatment Effect (Linear Regression): {:.2f} ({:.2f}, {:.2f})'.format(te[0], lb[0], ub[0]))
它会抛出如下错误:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-37-c86082e758d7> in <module>()
68
69 lr = LRSRegressor()
---> 70 te, lb, ub = lr.estimate_ate(X, treatment, y)
71 print('Average Treatment Effect (Linear Regression): {:.2f} ({:.2f}, {:.2f})'.format(te[0], lb[0], ub[0]))
72
2 frames
/usr/local/lib/python3.7/dist-packages/causalml/inference/meta/utils.py in check_treatment_vector(treatment, control_name)
17 if control_name is not None:
18 assert control_name in treatment, \
---> 19 'Control group level {} not found in treatment vector.'.format(control_name)
20
21
AssertionError: Control group level 0 not found in treatment vector.