我正在尝试在 Python 中拟合带有序列自相关(即 AR1)误差的逻辑回归。不幸的是,statsmodels 目前还不支持这一功能;不过,R 包 glmmTMB 支持。我似乎已经很接近通过 rpy2 在 Python 中实现这一点了,但还是卡住了。
以我对 R 的了解(以及错误信息 RRuntimeError: Error in na.fail.default(as.ts(x)) : missing values in object)来看,似乎需要修改 glmmTMB 包中 glmmTMB 函数的 na.action 参数;但在 R 中并不需要这样做——我的示例在 R 里可以顺利拟合。因此,我怀疑是 Python 到 R 的调用链中的其他环节出了问题。
我猜问题出在 formula 的 ar 项所用到的变量 time 和 group 上。有人知道我该怎么做吗?是我没有正确创建因子变量吗?
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2 import robjects as ro
import pandas as pd
# Example data: paired success / failure counts for ten observations.
data = dict(
    Success=[10, 20, 30, 11, 14, 16, 18, 29, 17, 19],
    Failure=[12, 25, 61, 8, 22, 21, 10, 16, 15, 19],
)
df = pd.DataFrame(data)

# Switch on rpy2's pandas conversion layer.
pandas2ri.activate()

# Load the R packages used by this script.
base, utils, stats = importr('base'), importr('utils'), importr('stats')
glmmTMB = importr('glmmTMB')

# Extra columns for the glmmTMB fit: a running time index (1..n) and a
# single constant group label.
df['time'] = range(1, df.shape[0] + 1)
df['group'] = 1

# Hand the pandas frame over to R.
r_df = pandas2ri.py2ri(df)

# Swap the 'group' column of the R data frame for a factor built from it.
# The pandas column position is reused as the index into r_df.
col_group = ro.vectors.FactorVector(r_df.rx2('group'))
r_df[df.columns.get_loc('group')] = col_group

# Same treatment for the 'time' column.
col_time = ro.vectors.FactorVector(r_df.rx2('time'))
r_df[df.columns.get_loc('time')] = col_time

# Inspect the converted frame and both factor columns. Looks correct.
for r_obj in (r_df, col_group, col_time):
    print(r_obj)
# Baseline model. Normal outcome, intercept only.
m0 = ro.r.glmmTMB(formula=ro.r('Success ~ 1'), data=r_df)
# print(m0[1][0])
# Logistic outcome: binomial family on (Success, Failure) counts.
m1 = ro.r.glmmTMB(formula=ro.r('cbind(Success, Failure) ~ 1'), family=ro.r('binomial'), data=r_df)
# print(m1[1][0])
# Logistic outcome with AR1 errors on the factor columns, per documentation.
# glmmTMB spells this covariance structure `ar1(...)`. The previous spelling,
# `ar(...)`, failed with
#   Error in na.fail.default(as.ts(x)) : missing values in object
# NOTE(review): that error presumably came from R's time-series function
# stats::ar() being evaluated on the formula term; confirm against the
# glmmTMB covariance-structure documentation.
m2 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar1(time + 0 | group)"), family=ro.r('binomial'), data=r_df)
# print(m2[1][0])
# Added for clarity, 2021-03-08: Other variations using na_action argument.
# Each attempt below is kept commented out, followed by the error it produced.
# NOTE(review): every variant that reaches R fails with the same
# na.fail.default(as.ts(x)) error no matter what is passed as na_action, which
# suggests na.action is not the real cause. All of them use `ar(...)` in the
# formula; presumably that resolves to stats::ar() rather than glmmTMB's
# `ar1(...)` covariance structure -- confirm.
#
# Variant m3: bare Python name `na_omit`, which is not defined in this script.
# m3 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action=na_omit)
# name 'na_omit' is not defined.
# Variant m4: same undefined Python name, this time as the argument to ro.r().
# m4 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action=ro.r(na_omit))
# name 'na_omit' is not defined.
# Variant m5: the Python string 'na_omit' passed as na_action.
# m5 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action='na_omit')
# Error in na.fail.default(as.ts(x)): missing values in object
# Variant m6: `na_omit` evaluated as R code; R reports no object of that name.
# m6 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action=ro.r('na_omit'))
# Error in (function(expr, envir=parent.frame(), enclos=if(is.list(envir) || object 'na_omit' not found)))
# Variant m7: `na.omit` written as a Python expression, i.e. attribute `omit`
# of an undefined Python name `na`.
# m7 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action=na.omit)
# name 'na' is not defined
# Variant m8: same undefined Python expression, as the argument to ro.r().
# m8 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action=ro.r(na.omit))
# name 'na' is not defined
# Variant m9: the Python string 'na.omit' passed as na_action.
# m9 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action='na.omit')
# Error in na.fail.default(as.ts(x)): missing values in object
# Variant m10: `na.omit` evaluated as R code and the result passed as na_action.
# m10 = ro.r.glmmTMB(formula=ro.r("cbind(Success, Failure) ~ 1 + ar(time + 0 | group)"), family=ro.r('binomial'), data=r_df, na_action=ro.r('na.omit'))
# Error in na.fail.default(as.ts(x)): missing values in object