如何允许函数的一个参数在组之间变化而其他参数适合所有组?
我正在使用 lmfit 来拟合疾病传播的模型。我希望函数的指数适合所有数据点,但是比例因子需要在不同组之间有所不同(作为不同年份繁殖率不同的疾病的代表)。
请参阅下面的代码:
#### create parameters ####
params = Parameters()
params.add('tau_1', value=1.,min=0.01)
params.add('tau_2', value=1.,min=0.01)
params.add('rho_1', value=1.,min=0.01)
for i in range(0, len(sorted(variable_data.flu_season.unique()))):
season = str(sorted(variable_data.flu_season.unique())[i])
params.add('theta_' + season, value=1., min = 0.01)
#### model ####
def pop_dist_inverse_grav(params, dist, host_pop, targ_pop, flu_sea, data):
parvals = params.valuesdict()
tau_1 = parvals['tau_1']
tau_2 = parvals['tau_2']
rho_1 = parvals['rho_1']
theta_1 = parvals['theta_' + str(season)]
grav_model = theta_1 * (np.power(dist, rho_1)) / ((np.power(host_pop, tau_1)) * (np.power(targ_pop, tau_2)))
return grav_model - data
按照 M Newville 的建议,我得到了合适的工作。下面的代码:
import pandas as pd
from lmfit import minimize, Parameters, report_fit
import numpy as np
variable_data = pd.read_csv("../Data/variables_for_model_fit.csv", sep=",", header='infer')
season_list = sorted(variable_data.flu_season.unique())
#### create parameters ####
params1 = Parameters()
params1.add('tau_host', value=0.24,min=0, max = 3)
params1.add('tau_targ', value=0.14,min=0, max = 3)
params1.add('rho', value=0.29,min=0, max = 3)
# "global" parameters
for i, season in enumerate(season_list):
params1.add('theta_%d' % season_list[i], value=1000., min=1, max=1e6)
# creates theta parameters for each season
#### define model ####
def grav_dist_over_pop(dist, hist_pop, targ_pop, theta, rho, tau_host, tau_targ):
return theta**(-1) * dist**rho * host_pop**(-tau_host) * targ_pop**(-tau_targ)
#### objective function ####
def objective_1(params, dist, host_pop, targ_pop, flu_sea, data):
parvals1 = params1.valuesdict()
resid = np.zeros((len(season_data),len(season_data)))
for i, data in enumerate(data):
theta = parvals1['theta_%d' % flu_sea[i]]
rho = parvals1['rho']#_%d' % flu_sea[i]]
tau_host = parvals1['tau_host']#_%d' % flu_sea[i]]
tau_targ = parvals1['tau_targ']#_%d' % flu_sea[i]]
model = grav_dist_over_pop(dist, host_pop, targ_pop, theta, rho, tau_host, tau_targ)
resid[i, :] = model - data
return resid.flatten()
#### fit global variables ####
season_data = variable_data.sample(500)
# my dataset is huge so lmfit takes an age when fitting all of the
# theta values
host_pop = np.asarray(season_data.host_city_pop.values.tolist())
targ_pop = np.asarray(season_data.target_city_pop.values.tolist())
dist = np.asarray(season_data.distance.values.tolist())
data = np.asarray(season_data.time_to_spread.values.tolist())
flu_sea = np.asarray(season_data.flu_season.values.tolist())
result = minimize(objective_1, params1, args=(dist, host_pop, targ_pop, flu_sea, data))
report_fit(result.params)