python - Python 幂律符合使用 ODR 的数据中的上限和不对称错误

Question

我正在尝试使用 python 将一些数据拟合到幂律中。问题是我的一些观点是上限，我不知道如何将其包含在拟合程序中。

在数据中，我将上限设置为 y 中的误差等于 1，而其余的则要小得多。您可以将此错误设置为 0 并更改 uplims 列表生成器，但这样的配合很糟糕。

代码如下：

import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *

# Initiate some data
x = [1.73e-04, 5.21e-04, 1.57e-03, 4.71e-03, 1.41e-02, 4.25e-02, 1.28e-01, 3.84e-01, 1.15e+00]
x_err = [1e-04, 1e-04, 1e-03, 1e-03, 1e-02, 1e-02, 1e-01, 1e-01, 1e-01]
y = [1.26e-05, 8.48e-07, 2.09e-08, 4.11e-09, 8.22e-10, 2.61e-10, 4.46e-11, 1.02e-11, 3.98e-12]
y_err = [1, 1, 2.06e-08, 2.5e-09, 5.21e-10, 1.38e-10, 3.21e-11, 1, 1]

# Define upper limits
uplims = np.ones(len(y_err),dtype='bool')
for i in range(len(y_err)):
    if y_err[i]<1:
        uplims[i]=0
    else:
        uplims[i]=1

# Define a function (power law in our case) to fit the data with.
def function(p, x):
     m, c = p
     return m*x**(-c)

# Create a model for fitting.
model = Model(function)

# Create a RealData object using our initiated data from above.
data = RealData(x, y, sx=x_err, sy=y_err)

# Set up ODR with the model and data.
odr = ODR(data, model, beta0=[1e-09, 2])
odr.set_job(fit_type=0)   # 0 is full ODR and 2 is least squares; AFAIK, it doesn't change within errors
# more details in https://docs.scipy.org/doc/scipy/reference/generated/scipy.odr.ODR.set_job.html

# Run the regression.
out = odr.run()


# Use the in-built pprint method to give us results.
#out.pprint()   #this prints much information, but normally we don't need it, just the parameters and errors; the residual variation is the reduced chi square estimator

print('amplitude = %5.2e +/- %5.2e \nindex = %5.2f +/- %5.2f \nchi square = %12.8f'% (out.beta[0], out.sd_beta[0], out.beta[1], out.sd_beta[1], out.res_var))

# Generate fitted data.
x_fit = np.linspace(x[0], x[-1], 1000)    #to do the fit only within the x interval; we can always extrapolate it, of course
y_fit = function(out.beta, x_fit)


# Generate a plot to show the data, errors, and fit.
fig, ax = plt.subplots()

ax.errorbar(x, y, xerr=x_err, yerr=y_err, uplims=uplims, linestyle='None', marker='x')
ax.loglog(x_fit, y_fit)
ax.set_xlabel(r'$x$')
ax.set_ylabel(r'$f(x) = m·x^{-c}$')
ax.set_title('Power Law fit')

plt.show()

拟合的结果是：

amplitude = 3.42e-12 +/- 5.32e-13
index =  1.33 +/-  0.04
chi square =   0.01484021

正如您在图中看到的那样，前两个点和最后两个点是上限，拟合没有考虑到它们。此外，在倒数第二点，即使严格禁止，拟合也会超过它。

我需要拟合知道这个限制非常严格，而不是试图拟合点本身，而只是将它们视为限制。我怎么能用 odr 例程（或任何其他让我适合并给我卡方估计器的代码）来做到这一点？

请考虑到我需要轻松地将函数更改为其他泛化，因此幂律模块之类的东西是不可取的。

谢谢！

score 0 · Accepted Answer

这个答案与这篇文章有关，我在这篇文章中讨论了拟合x和y错误。因此，这不需要ODR模块，但可以手动完成。因此，可以使用leastsqor minimize。关于约束，我在其他帖子中明确表示，如果可能的话，我会尽量避免它们。这也可以在这里完成，尽管编程和数学的细节有点麻烦，特别是如果它应该是稳定和万无一失的。我只是给出一个粗略的想法。说我们想要y0 > m * x0**(-c)。在日志形式中，我们可以将其写为eta0 > mu - c * xeta0. 即有一个alpha这样的eta0 = mu - c * xeta0 + alpha**2。其他不等式也一样。对于第二个上限，您将获得beta**2但是您可以决定哪个是较小的，因此您会自动满足另一个条件。gamma**2同样的事情适用于 a和 a的下限delta**2。假设我们可以使用alphaand gamma。我们也可以结合不等式条件来将这两者联系起来。最后，我们可以拟合 asigma和alpha = sqrt(s-t)* sigma / sqrt( sigma**2 + 1 )，其中s和t是从不等式推导出来的。该sigma / sqrt( sigma**2 + 1 )函数只是让alpha在一定范围内变化的一种选择，即alpha**2 < s-tradicand可能变为负数的事实表明存在没有解决方案的情况。用alpha已知的mu，因此m是计算出来的。所以拟合参数是c和sigma，它考虑了不等式并使得m依赖。我厌倦了它并且它可以工作，但是手头的版本不是最稳定的版本。我会根据要求发布它。

由于我们已经有了手工残差函数，但我们还有第二个选择。我们只是介绍我们自己的chi**2function 和 use minimize，它允许约束。asminimize和constraints关键字解决方案非常灵活，残差函数很容易修改为其他函数，不仅对于m * x**( -c )整体构造非常灵活。它看起来如下：

import matplotlib.pyplot as plt
import numpy as np
from random import random, seed
from scipy.optimize import minimize,leastsq

seed(7563)
fig1 = plt.figure(1)


###for gaussion distributed errors
def boxmuller(x0,sigma):
    u1=random()
    u2=random()
    ll=np.sqrt(-2*np.log(u1))
    z0=ll*np.cos(2*np.pi*u2)
    z1=ll*np.cos(2*np.pi*u2)
    return sigma*z0+x0, sigma*z1+x0


###for plotting ellipses
def ell_data(a,b,x0=0,y0=0):
    tList=np.linspace(0,2*np.pi,150)
    k=float(a)/float(b)
    rList=[a/np.sqrt((np.cos(t))**2+(k*np.sin(t))**2) for t in tList]
    xyList=np.array([[x0+r*np.cos(t),y0+r*np.sin(t)] for t,r in zip(tList,rList)])
    return xyList

###function to fit
def f(x,m,c):
    y = abs(m) * abs(x)**(-abs(c))
    #~ print y,x,m,c
    return y


###how to rescale the ellipse to make fitfunction a tangent
def elliptic_rescale(x, m, c, x0, y0, sa, sb):
    #~ print "e,r",x,m,c
    y=f( x, m, c ) 
    #~ print "e,r",y
    r=np.sqrt( ( x - x0 )**2 + ( y - y0 )**2 )
    kappa=float( sa ) / float( sb )
    tau=np.arctan2( y - y0, x - x0 )
    new_a=r*np.sqrt( np.cos( tau )**2 + ( kappa * np.sin( tau ) )**2 )
    return new_a

###residual function to calculate chi-square
def residuals(parameters,dataPoint):#data point is (x,y,sx,sy)
    m, c = parameters
    #~ print "m c", m, c
    theData = np.array(dataPoint)
    best_t_List=[]
    for i in range(len(dataPoint)):
        x, y, sx, sy = dataPoint[i][0], dataPoint[i][1], dataPoint[i][2], dataPoint[i][3]
        #~ print "x, y, sx, sy",x, y, sx, sy
        ###getthe point on the graph where it is tangent to an error-ellipse
        ed_fit = minimize( elliptic_rescale, x , args = ( m, c, x, y, sx, sy ) )
        best_t = ed_fit['x'][0]
        best_t_List += [best_t]
        #~ exit(0)
    best_y_List=[ f( t, m, c ) for t in best_t_List ]
    ##weighted distance not squared yet, as this is done by scipy.optimize.leastsq
    wighted_dx_List = [ ( x_b - x_f ) / sx for x_b, x_f, sx in zip( best_t_List,theData[:,0], theData[:,2] ) ]
    wighted_dy_List = [ ( x_b - x_f ) / sx for x_b, x_f, sx in zip( best_y_List,theData[:,1], theData[:,3] ) ]
    return wighted_dx_List + wighted_dy_List


def chi2(params, pnts):  
    r = np.array( residuals( params, pnts ) )
    s = sum( [ x**2 for x in  r]  )
    #~ print params,s,r
    return s


def myUpperIneq(params,pnt):
    m, c = params
    x,y=pnt
    return y - f( x, m, c )


def myLowerIneq(params,pnt):
    m, c = params
    x,y=pnt
    return f( x, m, c ) - y


###to create some test data
def test_data(m,c, xList,const_sx,rel_sx,const_sy,rel_sy):
    yList=[f(x,m,c) for x in xList]
    xErrList=[ boxmuller(x,const_sx+x*rel_sx)[0] for x in xList]
    yErrList=[ boxmuller(y,const_sy+y*rel_sy)[0] for y in yList]
    return xErrList,yErrList


###some start values
mm_0=2.3511
expo_0=.3588
csx,rsx=.01,.07
csy,rsy=.04,.09,

limitingPoints=dict()
limitingPoints[0]=np.array([[.2,5.4],[.5,5.0],[5.1,.9],[5.7,.9]])
limitingPoints[1]=np.array([[.2,5.4],[.5,5.0],[5.1,1.5],[5.7,1.2]])
limitingPoints[2]=np.array([[.2,3.4],[.5,5.0],[5.1,1.1],[5.7,1.2]])
limitingPoints[3]=np.array([[.2,3.4],[.5,5.0],[5.1,1.7],[5.7,1.2]])

####some data
xThData=np.linspace(.2,5,15)
yThData=[ f(x, mm_0, expo_0) for x in xThData]

#~ ###some noisy data
xNoiseData,yNoiseData=test_data(mm_0,  expo_0, xThData, csx,rsx, csy,rsy)
xGuessdError=[csx+rsx*x for x in xNoiseData]
yGuessdError=[csy+rsy*y for y in yNoiseData]



for testing in range(4):
    ###Now fitting with limits
    zipData=zip(xNoiseData,yNoiseData, xGuessdError, yGuessdError)    
    estimate = [ 2.4, .3 ]
    con0={'type': 'ineq', 'fun': myUpperIneq, 'args': (limitingPoints[testing][0],)}
    con1={'type': 'ineq', 'fun': myUpperIneq, 'args': (limitingPoints[testing][1],)}
    con2={'type': 'ineq', 'fun': myLowerIneq, 'args': (limitingPoints[testing][2],)}
    con3={'type': 'ineq', 'fun': myLowerIneq, 'args': (limitingPoints[testing][3],)}
    myResult = minimize( chi2 , estimate , args=( zipData, ), constraints=[ con0, con1, con2, con3 ]  )
    print "############"
    print myResult


    ###plot that
    ax=fig1.add_subplot(4,2,2*testing+1)
    ax.plot(xThData,yThData)
    ax.errorbar(xNoiseData,yNoiseData, xerr=xGuessdError, yerr=yGuessdError, fmt='none',ecolor='r')


    testX = np.linspace(.2,6,25)
    testY = np.fromiter( ( f( x, myResult.x[0], myResult.x[1] ) for x in testX ), np.float)

    bx=fig1.add_subplot(4,2,2*testing+2)
    bx.plot(xThData,yThData)
    bx.errorbar(xNoiseData,yNoiseData, xerr=xGuessdError, yerr=yGuessdError, fmt='none',ecolor='r')
    ax.plot(limitingPoints[testing][:,0],limitingPoints[testing][:,1],marker='x', linestyle='')
    bx.plot(limitingPoints[testing][:,0],limitingPoints[testing][:,1],marker='x', linestyle='')
    ax.plot(testX, testY, linestyle='--')
    bx.plot(testX, testY, linestyle='--')

    bx.set_xscale('log')
    bx.set_yscale('log')

plt.show()

提供结果

############
  status: 0
 success: True
    njev: 8
    nfev: 36
     fun: 13.782127248002116
       x: array([ 2.15043226,  0.35646436])
 message: 'Optimization terminated successfully.'
     jac: array([-0.00377715,  0.00350225,  0.        ])
     nit: 8
############
  status: 0
 success: True
    njev: 7
    nfev: 32
     fun: 41.372277637885716
       x: array([ 2.19005695,  0.23229378])
 message: 'Optimization terminated successfully.'
     jac: array([ 123.95069313, -442.27114677,    0.        ])
     nit: 7
############
  status: 0
 success: True
    njev: 5
    nfev: 23
     fun: 15.946621924326545
       x: array([ 2.06146362,  0.31089065])
 message: 'Optimization terminated successfully.'
     jac: array([-14.39131606, -65.44189298,   0.        ])
     nit: 5
############
  status: 0
 success: True
    njev: 7
    nfev: 34
     fun: 88.306027468763432
       x: array([ 2.16834392,  0.14935514])
 message: 'Optimization terminated successfully.'
     jac: array([ 224.11848736, -791.75553417,    0.        ])
     nit: 7

我检查了四个不同的限制点（行）。结果以对数刻度（列）正常显示。通过一些额外的工作，您也可能会遇到错误。

不对称错误的更新 说实话，目前我不知道如何处理这个属性。天真地，我会定义我自己的不对称损失函数，类似于这篇文章。有错误x和y错误我是按象限来做的，而不是仅仅检查正面或负面。因此，我的错误椭圆变为四个连接的部分。尽管如此，这还是有些合理的。为了测试和展示它是如何工作的，我做了一个线性函数的例子。我猜OP可以根据他的要求将这两段代码组合起来。

在线性拟合的情况下，它看起来像这样：

import matplotlib.pyplot as plt
import numpy as np
from random import random, seed
from scipy.optimize import minimize,leastsq

#~ seed(7563)
fig1 = plt.figure(1)
ax=fig1.add_subplot(2,1,1)
bx=fig1.add_subplot(2,1,2)

###function to fit, here only linear for testing.
def f(x,m,y0):
    y = m * x +y0
    return y

###for gaussion distributed errors
def boxmuller(x0,sigma):
    u1=random()
    u2=random()
    ll=np.sqrt(-2*np.log(u1))
    z0=ll*np.cos(2*np.pi*u2)
    z1=ll*np.cos(2*np.pi*u2)
    return sigma*z0+x0, sigma*z1+x0


###for plotting ellipse quadrants
def ell_data(aN,aP,bN,bP,x0=0,y0=0):
    tPPList=np.linspace(0, 0.5 * np.pi, 50)
    kPP=float(aP)/float(bP)
    rPPList=[aP/np.sqrt((np.cos(t))**2+(kPP*np.sin(t))**2) for t in tPPList]

    tNPList=np.linspace( 0.5 * np.pi, 1.0 * np.pi, 50)
    kNP=float(aN)/float(bP)
    rNPList=[aN/np.sqrt((np.cos(t))**2+(kNP*np.sin(t))**2) for t in tNPList]

    tNNList=np.linspace( 1.0 * np.pi, 1.5 * np.pi, 50)
    kNN=float(aN)/float(bN)
    rNNList=[aN/np.sqrt((np.cos(t))**2+(kNN*np.sin(t))**2) for t in tNNList]

    tPNList = np.linspace( 1.5 * np.pi, 2.0 * np.pi, 50)
    kPN = float(aP)/float(bN)
    rPNList = [aP/np.sqrt((np.cos(t))**2+(kPN*np.sin(t))**2) for t in tPNList]

    tList = np.concatenate( [ tPPList, tNPList, tNNList, tPNList] )
    rList = rPPList + rNPList+ rNNList + rPNList

    xyList=np.array([[x0+r*np.cos(t),y0+r*np.sin(t)] for t,r in zip(tList,rList)])
    return xyList


###how to rescale the ellipse to touch fitfunction at point (x,y)
def elliptic_rescale_asymmetric(x, m, c, x0, y0, saN, saP, sbN, sbP , getQuadrant=False):
    y=f( x, m, c ) 
    ###distance to function
    r=np.sqrt( ( x - x0 )**2 + ( y - y0 )**2 )
    ###angle to function
    tau=np.arctan2( y - y0, x - x0 )
    quadrant=0
    if tau >0:
        if tau < 0.5 * np.pi: ## PP
            kappa=float( saP ) / float( sbP )
            quadrant=1
        else:
            kappa=float( saN ) / float( sbP )
            quadrant=2
    else:
        if tau < -0.5 * np.pi: ## PP
            kappa=float( saN ) / float( sbN)
            quadrant=3
        else:
            kappa=float( saP ) / float( sbN )
            quadrant=4
    new_a=r*np.sqrt( np.cos( tau )**2 + ( kappa * np.sin( tau ) )**2 )
    if quadrant == 1 or quadrant == 4:
        rel_a=new_a/saP
    else:
        rel_a=new_a/saN
    if getQuadrant:
        return rel_a, quadrant, tau
    else:
        return rel_a

### residual function to calculate chi-square
def residuals(parameters,dataPoint):#data point is (x,y,sxN,sxP,syN,syP)
    m, c = parameters
    theData = np.array(dataPoint)
    bestTList=[]
    qqList=[]
    weightedDistanceList = []
    for i in range(len(dataPoint)):
        x, y, sxN, sxP, syN, syP = dataPoint[i][0], dataPoint[i][1], dataPoint[i][2], dataPoint[i][3], dataPoint[i][4], dataPoint[i][5]
        ### get the point on the graph where it is tangent to an error-ellipse
        ### i.e. smallest ellipse touching the graph
        edFit = minimize(  elliptic_rescale_asymmetric, x , args = ( m, c, x, y, sxN, sxP, syN, syP ) )
        bestT = edFit['x'][0]
        bestTList += [ bestT ]
        bestA,qq , tau= elliptic_rescale_asymmetric( bestT, m, c , x, y, aN, aP, bN, bP , True)
        qqList += [ qq ]
    bestYList=[ f( t, m, c ) for t in bestTList ]
    ### weighted distance not squared yet, as this is done by scipy.optimize.leastsq or manual chi2 function
    for counter in range(len(dataPoint)):
        xb=bestTList[counter]
        xf=dataPoint[counter][0]
        yb=bestYList[counter]
        yf=dataPoint[counter][1]
        quadrant=qqList[counter]
        if quadrant == 1:
            sx, sy = sxP, syP
        elif quadrant == 2:
            sx, sy = sxN, syP
        elif quadrant == 3:
            sx, sy = sxN, syN
        elif quadrant == 4:
            sx, sy = sxP, syN
        else:
            assert 0
        weightedDistanceList += [ ( xb - xf ) / sx, ( yb - yf ) / sy ]
    return weightedDistanceList


def chi2(params, pnts):  
    r = np.array( residuals( params, pnts ) )
    s = np.fromiter( ( x**2 for x in  r), np.float ).sum()
    return s

####...to make data with asymmetric error (fixed); for testing only
def noisy_data(xList,m0,y0, sxN,sxP,syN,syP):
    yList=[ f(x, m0, y0) for x in xList]
    gNList=[boxmuller(0,1)[0] for dummy in range(len(xList))]
    xerrList=[]
    for x,err in zip(xList,gNList):
        if err < 0:
            xerrList += [ sxP * err + x ]
        else:
            xerrList += [ sxN * err + x ]
    gNList=[boxmuller(0,1)[0] for dummy in range(len(xList))]
    yerrList=[]
    for y,err in zip(yList,gNList):
        if err < 0:
            yerrList += [ syP * err + y ]
        else:
            yerrList += [ syN * err + y ]
    return xerrList, yerrList


###some start values
m0=1.3511
y0=-2.2
aN, aP, bN, bP=.2,.5, 0.9, 1.6

#### some data
xThData=np.linspace(.2,5,15)
yThData=[ f(x, m0, y0) for x in xThData]
xThData0=np.linspace(-1.2,7,3)
yThData0=[ f(x, m0, y0) for x in xThData0]

### some noisy data
xErrList,yErrList = noisy_data(xThData, m0, y0, aN, aP, bN, bP)

###...and the fit
dataToFit=zip(xErrList,yErrList,  len(xThData)*[aN], len(xThData)*[aP], len(xThData)*[bN], len(xThData)*[bP])
fitResult = minimize(chi2, (m0,y0) , args=(dataToFit,) )
fittedM, fittedY=fitResult.x
yThDataF=[ f(x, fittedM, fittedY) for x in xThData0]


### plot that
for cx in [ax,bx]:
    cx.plot([-2,7], [f(x, m0, y0 ) for x in [-2,7]])

ax.errorbar(xErrList,yErrList, xerr=[ len(xThData)*[aN],len(xThData)*[aP] ], yerr=[ len(xThData)*[bN],len(xThData)*[bP] ], fmt='ro')

for x,y in zip(xErrList,yErrList)[:]:
    xEllList,yEllList = zip( *ell_data(aN,aP,bN,bP,x,y) )
    ax.plot(xEllList,yEllList ,c='#808080')
    ### rescaled
    ### ...as well as a scaled version that touches the original graph. This gives the error shortest distance to that graph
    ed_fit = minimize( elliptic_rescale_asymmetric, 0 ,args=(m0, y0, x, y, aN, aP, bN, bP ) )
    best_t = ed_fit['x'][0]
    best_a,qq , tau= elliptic_rescale_asymmetric( best_t, m0, y0 , x, y, aN, aP, bN, bP , True)
    xEllList,yEllList = zip( *ell_data( aN * best_a, aP * best_a, bN * best_a, bP * best_a, x, y) )
    ax.plot( xEllList, yEllList, c='#4040a0' )

###plot the fit

bx.plot(xThData0,yThDataF)
bx.errorbar(xErrList,yErrList, xerr=[ len(xThData)*[aN],len(xThData)*[aP] ], yerr=[ len(xThData)*[bN],len(xThData)*[bP] ], fmt='ro')
for x,y in zip(xErrList,yErrList)[:]:
    xEllList,yEllList = zip( *ell_data(aN,aP,bN,bP,x,y) )
    bx.plot(xEllList,yEllList ,c='#808080')
    ####rescaled
    ####...as well as a scaled version that touches the original graph. This gives the error shortest distance to that graph
    ed_fit = minimize( elliptic_rescale_asymmetric, 0 ,args=(fittedM, fittedY, x, y, aN, aP, bN, bP ) )
    best_t = ed_fit['x'][0]
    #~ print best_t
    best_a,qq , tau= elliptic_rescale_asymmetric( best_t, fittedM, fittedY , x, y, aN, aP, bN, bP , True)
    xEllList,yEllList = zip( *ell_data( aN * best_a, aP * best_a, bN * best_a, bP * best_a, x, y) )
    bx.plot( xEllList, yEllList, c='#4040a0' )

plt.show()

哪个情节

上图显示了原始线性函数和使用非对称高斯误差由此生成的一些数据。绘制了误差条以及分段误差椭圆（灰色......并重新调整以接触线性函数，蓝色）。下图还显示了拟合函数以及重新缩放的分段椭圆，触及拟合函数。

python - Python 幂律符合使用 ODR 的数据中的上限和不对称错误

1 回答 1

Related

Reference