I am trying to run lasso optimization on data with 950 samples and about 5000 features. The lasso objective is $\frac{1}{2\,n_{\text{samples}}} \lVert y - Xw \rVert_2^2 + \alpha \lVert w \rVert_1$. As soon as I minimize with an initialization, I get a completely different w, which is strange: the lasso objective is convex, so the initialization should not affect the result. Below are the results of the lasso with and without initialization. tol is the tolerance; if the change in w falls below it, convergence is declared.
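For reference, the comparison can be reduced to a minimal self-contained sketch (synthetic random data standing in for my gene-expression matrix, with the shapes scaled down; the hand-set coef_ plays the role of my initialization):

import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
X = rng.randn(95, 500)  # fewer samples than features, as in my data
y = rng.randn(95)

# with initialization: set coef_ by hand, then fit with warm_start=True
warm = Lasso(alpha=1e-10, warm_start=True, tol=1e-8, max_iter=100000)
warm.coef_ = rng.randn(500)
warm.fit(X, y)

# without initialization: default (zero) starting point
cold = Lasso(alpha=1e-10, tol=1e-8, max_iter=100000)
cold.fit(X, y)

# a nonzero difference means at least one run stopped before the optimum
print(np.abs(warm.coef_ - cold.coef_).max())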
tol=0.00000001
##### lasso model errors #####
gene: 5478 matrix error: 0.069611732213
with initialization: alpha: 1e-20 promotion: -3.58847815733e-13
coef: [-0.00214732 -0.00509795 0.00272167 -0.00651548 -0.00164646 -0.00115342
0.00553346 0.01047653 0.00139832]
without initialization: alpha: 1e-20 promotion: -19.0735249749
coef: [-0.03650629 0.08992003 -0.01287155 0.03203973 0.1567577 -0.03708655
-0.13710957 -0.01252736 -0.21710334]
with initialization: alpha: 1e-15 promotion: 1.06179081478e-10
coef: [-0.00214732 -0.00509795 0.00272167 -0.00651548 -0.00164646 -0.00115342
0.00553346 0.01047653 0.00139832]
without initialization: alpha: 1e-15 promotion: -19.0735249463
coef: [-0.03650629 0.08992003 -0.01287155 0.03203973 0.1567577 -0.03708655
-0.13710957 -0.01252736 -0.21710334]
Warning (from warnings module):
File "/usr/local/lib/python2.7/site-packages/sklearn/linear_model/coordinate_descent.py", line 491
ConvergenceWarning)
ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.
with initialization: alpha: 1e-10 promotion: 0.775144987537
coef: [-0.00185139 -0.0048819 0.00218349 -0.00622618 -0.00145647 -0.00115857
0.0055919 0.01072924 0.00043773]
without initialization: alpha: 1e-10 promotion: -17.8649603301
coef: [-0.03581581 0.0892119 -0.01232829 0.03151441 0.15606195 -0.03734093
-0.13604286 -0.01247732 -0.21233529]
with initialization: alpha: 1e-08 promotion: -5.87121366314
coef: [-0. 0. -0. -0.01064477 0. -0.00116167
-0. 0.01114746 0. ]
without initialization: alpha: 1e-08 promotion: 4.05593555389
coef: [ 0. 0.04505117 0.00668611 0. 0.07731668 -0.03537848
-0.03151995 0. -0.00310122]
max promote:
4.05593555389
For the implementation I use the Lasso function of the Python package sklearn.linear_model. I also changed the data, but the results for the new data also change with the initialization. I find this strange, but I could not analyze it and find an explanation.
Here is the part of my code that is related to the lasso. My data is gene expression. I tested the code on normalized and unnormalized data; for both of them, the results differ with the initial point.
alpha_lasso = [1e-20,1e-15, 1e-10, 1e-8, 1e-7,1e-6,1e-5,1e-4, 1e-3,1e-2, 1, 5 ,20]
lassoreg = Lasso(alpha=alpha_lasso[i],warm_start=True,tol=0.00000001,max_iter=100000)
lassoreg.coef_ = mybeta[:,j-c]
lassoreg.fit(train[:,predictors],train[:,y])
y_train_pred = lassoreg.predict(train[:,predictors])
y_test_pred = lassoreg.predict(test[:,predictors])
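To check which of the two solutions is actually better, the objective value from the formula above can be evaluated for each fitted coefficient vector; the smaller value is closer to the true minimizer. This helper is my own addition for the check (it is not part of the run that produced the log above):

import numpy as np

def lasso_objective(X, y, w, alpha):
    # (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1
    n = X.shape[0]
    residual = y - X.dot(w)
    return residual.dot(residual) / (2.0 * n) + alpha * np.abs(w).sum()

# e.g. compare the warm-started and cold-started fits:
# lasso_objective(train[:,predictors], train[:,y], lassoreg.coef_, alpha_lasso[i])
# lasso_objective(train[:,predictors], train[:,y], lassoreg1.coef_, alpha_lasso[i])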
And here is my whole code:
import pandas as pd
import random
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from GEOparse.GEOTypes import (GSE, GSM, GPL, GDS,
GDSSubset, GEODatabase,
DataIncompatibilityException,
NoMetadataException,
)
import GEOparse as GEO
import copy
import sys
import math
from sklearn import linear_model
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoLars
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.linear_model import coordinate_descent
from sklearn.linear_model import lasso_path, enet_path
from sklearn.base import BaseEstimator, RegressorMixin
from copy import deepcopy
miss_percent = 0.1
alpha_lasso = [1e-20,1e-15, 1e-10, 1e-8, 1e-7,1e-6,1e-5,1e-4, 1e-3,1e-2, 1, 5 ,20]
mins=[]
maxs=[]
mean_err=[]
alphas=[]
mins1=[]
maxs1=[]
mean_err1=[]
alphas1=[]
#mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
def getdata(percent):
    gsd = GEO.get_GEO(geo="GDS4971")
    ngsd = gsd.table.replace('null', np.NaN)
    ngsd = ngsd.dropna(axis=0, how='any')
    ngsd = ngsd.transpose()
    dataarray = ngsd.values
    data = np.delete(dataarray, [0, 1], 0)
    x = data.astype(np.float)
    r_df = x.shape[0]
    c_df = x.shape[1]
    r = int(r_df - math.sqrt((1 - percent) * r_df))
    c = int(c_df - math.sqrt((1 - percent) * c_df))
    train = x[0:r, :]
    test = x[r:r_df, :]
    return x, train, test, r_df, c_df, r, c
genedata,train,test,r_df,c_df,r,c = getdata(miss_percent)
predictors = range(0,c)
promotion = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
promotion = np.asmatrix(promotion)
#error of ax-b
error_aw_b = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_aw_b = np.asmatrix(error_aw_b)
#error of cw-x
error_cw_x = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_cw_x = np.asmatrix(error_cw_x)
#error of lasso function
error_lasso = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_lasso = np.asmatrix(error_lasso)
promotion1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
promotion1 = np.asmatrix(promotion1)
#error of ax-b
error_aw_b1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_aw_b1 = np.asmatrix(error_aw_b1)
#error of cw-x
error_cw_x1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_cw_x1 = np.asmatrix(error_cw_x1)
#error of lasso function
error_lasso1 = [[0.001 for x in range(len(alpha_lasso))] for y in range(c_df-c)]
error_lasso1 = np.asmatrix(error_lasso1)
mybeta = np.random.randn(c, c_df - c)  # any initialization (random values as a placeholder; each column must match coef_'s shape)
###################### LASSO #####################
print("##### lasso model errors #####")
for j in range(c, c+1):
    mean_err = []
    print("\n")
    y = j
    # errorC is an error matrix computed earlier (its definition is not shown here)
    eachMeanError = math.sqrt((np.power(errorC[:, j-c], 2)).sum() / (r_df - r))
    print("gene: " + str(j) + " matrix error: " + str(eachMeanError))
    for i in range(0, 4):  # len(alpha_lasso)
        # with initialization: warm start from mybeta
        lassoreg = Lasso(alpha=alpha_lasso[i], warm_start=True, tol=0.00000001, max_iter=100000)
        lassoreg.coef_ = mybeta[:, j-c]
        lassoreg.fit(train[:, predictors], train[:, y])
        y_train_pred = lassoreg.predict(train[:, predictors])
        y_test_pred = lassoreg.predict(test[:, predictors])
        y_lasso_func = (1.0/(2*r))*sum((y_train_pred - train[:, y])**2) + alpha_lasso[i]*sum(abs(lassoreg.coef_))
        ################## RMS ##################
        error_aw_b[j-c, i] = math.sqrt(sum((y_train_pred - train[:, y])**2)/r)
        error_lasso[j-c, i] = y_lasso_func
        error_cw_x[j-c, i] = math.sqrt(sum((y_test_pred - test[:, y])**2)/(r_df-r))
        mins.extend([(error_cw_x.min())])
        maxs.extend([(error_cw_x.max())])
        promotion[j-c, i] = ((eachMeanError - error_cw_x[j-c, i])/eachMeanError)*100
        print("alpha: " + str(alpha_lasso[i]) + " error_aw_b: " + str(error_aw_b[j-c, i]) + " error_cw_x: " + str(error_cw_x[j-c, i]) + " error_lasso: " + str(error_lasso[j-c, i]) + " promotion: " + str(promotion[j-c, i]))
        print("coef: " + str(lassoreg.coef_[1:10]))
        # without initialization: cold start
        lassoreg1 = Lasso(alpha=alpha_lasso[i], tol=0.00000001, max_iter=100000)
        lassoreg1.fit(train[:, predictors], train[:, y])
        y_train_pred1 = lassoreg1.predict(train[:, predictors])
        y_test_pred1 = lassoreg1.predict(test[:, predictors])
        y_lasso_func1 = (1.0/(2*r))*sum((y_train_pred1 - train[:, y])**2) + alpha_lasso[i]*sum(abs(lassoreg1.coef_))
        ################## RMS ##################
        error_aw_b1[j-c, i] = math.sqrt(sum((y_train_pred1 - train[:, y])**2)/r)
        error_lasso1[j-c, i] = y_lasso_func1
        error_cw_x1[j-c, i] = math.sqrt(sum((y_test_pred1 - test[:, y])**2)/(r_df-r))
        mins1.extend([(error_cw_x1.min())])
        maxs1.extend([(error_cw_x1.max())])
        promotion1[j-c, i] = ((eachMeanError - error_cw_x1[j-c, i])/eachMeanError)*100
        print("alpha: " + str(alpha_lasso[i]) + " error_aw_b: " + str(error_aw_b1[j-c, i]) + " error_cw_x: " + str(error_cw_x1[j-c, i]) + " error_lasso: " + str(error_lasso1[j-c, i]) + " promotion: " + str(promotion1[j-c, i]))
        print("coef: " + str(lassoreg1.coef_[1:10]))
    print("\n")
    print("max promote:")
    print((promotion[j-c, :].max()))
    f = open('analyse_col', 'wb')
    np.save(f, [promotion, alphas, error_cw_x, mins, maxs])
    f.close()
    plt.plot(promotion[:, j-c])
    plt.ylabel('coef for ')
    plt.xlabel('each gene')
    plt.show()
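Since the log above also shows a ConvergenceWarning, I additionally check whether a given fit converged within max_iter. This is my own helper as well (in recent sklearn versions the warning class is importable from sklearn.exceptions; older versions keep it in sklearn.utils):

import warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import Lasso

def fit_and_check(X, y, **lasso_kwargs):
    # fit a Lasso and report whether coordinate descent converged
    model = Lasso(**lasso_kwargs)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        model.fit(X, y)
    converged = not any(issubclass(w.category, ConvergenceWarning) for w in caught)
    return model, converged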