我在波士顿房价数据集上尝试了几种不同的回归模型。我发现,如果使用普通线性模型或岭回归,预测值的形状为 (102, 1);而如果用同样的代码换成 Lasso,输出的形状则为 (102,)。这是为什么?这会导致代码随后在调用 pearsonr 的那一行崩溃,并报错:ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()。
有什么办法可以让下面的代码顺利运行吗?
from sklearn.datasets import load_boston
import numpy as np
import pandas as pd
import sys
def evalOneModel(model, name, X, y, nRuns):
    """Fit ``model`` on ``nRuns`` random splits and print Pearson's r per run.

    Each run splits the data 80/20 with ``train_test_split``, refits the
    model on the training part and prints the Pearson correlation between
    the held-out targets and the predictions, followed by the fitted
    coefficients. After the last run the mean correlation over all runs is
    printed, labelled with ``name``.

    ``train_test_split``, ``mean_squared_error``, ``r2_score`` and
    ``pearsonr`` are looked up at module level when the function is called.

    Args:
        model: Estimator exposing ``fit``, ``predict`` and ``coef_``.
        name: Label used in the summary line.
        X: Feature table handed to ``train_test_split``.
        y: Values of a single target, either as a one-column table or as a
            1-D sequence.
        nRuns: Number of random splits to evaluate.

    Returns:
        None. All results are written to stdout.
    """
    # Per-run MSE and R^2 are collected but not part of the printed summary.
    allMse = []
    allR2 = []
    all_rho_P = []

    for i in range(nRuns):
        x_train, x_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=None
        )
        model = model.fit(x_train, y_train)
        predictions = model.predict(x_test)

        # Depending on the estimator, predictions for a one-column target
        # come back as (n, 1) or as (n,). Flatten both sides so every metric
        # below sees matching 1-D arrays regardless of the model used.
        y_true = np.asarray(y_test).ravel()
        y_pred = np.asarray(predictions).ravel()

        allMse.append(mean_squared_error(y_true, y_pred))
        allR2.append(r2_score(y_true, y_pred))

        print(type(y_test))
        print(y_test.shape)
        print(type(predictions))
        print(predictions.shape)

        # With 1-D inputs pearsonr yields a scalar coefficient, so no
        # indexing into the result is needed (or possible).
        rhoP, _ = pearsonr(y_true, y_pred)
        rhoP = float(rhoP)
        all_rho_P.append(rhoP)

        print("run{}={:0.3f}; ".format(i, rhoP), end="")
        print(model.coef_)

    myTitle = "{} mean={:0.3f}".format(name, np.mean(all_rho_P))
    print("")
    print(myTitle)
    print("")
    sys.stdout.flush()
####### MAIN #####
# Names needed by the evaluation runs; imported before the first call to
# evalOneModel, which looks several of them up at call time.
from scipy.stats import spearmanr, pearsonr
from sklearn import linear_model
from sklearn.linear_model import Lasso  # L1
from sklearn.linear_model import Ridge  # L2
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

pd.set_option("expand_frame_repr", False)

# Build one frame holding the features plus the target column, and a
# separate one-column frame holding only the target.
bosten_data = load_boston()
df = pd.DataFrame(bosten_data.data, columns=bosten_data.feature_names)
df["MEDV"] = bosten_data.target  # add the target to the data frame
target = pd.DataFrame(bosten_data.target, columns=["MEDV"])

# Standardise every column: subtract its mean, divide by its std.
norm_df = (df - df.mean()) / df.std()
norm_target = (target - target.mean()) / target.std()

X = norm_df[["RM", "AGE", "PTRATIO", "LSTAT"]]
y = norm_target

print("\n\nstarting runs ...\n")

# Evaluate each linear model once on the same features and target.
for model, label in (
    (linear_model.LinearRegression(), "OLS"),
    (Ridge(alpha=1.0), "Ridge (alpha=1)"),
    (Lasso(alpha=1.0), "Lasso (alpha=1)"),
):
    evalOneModel(model, label, X, y, 1)