在执行 GridSearchCV(来自 scikit-learn)之后,我无法拟合 MLkNN 模型的实例(来自 scikit-multilearn)。我收到一个错误。这是适当的代码:
#From MachineLearningMastery: https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/
def series_to_supervised(n_lags, n_vars, data, n_out=1, dropnan=True):
n_vars = 1 if type(data) is list else data.shape[1]
df = DataFrame(data)
cols, names = list(), list()
#input sequence t-n, ..., t-1
for i in range(n_lags, 0, -1): #for i in 3 to 0 not including 0
cols.append(df.shift(i))
names += [('var%d(t-%d)' % (j+1, i)) for j in range (n_vars)]
#forecast sequence t, t+1, ..., t+n
for i in range(0, n_out):
cols.append(df.shift(-i))
if i==0:
names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
else:
names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
agg = concat(cols, axis=1)
agg.columns = names
if dropnan:
agg.dropna(inplace=True)
return agg
def testexamples():
def average_precision_wrapper(estimator, X, y):
if X.ndim == 2:
X = X.reshape((-1)) #[1, 1497] becomes [1497,], needed for average_precision
if y.ndim == 2:
y = y.reshape((-1)) #[1, 1497] ... as above
y_pred = estimator.predict(X).toarray()
return average_precision_score(y, y_pred)
true_values = np.random.choice([0,1], size=(500, 1497), p=[0.99, 0.01])
#Need to convert this to supervised learning. Use previous 2 days to predict (lag=2)
n_lags = 2
n_vars = true_values.shape[1]
all_data = np.asarray(series_to_supervised(n_lags, n_vars, data=true_values))
train_x = all_data[:400, :int(n_vars*n_lags)]
train_y = all_data[:400, int(n_vars*n_lags):]
test_x = all_data[-100:, :int(n_vars*n_lags)]
test_y = all_data[-100:, int(n_vars*n_lags):]
parameters = {'k': range(1,5), 's': [0.5, 0.75, 1]}
checked_model = GridSearchCV(MLkNN(), parameters, scoring='average_precision')
print('type: train_x: ', type(train_x), ' type: train_y: ', type(train_y))
checked_model.fit(train_x, train_y)
完整跟踪:
user@GPU8:~/path/to/dir$ python May15_mlknn.py
type: train_x: <type 'numpy.ndarray'> type: train_y: <type 'numpy.ndarray'>
Traceback (most recent call last):
File "May15_mlknn.py", line 380, in <module>
testexamples()
File "May15_mlknn.py", line 340, in testexamples
checked_model.fit(train_x, train_y)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 945, in fit
return self._fit(X, y, groups, ParameterGrid(self.param_grid))
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 564, in _fit
for parameters in parameter_iterable
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 326, in __init__
self.results = batch()
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 260, in _fit_and_score
test_score = _score(estimator, X_test, y_test, scorer)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 288, in _score
score = scorer(estimator, X_test, y_test)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/metrics/scorer.py", line 196, in __call__
return self._sign * self._score_func(y, y_pred, **self._kwargs)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/metrics/ranking.py", line 184, in average_precision_score
average, sample_weight=sample_weight)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/metrics/base.py", line 88, in _average_binary_score
y_score = check_array(y_score)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 380, in check_array
force_all_finite)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 243, in _ensure_sparse_format
raise TypeError('A sparse matrix was passed, but dense '
TypeError: A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
我已经看过这个和这个和这个。我的问题不同,因为我检查了 train_x 和 train_y 的类型,它们都是密集的 numpy 数组。
我做错了什么,我该如何解决?
编辑:
我现在正在尝试下面提供的答案,但由于我得到的错误而进行了修改(在此处回答):
def average_precision_wrapper(estimator, X, y):
if X.ndim == 2:
X = X.reshape((-1)) #(1, 1497) becomes (1497,), needed for average_precision
if y.ndim == 2:
y = y.reshape((-1)) #(1, 1497) ... as above
y_pred = estimator.predict(X).toarray()
return average_precision_score(y, y_pred)
编辑2:毕竟那不好。我明白了ValueError: query data dimension must match training data dimension
。这是跟踪:
/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
DeprecationWarning)
Traceback (most recent call last):
File "May15_mlknn_to_so.py", line 393, in <module>
testexamples()
File "May15_mlknn_to_so.py", line 353, in testexamples
checked_model.fit(train_x, train_y)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 945, in fit
return self._fit(X, y, groups, ParameterGrid(self.param_grid))
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 564, in _fit
for parameters in parameter_iterable
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 326, in __init__
self.results = batch()
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 260, in _fit_and_score
test_score = _score(estimator, X_test, y_test, scorer)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_validation.py", line 288, in _score
score = scorer(estimator, X_test, y_test)
File "May15_mlknn_to_so.py", line 307, in average_precision_wrapper
y_pred = estimator.predict(X).toarray()
File "May15_mlknn_to_so.py", line 237, in predict
self.knn_.kneighbors(X, self.k + self.ignore_first_neighbours, return_distance=False)]
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/neighbors/base.py", line 381, in kneighbors
for s in gen_even_slices(X.shape[0], n_jobs)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 758, in __call__
while self.dispatch_one_batch(iterator):
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 608, in dispatch_one_batch
self._dispatch(tasks)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 571, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 109, in apply_async
result = ImmediateResult(func)
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 326, in __init__
self.results = batch()
File "/user/pkgs/anaconda2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "sklearn/neighbors/binary_tree.pxi", line 1294, in sklearn.neighbors.kd_tree.BinaryTree.query (sklearn/neighbors/kd_tree.c:11337)
ValueError: query data dimension must match training data dimension