我正在尝试将一些 MRI 灰质图图像作为 numpy 数组加载,以便它们可以在 sklearn 中进行处理。我的代码导入它们并使用 NiftiMasker 加载它们(NiftiMasker 对图像应用掩码以提取时间序列,我认为这是导致错误的 numpy 数组中的第 4 维)。当我尝试运行此代码时出现 ValueError ,因为由于某种原因,当我只需要一个 3D 数组时,数据被加载为 4D numpy 数组。有没有办法减少数组的维度或其他方法来解决这个错误?这是代码:
import nibabel
import numpy as np
import matplotlib.pyplot as plt
from nilearn.input_data import NiftiMasker
import pandas as pd
import pdb
import os
def getgm(path):
"""Gets the subject number on the folder that contains the scans
and appends them on a list called dirnames"""
gmnames = []
paths = []
for root, dirs, files in os.walk(path):
for filename in files:
gmnames.append(filename)
for gmname in gmnames:
path = os.path.join('Z:\Projects\MTS_Project\spm\dataset', gmname)
paths.append(path)
return paths
print(getgm('Z:\Projects\MTS_Project\spm\dataset'))
#loads MTSData as a list of paths
MTS_data = getgm('Z:\Projects\MTS_Project\spm\dataset')
print(MTS_data[0])
#loads side data from mtsdata.csv
fields = ['ID', 'SIDE']
csv = pd.read_csv('mtsdata.csv', skipinitialspace = True, usecols = fields)
age = csv.SIDE
nifti_masker = NiftiMasker(standardize = False, smoothing_fwhm=2, verbose=1)
gm_maps_masked = []
for data in MTS_data:
gm_maps_masked.append(nifti_masker.fit_transform(data))
#nifti_masker.fit(MTS_data)
#gm_maps_masked = nifti_masker.transform(MTS_data)
#print(gm_maps_masked)
#n_features = gm_maps_masked.shape
#print(n_features)
print("ANOVA + SVR")
# Define the prediction function to be used.
# Here we use a Support Vector Classification, with a linear kernel
from sklearn.svm import SVR
svr = SVR(kernel='linear')
# Dimension reduction
from sklearn.feature_selection import VarianceThreshold, SelectKBest, \
f_regression
# Remove features with too low between-subject variance
variance_threshold = VarianceThreshold(threshold=1)
# Here we use a classical univariate feature selection based on F-test,
# namely Anova.
feature_selection = SelectKBest(f_regression, k=2000)
# We have our predictor (SVR), our feature selection (SelectKBest), and now,
# we can plug them together in a *pipeline* that performs the two operations
# successively:
from sklearn.pipeline import Pipeline
anova_svr = Pipeline([
('variance_threshold', variance_threshold),
('anova', feature_selection),
('svr', svr)])
### Fit and predict
for gmmap in gm_maps_masked:
anova_svr.fit(gm_maps_masked,age)
pdb.set_trace()
age_pred = anova_svr.predict(gm_maps_masked)
这是错误和回溯:
Traceback (most recent call last):
File "learn.py", line 81, in <module>
anova_svr.fit(gm_maps_masked,age)
File "C:\Program Files\Python36\lib\site-packages\sklearn\pipeline.py", line 248, in fit
Xt, fit_params = self._fit(X, y, **fit_params)
File "C:\Program Files\Python36\lib\site-packages\sklearn\pipeline.py", line 213, in _fit
**fit_params_steps[name])
File "C:\Program Files\Python36\lib\site-packages\sklearn\externals\joblib\memory.py", line 362, in __call__
return self.func(*args, **kwargs)
File "C:\Program Files\Python36\lib\site-packages\sklearn\pipeline.py", line 581, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "C:\Program Files\Python36\lib\site-packages\sklearn\base.py", line 520, in fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "C:\Program Files\Python36\lib\site-packages\sklearn\feature_selection\variance_threshold.py", line 64, in fit
X = check_array(X, ('csr', 'csc'), dtype=np.float64)
File "C:\Program Files\Python36\lib\site-packages\sklearn\utils\validation.py", line 433, in check_array
array = np.array(array, dtype=dtype, order=order, copy=copy)
ValueError: could not broadcast input array from shape (4518515) into shape (1)