我正在尝试为灰度 MRI 数据的二进制分类构建 3D CNN。我是新手,所以不要打任何拳,我是来学习的!我有 20 个 3D 文件的子样本,尺寸为 (189、233、197)。我使用 np.reshape 添加 aa 维度作为通道来获得 (189, 233, 197, 1)。我使用 tf.shape 来获取数据集的形状,即
<tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 20, 189, 233, 197, 1], dtype=int32)>
在标签数据上也是如此
<tf.Tensor: shape=(1,), dtype=int32, numpy=array([20], dtype=int32)>
以下是我正在使用的完整代码:
import numpy as np
import glob
import os
import tensorflow as tf
import pandas as pd
import glob
import SimpleITK as sitk
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from google.colab import drive
drive.mount('/content/gdrive')
datapath = ('/content/gdrive/My Drive/DirectoryTest/All Data/')
patients = os.listdir(datapath)
labels_df = pd.read_csv('/content/Data_Index.csv', index_col = 0 )
FullDataSet = []
for patient in patients:
a = sitk.ReadImage(datapath + patient)
b = sitk.GetArrayFromImage(a)
c = np.reshape(b, (189,233,197))
FullDataSet.append(c)
labelset = []
for i in patients:
label = labels_df.loc[i, 'Group']
if label == 'AD': # use `==` instead of `is` to compare strings
labelset.append(0.)
elif label == 'CN':
labelset.append(1.)
else:
raise "Oops, unknown label"
labelset = np.array(labelset)
x_train, x_valid, y_train, y_valid = train_test_split(FullDataSet, labelset, train_size=0.75)
## 3D CNN
CNN_model = tf.keras.Sequential(
[
#tf.keras.layers.Reshape([189, 233, 197, 1], input_shape=[189, 233, 197]),
tf.keras.layers.Input(shape =[ 189, 233, 197, 1] ),
tf.keras.layers.Conv3D(kernel_size=(7, 7, 7), filters=32, activation='relu',
padding='same', strides=(3, 3, 3)),
#tf.keras.layers.BatchNormalization(),
tf.keras.layers.MaxPool3D(pool_size=(3, 3, 3), padding='same'),
tf.keras.layers.Dropout(0.20),
tf.keras.layers.Conv3D(kernel_size=(5, 5, 5), filters=64, activation='relu',
padding='same', strides=(3, 3, 3)),
#tf.keras.layers.BatchNormalization(),
tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2), padding='same'),
tf.keras.layers.Dropout(0.20),
tf.keras.layers.Conv3D(kernel_size=(3, 3, 3), filters=128, activation='relu',
padding='same', strides=(1, 1, 1)),
#tf.keras.layers.BatchNormalization(),
tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2), padding='same'),
tf.keras.layers.Dropout(0.20),
# last activation could be either sigmoid or softmax, need to look into this more. Sig for binary output, Soft for multi output
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Dense(64, activation='relu'),
tf.keras.layers.Dropout(0.20),
tf.keras.layers.Dense(1, activation='sigmoid')
])
# Compile the model
CNN_model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.00001), loss='binary_crossentropy', metrics=['accuracy'])
# print model layers
CNN_model.summary()
CNN_history = CNN_model.fit(x_train, y_train, epochs=10, validation_data=[x_valid, y_valid])
当我尝试拟合模型时,维度似乎没有对齐,并且出现以下错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-48-c698c45a4d36> in <module>()
1 #running of the model
2 #CNN_history = CNN_model.fit(dataset_train, epochs=100, validation_data =dataset_test, validation_steps=1)
----> 3 CNN_history = CNN_model.fit(x_train, y_train, epochs=10, validation_data=[x_valid, y_valid], batch_size = 1)
4
5
3 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1061 use_multiprocessing=use_multiprocessing,
1062 model=self,
-> 1063 steps_per_execution=self._steps_per_execution)
1064
1065 # Container that configures and calls `tf.keras.Callback`s.
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weight, batch_size, steps_per_epoch, initial_epoch, epochs, shuffle, class_weight, max_queue_size, workers, use_multiprocessing, model, steps_per_execution)
1115 use_multiprocessing=use_multiprocessing,
1116 distribution_strategy=ds_context.get_strategy(),
-> 1117 model=model)
1118
1119 strategy = ds_context.get_strategy()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, sample_weight_modes, batch_size, epochs, steps, shuffle, **kwargs)
280 label, ", ".join(str(i.shape[0]) for i in nest.flatten(data)))
281 msg += "Please provide data which shares the same first dimension."
--> 282 raise ValueError(msg)
283 num_samples = num_samples.pop()
284
ValueError: Data cardinality is ambiguous:
x sizes: 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189
y sizes: 15
Please provide data which shares the same first dimension.
训练拆分设置为 0.75,因此 20 个中有 15 个。我很困惑为什么这不起作用并且无法弄清楚为什么这是模型正在接收的输入。我之前得到了一些帮助,使用以下代码创建一个虚拟集会导致模型运行:
train_size = 20
val_size = 5
X_train = np.random.random([train_size, 189, 233, 197]).astype(np.float32)
X_valid = np.random.random([val_size, 189, 233, 197]).astype(np.float32)
y_train = np.random.randint(2, size=train_size).astype(np.float32)
y_valid = np.random.randint(2, size=val_size).astype(np.float32)
在这个问题上,我已经把头撞在墙上一段时间了。任何帮助将不胜感激。