我正在尝试训练一个序列到序列的自动编码器。输入形状是 [sample, time_step, input_dim],每个样本的时间步数各不相同。这是我的模型的实现:
# Feature width of every time step: used for the model input, the final
# reconstruction layer, and the one-hot encoding in process_data.
input_dim = 385
# Number of units in the per-time-step Dense layers on either side of the LSTM.
output_dim = 32
# Number of LSTM units: half of output_dim.
cells = output_dim // 2
def create_model():
    """Build the sequence autoencoder for ragged (variable-length) input.

    The stack is: ragged input -> per-time-step Dense(output_dim, relu) ->
    LSTM(cells) returning the full sequence -> per-time-step
    Dense(output_dim, relu) -> per-time-step Dense(input_dim, sigmoid).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    stack = (
        # Time dimension is left as None; ragged=True accepts RaggedTensor input.
        Input(shape=(None, input_dim,), ragged=True),
        TimeDistributed(Dense(output_dim, activation='relu')),
        # return_sequences=True keeps one output vector per time step.
        LSTM(cells, return_sequences=True),
        TimeDistributed(Dense(output_dim, activation='relu')),
        # Final layer maps each time step back to input_dim values in [0, 1].
        TimeDistributed(Dense(input_dim, activation='sigmoid')),
    )

    autoencoder = Sequential()
    for layer in stack:
        autoencoder.add(layer)
    return autoencoder
我的初始数据是一个 Python 列表,格式为 [list(), list(), list(), ...],其中每个 list() 都是一个序列,形状为 [time_step, input_dim]。我通过下面这个函数将数据转换为 TensorFlow 的 RaggedTensor:
def process_data(data):
    """Convert a collection of variable-length sequences to a RaggedTensor.

    Every sequence is flattened into one long list of time steps, the steps
    are one-hot encoded with ``input_dim`` classes, and the encoded rows are
    regrouped into their original sequences by row length.

    Args:
        data: iterable of sequences; each sequence must support ``len()`` and
            iteration (presumably integer class ids per time step, since
            they are passed to ``to_categorical`` -- confirm against the
            dataset).

    Returns:
        A ``tf.RaggedTensor`` with one row per input sequence.
    """
    row_lengths = []
    flat_steps = []
    for sequence in data:
        # Remember each sequence's length so the flat rows can be regrouped.
        row_lengths.append(len(sequence))
        flat_steps.extend(sequence)

    one_hot = to_categorical(flat_steps, num_classes=input_dim)
    return tf.RaggedTensor.from_row_lengths(one_hot, row_lengths)
这是我的训练代码:
# Build the autoencoder and compile it with the Adam optimizer and MSE loss.
model = create_model()
# NOTE(review): the traceback reported for this script is raised inside the
# compiled loss, which tries to convert the ragged prediction to a dense
# Tensor. The built-in 'mse' loss therefore appears not to accept RaggedTensor
# outputs in this TensorFlow version -- confirm, and consider a ragged-aware
# loss or padded/masked dense input instead.
model.compile(optimizer='adam', loss='mse')
# Load training data for autoencoder
# NOTE: allow_pickle=True lets np.load unpickle object arrays; only load
# trusted files this way.
train_ae = np.load('dataset/ADFA-LD/train_ae.npy', allow_pickle=True)
train_ae = process_data(train_ae)
# Load validation data (a combination of normal and malware data)
val_norm = np.load('dataset/ADFA-LD/val_norm.npy', allow_pickle=True)
val_att = np.load('dataset/ADFA-LD/val_att.npy', allow_pickle=True)
# Labels: 0 for every normal sequence, 1 for every attack sequence. They must
# be built before process_data rebinds val_norm / val_att below.
y_val = np.concatenate([np.zeros(len(val_norm)), np.ones(len(val_att))])
val_norm = process_data(val_norm)
val_att = process_data(val_att)
# Stack normal then attack sequences along the sample axis, matching the
# order of y_val.
x_val = tf.concat([val_norm, val_att], axis=0)
# Train
# `validate` is defined outside this snippet; presumably it builds a Keras
# callback that evaluates on (x_val, y_val) -- confirm.
cb = validate(x_val, y_val)
# Autoencoder training: the input is also the reconstruction target.
model.fit(x=train_ae, y=train_ae, epochs=10, callbacks=[cb])
当我训练时,出现错误:
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function *
return step_function(self, iterator)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
return fn(*args, **kwargs)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step **
outputs = model.train_step(data)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py:748 train_step
loss = self.compiled_loss(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__
losses = ag_call(y_true, y_pred)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\losses.py:250 call **
y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\losses\util.py:71 squeeze_or_expand_dimensions
y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\confusion_matrix.py:62 remove_squeezable_dimensions
predictions = ops.convert_to_tensor(predictions)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py:1499 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\constant_op.py:338 _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\constant_op.py:263 constant
return _constant_impl(value, dtype, shape, name, verify_shape=False,
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\constant_op.py:280 _constant_impl
tensor_util.make_tensor_proto(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\tensor_util.py:550 make_tensor_proto
raise TypeError("Failed to convert object of type %s to Tensor. "
TypeError: Failed to convert object of type <class 'tensorflow.python.ops.ragged.ragged_tensor.RaggedTensor'> to Tensor. Contents: tf.RaggedTensor(values=Tensor("sequential/time_distributed_2/dense_2/Sigmoid:0", shape=(None, 385), dtype=float32), row_splits=Tensor("sequential/time_distributed_2/RaggedFromRowLengths/control_dependency:0", shape=(None,), dtype=int64)). Consider casting elements to a supported type.
我推测错误是由 RaggedTensor 到 Tensor 的转换失败引起的;从日志中可以看到,错误发生在与损失和预测相关的函数中:
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\training.py:748 train_step
loss = self.compiled_loss(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__
losses = ag_call(y_true, y_pred)
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\losses.py:250 call **
y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\losses\util.py:71 squeeze_or_expand_dimensions
y_true, y_pred = confusion_matrix.remove_squeezable_dimensions(
C:\Users\hennm\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\confusion_matrix.py:62 remove_squeezable_dimensions
predictions = ops.convert_to_tensor(predictions)
所以我认为,损失函数的实现会尝试把 y_pred 转换为 Tensor,因此无法处理 RaggedTensor 类型的输出(预测)。如果是这样的话,有什么办法可以解决吗?