我正在尝试构建一个自动编码器,但我确定自己在某个地方做错了。我尝试将模型的创建与实际训练分开,但这样做并没有成功,并且出现了以下错误:
AssertionError: Could not compute output KerasTensor(type_spec=TensorSpec(shape=(None, 310), dtype=tf.float32, name=None), name='dense_7/Sigmoid:0', description="created by layer 'dense_7'")
我正在使用 Kedro 框架完成这一切。我有一个包含管道定义的 pipeline.py 文件,以及一个包含我想要使用的函数的 nodes.py 文件。到目前为止,这是我的项目结构:
pipeline.py:
from kedro.pipeline import Pipeline, node
from .nodes.autoencoder_nodes import *
def train_autoencoder_pipeline():
    """Assemble the Kedro pipeline that builds, scales data for, and trains the autoencoder.

    The pipeline has five nodes: one builds the untrained models, one fits
    the scaler on ``train_x``, two apply the scaler to ``train_x`` and
    ``test_x``, and one trains the autoencoder on the scaled data.

    Returns:
        A ``Pipeline`` containing the five nodes described above.
    """
    # Build neural network
    create_models = node(
        build_models,
        inputs=[
            "train_x",
            "params:autoencoder_n_hidden_layers",
            "params:autoencoder_latent_space_size",
            "params:autoencoder_regularization_strength",
            "params:seed",
        ],
        outputs={
            "pre_train_autoencoder": "pre_train_autoencoder",
            "pre_train_encoder": "pre_train_encoder",
            "pre_train_decoder": "pre_train_decoder",
        },
        name="autoencoder-create-models",
    )

    # Scale features
    fit_scaler_node = node(
        fit_scaler,
        inputs="train_x",
        outputs="autoencoder_scaler",
        name="autoencoder-fit-scaler",
    )
    scale_train = node(
        tranform_scaler,
        inputs=["autoencoder_scaler", "train_x"],
        outputs="autoencoder_scaled_train_x",
        name="autoencoder-scale-train",
    )
    scale_test = node(
        tranform_scaler,
        inputs=["autoencoder_scaler", "test_x"],
        outputs="autoencoder_scaled_test_x",
        name="autoencoder-scale-test",
    )

    # Train autoencoder
    train_model = node(
        train_autoencoder,
        inputs=[
            "autoencoder_scaled_train_x",
            "autoencoder_scaled_test_x",
            "pre_train_autoencoder",
            "pre_train_encoder",
            "pre_train_decoder",
            "params:autoencoder_epochs",
            "params:autoencoder_batch_size",
            "params:seed",
        ],
        outputs={
            "autoencoder": "autoencoder",
            "encoder": "encoder",
            "decoder": "decoder",
            "autoencoder_history": "autoencoder_history",
        },
        name="autoencoder-train-model",
    )

    return Pipeline([
        create_models,
        fit_scaler_node,
        scale_train,
        scale_test,
        train_model,
    ])
nodes.py:
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
import tensorflow as tf
from typing import Dict, Any, Tuple
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import logging
def build_models(data: pd.DataFrame, n_hidden_layers: int, latent_space_size: int, retularization_stregth: float, seed: int) -> Dict[str, keras.Model]:
    """Build an untrained autoencoder plus its encoder and decoder sub-models.

    The encoder maps the input features through ``n_hidden_layers``
    L1-regularized Dense + LeakyReLU layers to a sigmoid bottleneck of
    ``latent_space_size`` units. The decoder mirrors it and ends in a sigmoid
    Dense layer of ``n_features`` units. The three returned models share the
    same layer objects, so training the autoencoder also updates the weights
    seen by the encoder and the decoder.

    Args:
        data: Table whose second dimension (``data.shape[1]``) gives the
            number of input features.
        n_hidden_layers: Number of hidden layers on each side of the
            bottleneck; must be at least 1.
        latent_space_size: Number of units in the bottleneck layer.
        retularization_stregth: Factor passed to the L1 kernel regularizer of
            every hidden Dense layer.
        seed: Value passed to ``tf.random.set_seed``.

    Returns:
        A dict with the keys ``pre_train_autoencoder`` (compiled with the
        Adam optimizer and mean-absolute-error loss), ``pre_train_encoder``
        and ``pre_train_decoder``.

    Raises:
        AssertionError: If ``n_hidden_layers`` is less than 1.
    """
    assert n_hidden_layers >= 1, "There must be at least 1 hidden layer for the autoencoder"
    n_features = data.shape[1]
    tf.random.set_seed(seed)

    def _regularized_dense():
        # Every hidden Dense layer has the same width and regularizer.
        return keras.layers.Dense(
            n_features,
            kernel_regularizer=keras.regularizers.l1(retularization_stregth),
        )

    # Encoder: features -> latent representation.
    input_layer = keras.Input(shape=(n_features,))
    hidden = input_layer
    for _ in range(n_hidden_layers):
        hidden = _regularized_dense()(hidden)
        hidden = keras.layers.LeakyReLU()(hidden)
    encoded = keras.layers.Dense(latent_space_size, activation="sigmoid")(hidden)

    # Decoder layers are created once so they can be applied to two different
    # inputs (the encoder output and a standalone latent input) while sharing
    # a single set of weights.
    decoder_layers = []
    for _ in range(n_hidden_layers):
        decoder_layers.append(_regularized_dense())
        decoder_layers.append(keras.layers.LeakyReLU())
    decoder_layers.append(keras.layers.Dense(n_features, activation="sigmoid"))

    # Full autoencoder path: features -> latent -> reconstruction.
    decoded = encoded
    for layer in decoder_layers:
        decoded = layer(decoded)

    # Standalone decoder path: latent -> reconstruction. The previous version
    # wired the decoder from the feature input, which made it a duplicate of
    # the autoencoder instead of a model that accepts latent vectors.
    latent_input = keras.Input(shape=(latent_space_size,))
    reconstructed = latent_input
    for layer in decoder_layers:
        reconstructed = layer(reconstructed)

    # Defines the neural networks
    autoencoder = keras.models.Model(inputs=input_layer, outputs=decoded)
    encoder = keras.models.Model(inputs=input_layer, outputs=encoded)
    decoder = keras.models.Model(inputs=latent_input, outputs=reconstructed)
    autoencoder.compile(optimizer="adam", loss="mean_absolute_error")

    # NOTE(review): the models share layers by object identity. If whatever
    # passes them to the training step copies them (for example a deep copy
    # made by an in-memory dataset), that sharing is presumably lost and the
    # copied graph may no longer be usable -- confirm they are handed over by
    # reference.
    return {
        "pre_train_autoencoder": autoencoder,
        "pre_train_encoder": encoder,
        "pre_train_decoder": decoder,
    }
def fit_scaler(data: pd.DataFrame) -> MinMaxScaler:
    """Fit a fresh ``MinMaxScaler`` on ``data`` and return the fitted scaler."""
    # ``fit`` returns the estimator itself, so the call can be chained.
    return MinMaxScaler().fit(data)
def tranform_scaler(scaler: MinMaxScaler, data: pd.DataFrame) -> np.ndarray:
    """Apply an already fitted scaler to ``data``.

    Args:
        scaler: Scaler whose ``transform`` method is called; it is expected
            to have been fitted beforehand.
        data: Table of features to scale.

    Returns:
        Whatever ``scaler.transform(data)`` returns -- a NumPy array for a
        ``MinMaxScaler`` in its default configuration.
    """
    # The return annotation used to be ``np.array``, which is a factory
    # function rather than a type; ``np.ndarray`` is the array class.
    return scaler.transform(data)
def train_autoencoder(
        train_x: pd.DataFrame, test_x: pd.DataFrame,
        autoencoder: keras.Model, encoder: keras.Model, decoder: keras.Model,
        epochs: int, batch_size: int, seed: int) -> Dict[str, Any]:
    """Train the autoencoder to reconstruct its own input.

    Args:
        train_x: Training features, used as both input and target.
        test_x: Validation features, used as both input and target.
        autoencoder: Model that is fitted.
        encoder: Returned unchanged by this function.
        decoder: Returned unchanged by this function.
        epochs: Maximum number of epochs passed to ``fit``.
        batch_size: Batch size passed to ``fit``.
        seed: Value passed to ``tf.random.set_seed``.

    Returns:
        A dict with the keys ``autoencoder``, ``encoder``, ``decoder`` and
        ``autoencoder_history`` (the object returned by ``fit``).
    """
    tf.random.set_seed(seed)

    history_recorder = keras.callbacks.History()
    # Stop once the monitored metric has not improved for 3 epochs.
    early_stopping = keras.callbacks.EarlyStopping(patience=3)

    for features in (train_x, test_x):
        logging.info(features.shape)

    fit_result = autoencoder.fit(
        x=train_x,
        y=train_x,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[history_recorder, early_stopping],
        validation_data=(test_x, test_x),
    )

    return {
        "autoencoder": autoencoder,
        "encoder": encoder,
        "decoder": decoder,
        "autoencoder_history": fit_result,
    }
catalog.yaml:
# Untrained Keras models handed from the model-building node to the training
# node. Datasets without a catalog entry are kept in a MemoryDataSet whose
# default copy mode deep-copies arbitrary Python objects; `copy_mode: assign`
# passes the very same model objects through instead.
# NOTE(review): deep-copying the models is the presumed cause of the
# "Could not compute output KerasTensor" assertion -- confirm after this change.
pre_train_autoencoder:
  type: MemoryDataSet
  copy_mode: assign

pre_train_encoder:
  type: MemoryDataSet
  copy_mode: assign

pre_train_decoder:
  type: MemoryDataSet
  copy_mode: assign

# Fitted feature scaler.
autoencoder_scaler:
  type: pickle.PickleDataSet
  filepath: data/06_models/autoencoder_scaler.pkl

# Trained models.
autoencoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/autoencoder.h5

encoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/encoder.h5

decoder:
  type: kedro.extras.datasets.tensorflow.TensorFlowModelDataset
  filepath: data/06_models/decoder.h5

# NOTE(review): the pipeline shown with this catalog writes
# "autoencoder_scaled_train_x" / "autoencoder_scaled_test_x"; the two entries
# below use different names -- verify whether they are meant to match.
autoencoder_train_x:
  type: pandas.CSVDataSet
  filepath: data/04_feature/autoencoder_train_x.csv

autoencoder_test_x:
  type: pandas.CSVDataSet
  filepath: data/04_feature/autoencoder_test_x.csv
最后是 parameters.yaml:
# Random seed; the model-building and training nodes pass it to tf.random.set_seed.
seed: 200
# Autoencoder
# Number of Dense + LeakyReLU hidden layers on each side of the bottleneck (must be >= 1).
autoencoder_n_hidden_layers: 3
# Number of units in the bottleneck (latent) layer.
autoencoder_latent_space_size: 15
# Maximum number of training epochs passed to fit (early stopping may end training sooner).
autoencoder_epochs: 100
# Batch size passed to fit.
autoencoder_batch_size: 32
# Factor of the L1 kernel regularizer applied to every hidden Dense layer.
autoencoder_regularization_strength: 0.001
我认为 Keras 没有看到完整的计算图,因为各层的张量超出了 build_models 函数的作用域,但我不确定是否真是这个原因,也不知道该如何解决。任何帮助都将不胜感激。