python - 无法在 Keras 函数式 API 中将 FeatureColumn 与类别列一起使用

Question

我正在使用 TensorFlow 2.2 的 Keras 函数式 API 构建一个使用特征列的模型。我参照此处的指南和“对结构化数据进行分类”教程编写了以下代码片段：

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    """Turn a DataFrame into a batched ``tf.data.Dataset`` of (features, label) pairs.

    The frame is copied first, so the caller's DataFrame keeps its
    'target' column.

    Args:
        dataframe: DataFrame that contains a 'target' column; every other
            column becomes an entry of the features dict.
        shuffle: When true, shuffle with a buffer as large as the number
            of rows before batching.
        batch_size: Number of rows per emitted batch.

    Returns:
        A dataset yielding ``(features_dict, labels)`` batches.
    """
    features = dataframe.copy()
    target = features.pop('target')
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), target))
    if not shuffle:
        return dataset.batch(batch_size)
    return dataset.shuffle(buffer_size=len(features)).batch(batch_size)

if __name__ == '__main__':
    # Reproduction script: wires an 'age' numeric column and a one-hot
    # 'thal' categorical column into a Keras functional model, then calls
    # the model on a single batch taken from the dataset.
    URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
    dataframe = pd.read_csv(URL)
    batch_size = 5  # A small batch size is used for demonstration purposes
    train_ds = df_to_dataset(dataframe, batch_size=batch_size)
    # df_to_dataset builds the dataset from a (features dict, labels) tuple,
    # so example_batch holds both parts, not only the features.
    example_batch = next(iter(train_ds))
    feature_columns = []
    # One symbolic Keras input per feature used by the model, keyed by name.
    inputs = {'age': tf.keras.layers.Input(name='age', shape=(), dtype='float32'),
              'thal': tf.keras.layers.Input(name='thal', shape=(), dtype='string')}
    feature_columns.append(feature_column.numeric_column('age'))
    # 'thal' is declared with the vocabulary ['fixed', 'normal', 'reversible'].
    thal = feature_column.categorical_column_with_vocabulary_list(
        'thal', ['fixed', 'normal', 'reversible'])
    # Wrap the categorical column as an indicator column before handing it
    # to DenseFeatures.
    thal_one_hot = feature_column.indicator_column(thal)
    feature_columns.append(thal_one_hot)
    x = layers.DenseFeatures(feature_columns)(inputs)
    preds = layers.Dense(1)(x)
    model = tf.keras.Model(inputs=inputs, outputs=preds)
    # NOTE(review): this is the call reported to raise "Cast int64 to string
    # is not supported"; presumably because the whole (features, labels)
    # tuple is passed instead of only the 'age'/'thal' features -- confirm.
    print(model(example_batch))

不幸的是，此代码失败并出现错误：

Traceback (most recent call last):
  File "test.py", line 42, in <module>
    print(model(example_batch))
  File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 968, in __call__
    outputs = self.call(cast_inputs, *args, **kwargs)
  File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 717, in call
    return self._run_internal_graph(
  File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 837, in _run_internal_graph
    y = self._conform_to_reference_input(y, ref_input=x)
  File "C:\Python38\lib\site-packages\tensorflow\python\keras\engine\network.py", line 961, in _conform_to_reference_input
    tensor = math_ops.cast(tensor, dtype=ref_input.dtype)
  File "C:\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 180, in wrapper
    return target(*args, **kwargs)
  File "C:\Python38\lib\site-packages\tensorflow\python\ops\math_ops.py", line 789, in cast
    x = gen_math_ops.cast(x, base_type, name=name)
  File "C:\Python38\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1971, in cast
    _ops.raise_from_not_ok_status(e, name)
  File "C:\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 6653, in raise_from_not_ok_status
    six.raise_from(core._status_to_exception(e.code, message), None)
  File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.UnimplementedError: Cast int64 to string is not supported [Op:Cast]

当我从 feature_columns 中删除类别列“thal”时，它可以正常工作。不幸的是，我需要将类别列传递到我的模型中。我不知道如何解决它。

score 0 · Accepted Answer

无需在代码 x = layers.DenseFeatures(feature_columns) 中传入 Inputs，因为 feature_columns 已经包含了所有特征对应的特征列。

使用 heart.csv 数据训练模型的完整可运行代码如下所示：

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    """Convert a DataFrame into a batched ``tf.data.Dataset`` of (features, label) pairs.

    Works on a copy of the frame, so the 'target' column is only removed
    from the copy and the caller's DataFrame is left untouched.

    Args:
        dataframe: DataFrame with a 'target' label column; the remaining
            columns form the features dict.
        shuffle: If true, shuffle using a buffer sized to the row count
            before batching.
        batch_size: Number of rows in each batch.

    Returns:
        A dataset whose elements are ``(features_dict, labels)`` batches.
    """
    frame = dataframe.copy()
    label_series = frame.pop('target')
    slices = (dict(frame), label_series)
    result = tf.data.Dataset.from_tensor_slices(slices)
    if shuffle:
        result = result.shuffle(buffer_size=len(frame))
    return result.batch(batch_size)

# Download the heart-disease CSV and preview its first rows.
URL = 'https://storage.googleapis.com/applied-dl/heart.csv'
dataframe = pd.read_csv(URL)
print(dataframe.head())

# A small batch size is used for demonstration purposes.
batch_size = 5
train_ds = df_to_dataset(dataframe, batch_size=batch_size)

# One dataset element; df_to_dataset yields (features dict, labels) tuples.
example_batch = next(iter(train_ds))

# Numeric feature column for 'age'.
age = feature_column.numeric_column('age')

# Categorical 'thal' column with a fixed vocabulary, wrapped as an
# indicator column so it can be fed to DenseFeatures.
thal = feature_column.categorical_column_with_vocabulary_list(
    'thal', ['fixed', 'normal', 'reversible'])
thal_one_hot = feature_column.indicator_column(thal)

feature_columns = [age, thal_one_hot]
print(feature_columns)

# DenseFeatures is used as the first layer of a Sequential model; no
# explicit Keras Input objects are created.
x = layers.DenseFeatures(feature_columns)

model = tf.keras.Sequential()
model.add(x)
model.add(layers.Dense(1))

# Dense(1) has no activation, so the loss is told to expect logits.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

model.fit(train_ds, epochs=5)

# NOTE(review): example_batch is a (features, labels) tuple; presumably only
# the features part should be passed when calling the model directly -- confirm.
#print(model(example_batch))

如果您需要任何其他信息或遇到任何其他错误，请告诉我，我很乐意为您提供帮助。

希望这对您有所帮助。祝学习愉快！

python - 无法在 Keras 函数式 API 中将 FeatureColumn 与类别列一起使用

1 回答 1

Related

Reference