我正在尝试使用 TPU 在 Google-colaboratory 上微调 BERT 模型。但我总是收到以下错误:
ValueError: 某个变量不是在分发策略 (<tensorflow.python.distribute.tpu_strategy.TPUStrategyV1 object at 0x7f6a1fad3390>) 的作用域内创建的。这很可能是因为并非所有的层、模型或优化器都是在分发策略作用域内创建的。请尽量确保您的代码与下面的示例类似。
with strategy.scope():
    model=_create_model()
    model.compile(...)
我的代码基于这个笔记本!我针对我的具体问题对其进行了修改,显然尝试在 TPU 上运行它。
我有一个自定义层 BertLayer,它显然是在作用域之外创建的:
class BertLayer(tf.keras.layers.Layer):
    """Keras layer that wraps a TF-Hub BERT module and returns its pooled output.

    The hub module is loaded from the module-level ``bert_path`` when the layer
    is built. Among the module variables whose name does not contain
    ``"/cls/"``, the last ``n_fine_tune_layers`` are registered as trainable
    weights; every other module variable is registered as non-trainable.

    Args:
        n_fine_tune_layers: How many trailing module variables to fine-tune.
            Note that this counts variables, not transformer layers. A value
            of 0 (or less) keeps every module variable frozen.
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer.__init__``.
    """

    def __init__(self, n_fine_tune_layers=10, **kwargs):
        self.n_fine_tune_layers = n_fine_tune_layers
        self.trainable = True
        # Width reported by compute_output_shape; presumably the hidden size of
        # the module at bert_path -- confirm if a different module is used.
        self.output_size = 768
        super(BertLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Load the hub module and register its variables on this layer."""
        self.bert = hub.Module(
            bert_path,
            trainable=self.trainable,
            name="{}_module".format(self.name),
        )
        module_vars = self.bert.variables

        # Variables under "/cls/" are never candidates for fine-tuning.
        candidates = [var for var in module_vars if "/cls/" not in var.name]

        # Guard the zero case explicitly: candidates[-0:] is the *whole* list,
        # so slicing alone would unfreeze everything when 0 is requested.
        if self.n_fine_tune_layers > 0:
            trainable_vars = candidates[-self.n_fine_tune_layers:]
        else:
            trainable_vars = []

        # Register the selected variables as trainable weights.
        for var in trainable_vars:
            self._trainable_weights.append(var)

        # Everything else the module owns (including the "/cls/" variables)
        # is registered as non-trainable.
        for var in module_vars:
            if var not in self._trainable_weights:
                self._non_trainable_weights.append(var)

        super(BertLayer, self).build(input_shape)

    def call(self, inputs):
        """Run the module's "tokens" signature and return ``pooled_output``.

        Args:
            inputs: Sequence of exactly three tensors, in the order
                ``(input_ids, input_mask, segment_ids)``; each is cast to int32.
        """
        inputs = [K.cast(x, dtype="int32") for x in inputs]
        input_ids, input_mask, segment_ids = inputs
        bert_inputs = dict(
            input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
        )
        result = self.bert(inputs=bert_inputs, signature="tokens", as_dict=True)[
            "pooled_output"
        ]
        return result

    def compute_output_shape(self, input_shape):
        """Return ``(input_shape[0], self.output_size)``."""
        return (input_shape[0], self.output_size)
模型创建在这里完成:
def build_model(max_seq_length):
    """Assemble the (uncompiled) Keras classifier on top of ``BertLayer``.

    The output layer width is read from the module-level ``train_labels``
    (``train_labels.shape[1]``), so that array must exist and be at least
    two-dimensional before this function is called.

    Args:
        max_seq_length: Length of each of the three token-level inputs.

    Returns:
        A ``tf.keras.models.Model`` whose inputs are
        ``[input_ids, input_masks, segment_ids]`` and whose output is a
        sigmoid-activated dense layer.
    """
    # Three parallel inputs, each of shape (max_seq_length,).
    in_id = tf.keras.layers.Input(shape=(max_seq_length,), name="input_ids")
    in_mask = tf.keras.layers.Input(shape=(max_seq_length,), name="input_masks")
    in_segment = tf.keras.layers.Input(shape=(max_seq_length,), name="segment_ids")
    bert_inputs = [in_id, in_mask, in_segment]

    # Instantiate the custom BERT layer and apply it to the inputs.
    bert_output = BertLayer(n_fine_tune_layers=10)(bert_inputs)

    # Classifier head: one hidden layer, then one sigmoid unit per label column.
    dense = tf.keras.layers.Dense(256, activation='relu')(bert_output)
    pred = tf.keras.layers.Dense(train_labels.shape[1], activation='sigmoid')(dense)

    model = tf.keras.models.Model(inputs=bert_inputs, outputs=pred)
    return model
调用 model.compile 时发生错误:
# Resolve the TPU at TPU_ADDRESS and wrap it in a distribution strategy.
strategy = tf.distribute.experimental.TPUStrategy(
    tpu_cluster_resolver=tf.contrib.cluster_resolver.TPUClusterResolver(
        tpu=TPU_ADDRESS
    )
)

# Model, optimizer and compile step are all issued inside the strategy scope.
with strategy.scope():
    model = build_model(max_seq_length=256)
    opt = tf.train.AdamOptimizer(learning_rate=0.001)
    model.compile(optimizer=opt, loss='binary_crossentropy')
据我了解,BertLayer 确实是在作用域内创建的,但我对 Keras 和 TensorFlow 比较陌生,所以非常感谢您的帮助。我使用的是 TensorFlow 1.14。