2

我正在使用 Keras 的预训练 MobileNet 并希望使用 TensorFlows 联合学习对其进行训练,但我总是收到一个错误,即协议缓冲区无法序列化,因为超过了 2GB 限制。我的输入是 224x224 RGB 图像。

编辑:我有一个包含 1000 个图像的数据集(每个客户端 500 个图像),每个数据集使用 10 个 epoch。

我创建和训练 Keras 模型的代码:

def create_compiled_keras_model():
    base_model = MobileNet(weights='imagenet', include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    preds = Dense(47, activation='softmax')(x)

    # specify input & output
    model = Model(inputs=base_model.input, outputs=preds)

    # set the first 20 layers of the network to be non-trainable
    for layer in model.layers[:20]:
        layer.trainable = False
    for layer in model.layers[20:]:
        layer.trainable = True

    def loss_fn(y_true, y_pred):
        return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(
        y_true, y_pred))

    model.compile(
    loss=loss_fn,
    optimizer=gradient_descent.SGD(learning_rate=0.02),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    return model


def model_fn():
    keras_model = create_compiled_keras_model()
    return tff.learning.from_compiled_keras_model(keras_model,  sample_batch)


# @test {"output": "ignore"}
iterative_process =   tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()

# @test {"timeout": 600, "output": "ignore"}

state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

我收到的错误:

Traceback (most recent call last):
  File "/Users/recep/Documents/federated_learning.py", line 128, in <module>
    state, metrics = iterative_process.next(state, federated_train_data)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/function_utils.py", line 600, in __call__
    return context.invoke(self, arg)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 700, in invoke
    result = computed_comp.value(computed_arg)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 843, in <lambda>
    return ComputedValue(lambda x: self._compute(comp.result, _wrap(x)),
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 784, in _compute_call
    computed_arg = self._compute(comp.argument, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 746, in _compute
    return self._compute_tuple(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 802, in _compute_tuple
    computed_v = self._compute(v, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 744, in _compute
    return self._compute_call(comp, context)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 791, in _compute_call
    result = computed_fn.value(computed_arg)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 871, in <lambda>
    lambda x: my_method(fit_argument(x, arg_type, context)),
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 914, in _federated_map
    fn(ComputedValue(x, mapping_type.parameter)).value for x in arg.value[1]
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 914, in <listcomp>
    fn(ComputedValue(x, mapping_type.parameter)).value for x in arg.value[1]
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 775, in <lambda>
    return ComputedValue(lambda x: run_tensorflow(comp, x),
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/reference_executor.py", line 340, in run_tensorflow
    comp.proto, stamped_arg, graph))
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/tensorflow_deserialization.py", line 90, in deserialize_and_call_tf_computation
    arg_type, arg_binding = graph_utils.capture_result_from_graph(arg, graph)
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/graph_utils.py", line 244, in capture_result_from_graph
    for k, v in name_value_pairs
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/graph_utils.py", line 244, in <listcomp>
    for k, v in name_value_pairs
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow_federated/python/core/impl/graph_utils.py", line 265, in capture_result_from_graph
    handle_name = result.make_one_shot_iterator().string_handle().name
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1358, in make_one_shot_iterator
    _make_dataset.add_to_graph(ops.get_default_graph())
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 490, in add_to_graph
    self._create_definition_if_needed()
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 341, in _create_definition_if_needed
    self._create_definition_if_needed_impl()
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 417, in _create_definition_if_needed_impl
    self._op_def = self.definition.signature
  File "/Users/recep/anaconda3/envs/python37/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 289, in definition
    c_api.TF_FunctionToFunctionDef(self._c_func.func, buf)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot serialize protocol buffer of type tensorflow.FunctionDef as the serialized size (3004868250bytes) would be larger than the limit (2147483647 bytes)
4

1 回答 1

1

0.12.0TensorFlow Federated 版本中,默认情况下开始使用新的 eager-mode 执行器,这可能会显着改善这一点;有报告称训练 ResNet 模型的轮次已完成。

于 2020-02-14T04:55:49.853 回答