我正在 TensorFlow Federated 中实现回归模型。我从本教程中用于 Keras 的简单模型开始:https://www.tensorflow.org/tutorials/keras/regression
我将模型更改为使用联邦学习。这是我的模型:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_federated as tff
# Fetch the Auto MPG data file through Keras' download helper.
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']
# "?" marks a missing value; anything after a tab on a line is treated as a
# comment and ignored by the parser.
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)
df = raw_dataset.copy()
df = df.dropna()

# One DataFrame per distinct 'Origin' value; each one plays the role of a
# separate federated client below.
dfs = [x for _, x in df.groupby('Origin')]
datasets = []
# NOTE(review): `targets` is never filled in the code visible here; it is kept
# only in case later code expects the name to exist.
targets = []

# Imported once, before the loop, instead of on every iteration.
from sklearn.preprocessing import StandardScaler

for dataframe in dfs:
    # Split the label column off; the remaining 7 columns are the features.
    target = dataframe.pop('MPG')

    # Features are standardized with statistics fitted on this client's own
    # rows only, so every client ends up with its own scaling.
    standard_scaler_x = StandardScaler(with_mean=True, with_std=True)
    normalized_values = standard_scaler_x.fit_transform(dataframe.values)

    dataset = tf.data.Dataset.from_tensor_slices(({'x': normalized_values, 'y': target.values}))
    # Training view: shuffled, repeated 10 times, batches of 20.
    train_dataset = dataset.shuffle(len(dataframe)).repeat(10).batch(20)
    # NOTE(review): `test_dataset` is built from the same rows as the training
    # view and is not collected anywhere in the visible code.
    test_dataset = dataset.shuffle(len(dataframe)).batch(1)
    datasets.append(train_dataset)
def build_model():
    """Return an uncompiled Keras regression network.

    The network takes 7 input features, passes them through two
    64-unit ReLU layers and ends in a single linear output unit.
    """
    first_hidden = layers.Dense(64, activation='relu', input_shape=[7])
    second_hidden = layers.Dense(64, activation='relu')
    output_layer = layers.Dense(1)
    return keras.Sequential([first_hidden, second_hidden, output_layer])
import collections

# Template Keras model; create_tff_model() clones it for each TFF model.
model = build_model()
# One concrete batch from the first client's dataset, converted to NumPy
# arrays, which create_tff_model() hands to TFF as an example batch.
# The builtin next() is used instead of the Python-2-style `.next()` method.
sample_batch = tf.nest.map_structure(
    lambda x: x.numpy(), next(iter(datasets[0])))
def loss_fn_Federated(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    The result of ``tf.keras.losses.MSE`` is averaged over all remaining
    elements by ``tf.reduce_mean`` (no axis given).
    """
    squared_error = tf.keras.losses.MSE(y_true, y_pred)
    return tf.reduce_mean(squared_error)
def create_tff_model():
    """Wrap a freshly compiled clone of the module-level Keras ``model`` for TFF.

    The clone is compiled with plain SGD (learning rate 0.002), an 'mse' loss
    and a MeanSquaredError metric, then converted with
    ``tff.learning.from_compiled_keras_model`` using ``sample_batch`` as the
    example batch.
    """
    cloned_model = tf.keras.models.clone_model(model)
    sgd_optimizer = tf.keras.optimizers.SGD(0.002)
    cloned_model.compile(
        optimizer=sgd_optimizer,
        loss='mse',
        metrics=[tf.keras.metrics.MeanSquaredError()],
    )
    return tff.learning.from_compiled_keras_model(cloned_model, sample_batch)
print("Create averaging process")
# Building the process constructs and serializes all the TensorFlow graphs.
iterative_process = tff.learning.build_federated_averaging_process(model_fn=create_tff_model)

print("Initzialize averaging process")
state = iterative_process.initialize()

print("Start iterations")
for _ in range(10):
    # Each call feeds the current state and every client dataset to the
    # process and yields the updated state plus that round's metrics.
    state, metrics = iterative_process.next(state, datasets)
    print(f'metrics={metrics}')
Start iterations
metrics=<mean_squared_error=95.8644027709961,loss=96.28633880615234>
metrics=<mean_squared_error=9.511247634887695,loss=9.522096633911133>
metrics=<mean_squared_error=8.26853084564209,loss=8.277074813842773>
metrics=<mean_squared_error=7.975323677062988,loss=7.9771647453308105>
metrics=<mean_squared_error=7.618809700012207,loss=7.644164562225342>
metrics=<mean_squared_error=7.347906112670898,loss=7.340310096740723>
metrics=<mean_squared_error=7.210267543792725,loss=7.210223197937012>
metrics=<mean_squared_error=7.045553207397461,loss=7.045469760894775>
metrics=<mean_squared_error=6.861278533935547,loss=6.878870487213135>
metrics=<mean_squared_error=6.80275297164917,loss=6.817670822143555>
# Evaluate the trained model weights with a federated evaluation computation.
# NOTE(review): `datasets` holds the same per-client *training* datasets that
# were passed to iterative_process.next(), so this is not a held-out test.
# NOTE(review): the metrics printed during training are presumably aggregated
# while each client is still updating its local copy of the model, whereas
# this call scores the single averaged model - confirm against the TFF docs
# before treating the gap between the two numbers as a bug.
evaluation = tff.learning.build_federated_evaluation(create_tff_model)
trained_weights = state.model
test_metrics = evaluation(trained_weights, datasets)
print(test_metrics)
<mean_squared_error=27.308320999145508,loss=27.19877052307129>
我很困惑:迭代过程返回的 mse 要小得多,为什么在 10 轮迭代之后、在同一训练集上评估得到的 mse 反而更高?我在这里做错了什么?还是 TensorFlow 中联邦学习(FL)的实现里隐藏了什么?有人可以向我解释吗?