I want to initially train my reinforcement-learning model in a gym environment and then deploy it to the real environment, where it should keep learning.

I'm using TF, Keras-RL and gym for the initial training; the code is below. What is the right way to manage this? I'm getting lost in Google searches.

My guess is that in the real environment I should have two agents: one for prediction and one for further training. The training agent would work on the state/action samples collected at runtime, and the newly trained model would then be merged into the prediction model. If that assumption is correct, how can it be implemented? (A rough sketch of what I have in mind follows the training code below.)
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.callbacks import ModelIntervalCheckpoint, FileLogger
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory

# FooEnv is my custom gym environment
env = FooEnv()
env.seed(0)

states = env.observation_space.shape
actions = env.action_space.n

def build_model(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + states))  # window_length=1 prepends a time axis
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

model = build_model(states, actions)
model.summary()

def build_agent(model, actions):
    # epsilon annealed from 1.0 to 0.1 over the first 500 steps
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=500)
    memory = SequentialMemory(limit=10000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, enable_double_dqn=True,
                   nb_actions=actions, gamma=.98, nb_steps_warmup=100, target_model_update=1e-2)
    return dqn

def build_callbacks(env_name):
    checkpoint_weights_filename = 'weights/dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = 'weights/dqn_{}_log.json'.format(env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks

callbacks = build_callbacks('FooEnv')
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=30000, log_interval=1000, nb_max_episode_steps=50,
        visualize=False, verbose=1, callbacks=callbacks)
scores = dqn.test(env, nb_episodes=1, visualize=True)
dqn.save_weights('weights/saved_weights', overwrite=True)
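For reference, I know I can at least resume keras-rl training from the saved weights, e.g. in a fresh process on the target machine (RealEnv below is a hypothetical gym-style wrapper around the real system, not something I have written yet), but that gives me a single agent that both explores and serves, not the prediction/training split I described:

real_env = RealEnv()  # hypothetical gym-compatible wrapper around the real environment

# assumes the real environment has the same observation/action spaces as FooEnv
dqn = build_agent(build_model(states, actions), actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.load_weights('weights/saved_weights')  # start from the gym-trained weights
# note: LinearAnnealedPolicy will restart epsilon from 1.0 here
dqn.fit(real_env, nb_steps=10000, nb_max_episode_steps=50, visualize=False, verbose=1)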
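And here is a rough sketch of the two-agent setup I have in mind. Everything below is hypothetical: RealEnv, the sync interval, and the simplified plain-Keras DQN update (no target network, greedy action selection) are placeholders to illustrate the idea, not working deployment code. It also assumes the real environment exposes the same observation/action spaces as FooEnv.

import random
from collections import deque

GAMMA = 0.98
SYNC_EVERY = 500     # how often trained weights are merged into the predictor
BATCH_SIZE = 32

predict_model = build_model(states, actions)  # serves actions in production
train_model = build_model(states, actions)    # keeps learning at runtime
train_model.compile(optimizer='adam', loss='mse')

# both models start from the weights learned in the gym phase
train_model.load_weights('weights/saved_weights')
predict_model.set_weights(train_model.get_weights())

replay = deque(maxlen=10000)  # (state, action, reward, next_state, done) samples

obs = real_env.reset()
for step in range(1, 100001):
    # the prediction model picks the action that is actually executed
    # (greedy here; some residual exploration may be wanted in practice)
    q_values = predict_model.predict(np.asarray(obs)[None, None, ...], verbose=0)
    action = int(np.argmax(q_values[0]))
    next_obs, reward, done, info = real_env.step(action)
    replay.append((obs, action, reward, next_obs, done))
    obs = real_env.reset() if done else next_obs

    # the training model learns from the runtime samples (simplified DQN update)
    if len(replay) >= BATCH_SIZE:
        batch = random.sample(replay, BATCH_SIZE)
        s = np.asarray([b[0] for b in batch])[:, None, ...]   # add the window axis
        s2 = np.asarray([b[3] for b in batch])[:, None, ...]
        a = np.asarray([b[1] for b in batch])
        r = np.asarray([b[2] for b in batch], dtype=np.float32)
        d = np.asarray([b[4] for b in batch], dtype=np.float32)
        targets = train_model.predict(s, verbose=0)
        targets[np.arange(BATCH_SIZE), a] = r + GAMMA * (1. - d) * train_model.predict(s2, verbose=0).max(axis=1)
        train_model.train_on_batch(s, targets)

    # periodically merge the freshly trained weights into the prediction model
    if step % SYNC_EVERY == 0:
        predict_model.set_weights(train_model.get_weights())

Is copying weights with get_weights()/set_weights() like this the right way to do the "merge" step, or is there a standard pattern for it?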