I have built a DQN with tf-keras, and now I want to extend it by adding some Bayesian Flipout layers from tensorflow-probability. However, I am getting an error. I think the error comes from using a TensorFlow layer without a name scope, but I am not sure where the name scope should go (I'm new to tf, so...).
DQN code:
import random
from collections import deque

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import Adam


class DQN:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95     # discount rate
        self.epsilon = 0.81   # exploration rate
        self.epsilon_min = 0.2
        self.epsilon_decay = 0.965
        self.lr = 0.001
        self.model = self._build_model()
        self.optimizer = Adam(lr=self.lr)

    def _build_model(self):
        model = Sequential()
        ## model.add(tfp.layers.Convolution2DFlipout(2, kernel_size=5,
        ##                                           activation='relu'))
        model.add(tf.layers.Dropout(rate=0.3))
        model.add(Flatten())
        # model.add(tfp.layers.DenseFlipout(self.action_size * 4, activation='relu'))
        # model.add(tfp.layers.DenseFlipout(self.action_size * 3, activation='relu'))
        model.add(tfp.layers.DenseFlipout(self.action_size * 2, activation='relu'))
        model.add(tfp.layers.DenseFlipout(self.action_size))
        return model

    def remember(self, state, action, reward, next_state, done):
        # store a transition for experience replay
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        # epsilon-greedy action selection
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            act_values = self.model.predict(state)
            return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        # sample a minibatch and fit the model on the Q-learning targets
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.history = self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        loss = self.history.history['loss'][0]
        return loss

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

    def initialize(self):
        self.model.compile(loss='mean_squared_logarithmic_error', optimizer=self.optimizer)
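For context, here is a minimal standalone sketch of just the model-building part that I am experimenting with. It swaps tf.layers.Dropout for tf.keras.layers.Dropout and gives the first layer an input_shape so the model is built up front rather than on the first predict() call; the state shape and action count below are placeholder values for the example, not the ones from my real agent, and I have not verified that this is the right fix.

# Minimal sketch (unverified assumption): same layer stack, but built from
# tf.keras layers only, with an explicit input_shape on the first layer.
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

state_shape = (8, 8, 1)   # placeholder state shape for the example
action_size = 4           # placeholder action count for the example

model = tf.keras.Sequential([
    tf.keras.layers.Dropout(rate=0.3, input_shape=state_shape),
    tf.keras.layers.Flatten(),
    tfp.layers.DenseFlipout(action_size * 2, activation=tf.nn.relu),
    tfp.layers.DenseFlipout(action_size),
])
model.compile(loss='mean_squared_logarithmic_error',
              optimizer=tf.keras.optimizers.Adam(lr=0.001))

# dummy forward pass to check that predict() works before wiring this
# back into the DQN class
dummy_state = np.zeros((1,) + state_shape, dtype=np.float32)
print(model.predict(dummy_state))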
Error message:
Traceback (most recent call last):
  File "thoughtmouse.py", line 57, in <module>
    combined()
  File "thoughtmouse.py", line 18, in combined
    state_action_reward_loop(agent)
  File "thoughtmouse.py", line 30, in state_action_reward_loop
    rewards = agent.replay(1)
  File "/home/ai/Downloads/ScreenMouse/bdqn.py", line 88, in replay
    target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1096, in predict
    x, check_steps=True, steps_name='steps', steps=steps)
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 2289, in _standardize_user_data
    self._set_inputs(cast_inputs)
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/training/checkpointable/base.py", line 442, in _method_wrapper
    method(self, *args, **kwargs)
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 2529, in _set_inputs
    outputs = self.call(inputs, training=training)
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py", line 233, in call
    inputs, training=training, mask=mask)
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/keras/engine/sequential.py", line 253, in _call_and_compute_mask
    with ops.name_scope(layer._name_scope()):
  File "/home/ai/anaconda3/envs/drl/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 284, in _name_scope
    return self._current_scope.original_name_scope
AttributeError: 'NoneType' object has no attribute 'original_name_scope'