I have a custom environment and am using Keras-rl to build a model and train an agent.

Error:

Model output "Tensor("dense_2/BiasAdd:0", shape=(None, 1, 3), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case 3.

Edit: if I change states to (3,), I get this error instead:

Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 3)

I think the problem is the shape and/or how I build the observation state. Overall, the state I return has the form [[],[],[]].
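To illustrate the shape problem (my own sketch, not part of the training code): the three parts of the state have different lengths, so NumPy cannot stack them into a rectangular array with a usable shape, which is what a Dense input layer needs.

import numpy as np

# the board has 4 entries, the deck up to 53, my_card is a scalar:
# the parts have different lengths, so this becomes a ragged
# object array instead of a rectangular (fixed-shape) one
obs = [[0, 0, 0, 0], [10, 9, 8, 7], 5]
arr = np.asarray(obs, dtype=object)
print(arr.shape)  # (3,) -- three Python objects, not a (1, 3) float tensor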
Environment:
import random

import gym
from gym import spaces


class Game(gym.Env):
    def __init__(self):
        self.score = 0
        self.board = [0, 0, 0, 0]
        self.deck = None
        self.my_card = None
        self.game_over = False
        self.init_deck()
        self.draw_card()
        self.state = [self.board, self.my_card, self.deck]
        self.action_space = spaces.Discrete(3)

    def init_deck(self):
        # sixteen 10s, four each of 9 down to 2, five 1s (53 cards total)
        self.deck = ([10] * 16 + [9] * 4 + [8] * 4 + [7] * 4 + [6] * 4 +
                     [5] * 4 + [4] * 4 + [3] * 4 + [2] * 4 + [1] * 5)
        random.shuffle(self.deck)

    def draw_card(self):
        self.my_card = self.deck.pop()

    def step(self, action):
        self.board[action] += self.my_card
        self.my_card = None
        if self.board[action] > 21:      # bust: clear the pile, lose points
            self.board[action] = 0
            self.score -= 10
            reward = -10
        elif self.board[action] == 21:   # exactly 21: clear the pile, gain points
            self.board[action] = 0
            self.score += 10
            reward = 10
        else:
            reward = 0
        self.draw_card()
        if len(self.deck) == 0:
            self.game_over = True
        done = self.game_over
        info = {"score": self.score}
        obs = [self.board, self.deck, self.my_card]
        print('Score: ', self.score)
        return obs, reward, done, info

    def reset(self):
        self.score = 0
        self.board = [0, 0, 0, 0]
        self.deck = None
        self.my_card = None
        self.game_over = False
        self.init_deck()
        self.draw_card()
        return [self.board, self.deck, self.my_card]
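One idea for the state (a sketch only; the environment above does not do this) is to encode everything into a fixed-length numeric vector and declare a matching observation_space. The encode_obs helper and the 6-element layout here are hypothetical:

import numpy as np
from gym import spaces

# hypothetical fixed-shape encoding: 4 board piles + current card + cards left
observation_space = spaces.Box(low=0.0, high=53.0, shape=(6,), dtype=np.float32)

def encode_obs(board, my_card, deck):
    # always returns shape (6,), so Keras sees a rectangular input
    return np.array(board + [my_card, len(deck)], dtype=np.float32)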
Model and agent:
# imports assumed (keras-rl2 with tensorflow.keras)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

states = (1, 3)
actions = env.action_space.n


def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model


model = build_model(states, actions)


def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=10,
                   target_model_update=1e-2)
    return dqn


dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
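For comparison, the keras-rl examples (e.g. dqn_cartpole.py) put a Flatten layer first: with window_length=1 the SequentialMemory feeds the network batches of shape (batch, 1, obs_dim), which matches the (1, 1, 3) in the second error above, and flattening brings the final Dense output back to (batch, actions). A sketch of that pattern, assuming a flat 3-element observation:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

def build_flat_model(obs_dim, actions):
    model = Sequential()
    # collapse the (window_length, obs_dim) input to a flat vector
    model.add(Flatten(input_shape=(1, obs_dim)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))  # output: (batch, actions)
    return model

model = build_flat_model(3, actions)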