
I have a custom environment. I'm building a model with Keras-RL and training an agent.

The error:

Model output "Tensor("dense_2/BiasAdd:0", shape=(None, 1, 3), dtype=float32)" has invalid shape. DQN expects a model that has one dimension for each action, in this case 3.

Edit: if I change states to (3,), I get this error instead:

Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 3)

I think the problem is the shape and/or how I implement the observation state.

In general, the state I return has the form [[],[],[]].
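
For reference, a minimal sketch (my own illustration, assuming keras-rl with a SequentialMemory window_length of 1 and tf.keras) of where both errors come from: Dense layers only transform the last axis, so input_shape=(1, 3) carries the leading 1 through to the output, while the memory always feeds the network batches of shape (batch, 1, 3). A leading Flatten layer, the pattern keras-rl's own examples use, satisfies both:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Dense only acts on the last axis, so the leading 1 survives to the output.
bad = Sequential([
    Dense(24, activation='relu', input_shape=(1, 3)),
    Dense(3, activation='linear'),
])
print(bad.output_shape)   # (None, 1, 3) -- the shape DQN rejects

# Flatten collapses the window axis: input (batch, 1, 3) -> output (batch, 3).
good = Sequential([
    Flatten(input_shape=(1, 3)),
    Dense(24, activation='relu'),
    Dense(3, activation='linear'),
])
print(good.output_shape)  # (None, 3) -- one output per action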

The environment:

import random

import gym
from gym import spaces

class Game(gym.Env):
    def __init__(self):
        self.score = 0
        self.board = [0, 0, 0, 0]  # note: with Discrete(3) below, pile 3 is never reachable
        self.deck = None
        self.my_card = None
        self.game_over = False
        self.init_deck()
        self.draw_card()
        self.state = [self.board, self.my_card, self.deck]
        self.action_space = spaces.Discrete(3)

    def init_deck(self):
        # 53 cards: sixteen 10s, four each of 2-9, five 1s
        self.deck = [10] * 16 + [9, 8, 7, 6, 5, 4, 3, 2] * 4 + [1] * 5
        random.shuffle(self.deck)

    def draw_card(self):
        self.my_card = self.deck.pop()

    def step(self, action):
        self.board[action] += self.my_card
        self.my_card = None

        # elif keeps a pile that was just reset to 0 from also matching the
        # final case and overwriting the reward with 0
        if self.board[action] > 21:
            self.board[action] = 0
            self.score -= 10
            reward = -10
        elif self.board[action] == 21:
            self.board[action] = 0
            self.score += 10
            reward = 10
        else:
            reward = 0

        self.draw_card()

        if len(self.deck) == 0:
            self.game_over = True

        done = self.game_over

        info = {"score": self.score}  # a dict, not a set literal
        obs = [self.board, self.deck, self.my_card]

        print('Score: ', self.score)

        return obs, reward, done, info

    def reset(self):
        self.score = 0
        self.board = [0, 0, 0, 0]
        self.deck = None
        self.my_card = None
        self.game_over = False
        self.init_deck()
        self.draw_card()
        return [self.board, self.deck, self.my_card]
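
As an aside on the observation itself (my own sketch, not part of the original code): board is always 4 numbers and my_card is a scalar, but deck shrinks on every draw, so [self.board, self.deck, self.my_card] is a ragged list that NumPy cannot stack into a fixed-shape array. One way to get a constant shape is to pad the deck out to its full 53 cards, e.g.:

import numpy as np

def make_obs(board, deck, my_card, deck_size=53):
    # 4 (board) + 53 (padded deck) + 1 (current card) = 58 values every step
    padded_deck = list(deck) + [0] * (deck_size - len(deck))
    return np.array(list(board) + padded_deck + [my_card], dtype=np.float32)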

Model and agent:

# assuming keras-rl2 with tf.keras; with the original keras-rl package,
# import from plain keras instead
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

env = Game()

states = (1, 3)
actions = env.action_space.n

def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

model = build_model(states, actions)

def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
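
One sanity check worth running (my own debugging sketch, not from the original post): DQNAgent checks model.output_shape against nb_actions in its constructor, which is where the first error above comes from, so printing the shape exposes the (None, 1, 3) problem immediately:

print(model.output_shape)  # DQN needs exactly (None, 3) for 3 actions here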
