
I am trying to train an agent in a MineRL environment using Keras. Here is my code so far:

import gym
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from gym import spaces

import minerl
from collections import OrderedDict

class EnvHandeler():
    """Wraps the MineRL env and exposes a small discrete action set for keras-rl."""
    def __init__(self):
        self.env = gym.make('MineRLTreechop-v0')
        # Restrict the observation space to the 64x64 RGB "pov" image.
        self.env.observation_space = gym.spaces.Dict({
            "pov": gym.spaces.Box(low=0, high=255, shape=(64, 64, 3))
        })
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
    def step(self, action):
        # action_space.noop() already sets every button to 0, so only the
        # keys belonging to the chosen discrete action need to be switched on.
        action_holder = self.env.action_space.noop()
        if action == 0:
            action_holder['forward'] = 1
        elif action == 1:
            action_holder['forward'] = 1
            action_holder['jump'] = 1
        elif action == 2:
            action_holder['left'] = 1
        elif action == 3:
            action_holder['right'] = 1
        elif action == 4:
            action_holder['attack'] = 1

        n_state, reward, done, info = self.env.step(action_holder)
        return n_state, reward, done, info
    def render(self):
        self.env.render()

    def reset(self):
        return self.env.reset()

def build_model(height, width, channels, actions):
    # The leading 3 in input_shape matches SequentialMemory's window_length,
    # i.e. three stacked frames per state.
    model = Sequential()
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=(3, height, width, channels)))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model



def build_agent(model, actions):
    # Epsilon is annealed linearly from 1.0 down to 0.1 over the first 10k steps.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000)
    return dqn

def main():
    env = EnvHandeler()

    height, width, channels = env.observation_space['pov'].shape

    actions = 5  # forward, forward+jump, left, right, attack

    model = build_model(height,width,channels,actions)
    dqn = build_agent(model, actions)
    dqn.compile(Adam(lr=1e-4))
    model.summary()
    dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)


if __name__ == '__main__':
    main()

When training starts, I get the following error:

Training for 10000 steps ...
Traceback (most recent call last):
  File "F:/WORKING/Minecraft KI/Project/SmartTreeKeras.py", line 172, in <module>
    main()
  File "F:/WORKING/Minecraft KI/Project/SmartTreeKeras.py", line 159, in main
    dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\core.py", line 169, in fit
    action = self.forward(observation)
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\agents\dqn.py", line 225, in forward
    q_values = self.compute_q_values(state)
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\agents\dqn.py", line 69, in compute_q_values
    q_values = self.compute_batch_q_values([state]).flatten()
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\agents\dqn.py", line 64, in compute_batch_q_values
    q_values = self.model.predict_on_batch(batch)
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 1201, in predict_on_batch
    x, extract_tensors_from_dataset=True)
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 2334, in _standardize_user_data
    batch_size=batch_size)
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 2361, in _standardize_tensors
    exception_prefix='input')
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_utils.py", line 573, in standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking input: expected conv2d_input to have 5 dimensions, but got array with shape (1, 3)

As asked in the comments, env.step() returns:

(OrderedDict([('pov', array([[[120, 153, 216],
        [120, 153, 217],
        [120, 153, 217],
        ...,
        [121, 153, 215],
        [121, 153, 214],
        [121, 153, 214]],

       [[121, 153, 216],
        [120, 153, 216],
        [120, 153, 217],
        ...,
        [121, 153, 214],
        [121, 153, 214],
        [121, 153, 214]],

       [[121, 153, 216],
        [121, 153, 216],
        [120, 153, 216],
        ...,
        [121, 153, 214],
        [121, 153, 213],
        [121, 153, 213]],

       ...,

       [[ 16,  30,   9],
        [ 14,  27,   8],
        [ 13,  25,   7],
        ...,
        [ 17,  33,  10],
        [ 21,  40,  12],
        [ 17,  33,  10]],

       [[ 14,  27,   8],
        [ 14,  27,   8],
        [ 21,  41,  12],
        ...,
        [ 28,  54,  16],
        [ 16,  30,   9],
        [ 14,  27,   8]],

       [[ 12,  24,   7],
        [ 20,  39,  11],
        [ 21,  42,  12],
        ...,
        [ 33,  64,  19],
        [ 30,  58,  17],
        [ 17,  34,  10]]], dtype=uint8))]), 0.0, False, {})
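
For completeness, pulling the pov entry out on its own (run separately from training) matches the declared observation space:

# env is an EnvHandeler instance, as in main()
state = env.reset()
print(state['pov'].shape)  # (64, 64, 3)
print(state['pov'].dtype)  # uint8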

More details:

  • Python 3.7
  • TensorFlow-GPU 2.3.0

I can imagine there is also a structural error, because the expected number of dimensions shifts with whatever I feed in: if I pass in 5 dims, 6 dims are expected; if I pass in 3 dims, 4 are expected, and so on.
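
This is only my assumption about what keras-rl builds internally, but the following MineRL-free snippet reproduces exactly the (1, 3) shape from the traceback: keras-rl keeps the last window_length observations in a list and calls np.array([state]) on it, and since each observation here is an OrderedDict rather than an ndarray, numpy falls back to an object array:

import numpy as np
from collections import OrderedDict

obs = OrderedDict(pov=np.zeros((64, 64, 3), dtype=np.uint8))
state = [obs, obs, obs]   # window_length = 3
batch = np.array([state])
print(batch.shape)        # (1, 3) -- the shape in the error message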

This error is driving me slightly crazy. I have already tried a lot of the solutions I found online, but none of them worked; one idea I looked at is sketched below.
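
For example, one of the ideas was to unwrap the "pov" image in the handler so keras-rl only ever sees a plain ndarray instead of an OrderedDict (an untested sketch; PovOnlyHandler is just a name I made up, and I am not sure it is enough on its own):

# Hypothetical wrapper: hand keras-rl the raw pov array only.
class PovOnlyHandler(EnvHandeler):
    def step(self, action):
        n_state, reward, done, info = super().step(action)
        return n_state['pov'], reward, done, info

    def reset(self):
        return self.env.reset()['pov']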

I would be really glad if anyone can help!

- Cheers

