我正在使用带有 BipedalWalker-v3 环境的 python gym。设置 DQNAgent 时出现以下错误
IndexError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7456/1216894816.py in <module>
----> 1 dqn.fit(env, nb_steps=1000, visualize=False, verbose=1)
~\.conda\envs\AI\lib\site-packages\rl\core.py in fit(self, env, nb_steps, action_repetition, callbacks, verbose, visualize, nb_max_start_steps, start_step_policy, log_interval, nb_max_episode_steps)
174 for _ in range(action_repetition):
175 callbacks.on_action_begin(action)
--> 176 observation, r, done, info = env.step(action)
177 observation = deepcopy(observation)
178 if self.processor is not None:
~\.conda\envs\AI\lib\site-packages\gym\wrappers\time_limit.py in step(self, action)
16 self._elapsed_steps is not None
17 ), "Cannot call env.step() before calling reset()"
---> 18 observation, reward, done, info = self.env.step(action)
19 self._elapsed_steps += 1
20 if self._elapsed_steps >= self._max_episode_steps:
~\.conda\envs\AI\lib\site-packages\gym\envs\box2d\bipedal_walker.py in step(self, action)
413 print('Action inpuit type: {}'.format(type(action)))
414 #print(action[0])
--> 415 self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
416 self.joints[0].maxMotorTorque = float(
417 MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
IndexError: invalid index to scalar variable.
我在gym 包内的.step() 函数中添加了一些打印函数(就在else: 行的下方),以查看我为action 变量获得了什么输入。
def step(self, action):
# self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
control_speed = False # Should be easier as well
if control_speed:
self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
else:
print('Action inpuit: {}'.format(action))
print('Action inpuit type: {}'.format(type(action)))
#print(action[0])
self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
self.joints[0].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
)
self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
self.joints[1].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
)
self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
self.joints[2].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
)
self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
self.joints[3].maxMotorTorque = float(
MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
)
我得到以下输出:
训练1000步...
动作输入:[0 0 0 0]
动作输入类型:<class 'numpy.ndarray'>
间隔 1(执行 0 步)
动作输入:3
动作输入类型:<class 'numpy.int32'>
(索引错误)
从我可以看到第二次将动作变量提供给健身房环境步骤函数,它是以 int 的形式而不是应有的数组形式。
对此的任何帮助将不胜感激。
完整代码如下:
import gym
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_model(states, actions):
model = Sequential()
model.add(Flatten(input_shape=(1,states)))
model.add(Dense(24, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(actions, activation='linear'))
return model
def build_agent(model, actions):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy,
nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
return dqn
env = gym.make('BipedalWalker-v3')
#env = gym.make('CartPole-v1')
states = env.observation_space.shape[0]
actions = env.action_space.shape[0]
#actions = env.action_space.n
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=1000, visualize=False, verbose=1)