我制作了一个自定义环境并尝试对其进行 DQL,但经过一段时间的模拟,输入尺寸发生了变化。我认为这可能是模拟时间的原因,但有时 de dqn 能够成功重置环境。我将 Gym 用于自定义环境:
from gym import Env
from gym.spaces import Discrete, Box
class Estanque_conico(Env):
def __init__(self,ns,t,delta_t):
# Acciones posibles (frecuencia de la bomba)
self.action_space = Box(np.array([0]),np.array([100]),dtype=np.int32)
# Arreglo de anltura
self.observation_space = Box(low=np.array([0]), high=np.array([50]))
# Set start temp
self.state = 10 + random.randint(-3,3)
# Set shower length
self.ns=ns
self.simulation_len = ns
self.set_point=10
#constantes
self.c1=3.9251
self.c2 = -30*self.c1
self.beta=11.1424
self.alpha=79.8**2/(np.pi*34.5**2)
self.h_ss=30
self.u_ss=30
self.delta_t=delta_t
def estanque_conico(self,h,t,u,c1,c2,alpha,beta):
h_punto = (c1*u+c2-beta*np.sqrt(h))*alpha/(h**2)
return h_punto
def step(self, action):
# Apply action
y = odeint(self.estanque_conico,self.state,[0,self.delta_t],args=(action
,self.c1,self.c2,self.alpha,self.beta,))
self.state =y[-1]
if np.isnan(self.state):
self.state =0.001
# Reduce simulation length by 1 second
self.simulation_len -= 1
# Calculate reward
reward = -(self.state-self.set_point)**2
# Check if simulation is done
if self.simulation_len <= 0:
print(" ")
print(self.state)
print(" ")
done = True
else:
done = False
# Apply noise
#self.state += random.randint(-1,1)
# Set placeholder for info
info = {}
# Return step information
return self.state, reward, done, info
def change_setpoint(self,sp):
self.set_point=sp
return self.set_point
def render(self,mode="human"):
# Implement viz
pass
def reset(self):
# Reset parameters
self.state = 10 + random.randint(-3,3)
self.simulation_len = self.ns
self.set_point=10
return self.state
ns = 3000
t = np.linspace(0,ns,ns+1)
delta_t = t[1]-t[0]
env = Estanque_conico(ns,t,delta_t)
然后使用来自 tensorflow.keras 的简单 Secuential 模型
def build_model(states, actions):
model = Sequential()
model.add(Dense(24, activation='relu',input_shape=(states)))
model.add(Dense(24, activation='relu'))
model.add(Dense(actions, activation='linear'))
return model
来自 Keras RL 的代理
def build_agent(model, actions):
policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, memory=memory, policy=policy,
nb_actions=actions, nb_steps_warmup=100, target_model_update=1e-2)
return dqn
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
但是当我尝试训练代理时,出现以下错误:
Training for 50000 steps ...
Interval 1 (0 steps performed)
WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_v1.py:2070: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
27/10000 [..............................] - ETA: 1:17 - reward: -99.9800
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:25: RuntimeWarning: invalid value encountered in sqrt
2996/10000 [=======>......................] - ETA: 1:27 - reward: -99.9800
0.001
5998/10000 [================>.............] - ETA: 49s - reward: -99.9800
0.001
8999/10000 [=========================>....] - ETA: 12s - reward: -99.9800
0.001
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-08c9e140f6bc> in <module>()
1 dqn = build_agent(model, actions)
2 dqn.compile(Adam(lr=1e-3), metrics=['mae'])
----> 3 dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
7 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
572 ': expected ' + names[i] + ' to have ' +
573 str(len(shape)) + ' dimensions, but got array '
--> 574 'with shape ' + str(data_shape))
575 if not check_batch_axis:
576 data_shape = data_shape[1:]
ValueError: Error when checking input: expected dense_input to have 2 dimensions, but got array with shape (1, 1, 1)
正如你所看到的,它运行了一段时间,它会重置几次,但由于某种原因,它会改变输入形状。