I'm training a model for the OpenAI Gym LunarLander-v2 environment. I've successfully done this using a Sequential model, but when I try to use the Functional API instead, I get errors about incompatible tensor shapes. Here is the code for the Agent class. I think the issue has to do with the shapes of done_list and next_states being incompatible, but I'm not sure how to reshape these tensors to make it work.
class DQAgent(Agent):
    def __init__(self, env, config):
        Agent.__init__(self, env, config)
        self.memory = deque(maxlen=self.config.memory_size)  # replay buffer
        self.model = self.initialize()

    def initialize(self):
        # Functional-API version of the model
        inputs = Input(shape=(8,))
        dense = Dense(self.config.layer_size * self.config.input_layer_mult, activation=relu)
        x = dense(inputs)
        x = Dense(self.config.layer_size, activation=relu)(x)
        outputs = layers.Dense(self.action_space_size, activation=linear)(x)

        model = keras.Model(inputs=inputs, outputs=outputs, name=self.name)
        model.compile(loss=mean_squared_error, optimizer=Adam(lr=self.config.learning_rate))
        model.summary()
        return model

    def policyAct(self, state):
        # greedy action from the current Q-network
        predicted_actions = self.model.predict(state)
        return np.argmax(predicted_actions[0])

    def addToMemory(self, state, action, reward, next_state, done):
        self.memory.append((self, state, action, reward, next_state, done))

    def sampleFromMemory(self):
        sample = random.sample(self.memory, self.config.batch_size)
        return sample

    def extractFromSample(self, sample):
        states = np.array([i[0] for i in sample])
        actions = np.array([i[1] for i in sample])
        rewards = np.array([i[2] for i in sample])
        next_states = np.array([i[3] for i in sample])
        done_list = np.array([i[4] for i in sample])
        states = np.squeeze(states)
        next_states = np.squeeze(next_states)
        return np.squeeze(states), actions, rewards, next_states, done_list

    def updateReplayCount(self):
        self.config.replay_counter += 1
        self.config.replay_counter = self.config.replay_counter % self.config.replay_step_size

    def learnFromMemory(self):
        # only replay once the buffer holds a full batch and we are on a replay step
        if len(self.memory) < self.config.batch_size or self.config.replay_counter != 0:
            return
        if np.mean(self.training_episode_rewards[-10:]) > 100:
            return

        sample = self.sampleFromMemory()
        states, actions, rewards, next_states, done_list = self.extractFromSample(sample)
        targets = rewards + self.config.gamma * np.amax(
            self.model.predict_on_batch(next_states), axis=1) * (1 - done_list)
        target_vec = self.model.predict_on_batch(states)
        indexes = np.array([i for i in range(self.config.batch_size)])
        target_vec[[indexes], [actions]] = targets
        self.model.fit(states, target_vec, epochs=1, verbose=0)

    def save(self, name):
        self.model.save(name)
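For context, this is roughly how the agent is driven. It's a simplified sketch rather than my exact loop (the real one also tracks self.training_episode_rewards, which learnFromMemory reads; the episode count here is just a placeholder and "config" is my config object):

# Simplified sketch of the training loop, not my exact code
import gym

env = gym.make("LunarLander-v2")
agent = DQAgent(env, config)

for episode in range(500):
    state = env.reset()
    done = False
    while not done:
        action = agent.policyAct(state)          # state goes straight into model.predict
        next_state, reward, done, _ = env.step(action)
        agent.addToMemory(state, action, reward, next_state, done)
        state = next_state
        agent.updateReplayCount()
        agent.learnFromMemory()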
Similar code works fine when I create the model using the Sequential API instead of the Functional API. I'm very new to this, and to SO as well, so any help is greatly appreciated. This is the warning and error I get:
WARNING:tensorflow:Model was constructed with shape (None, 8) for input Tensor("input_10:0", shape=(None, 8), dtype=float32), but it was called on an input with incompatible shape (None, 1).

ValueError: Input 0 of layer dense_72 is incompatible with the layer: expected axis -1 of input shape to have value 8 but received input with shape [None, 1]
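From the warning it looks like whatever reaches the model has shape (None, 1) rather than (None, 8). My guess (unconfirmed) is that a single observation needs to be reshaped into a batch of one before calling predict, roughly like this:

import numpy as np

state = env.reset()                 # LunarLander-v2 observation: 8 floats, shape (8,)
state = np.reshape(state, (1, 8))   # batch of one observation, shape (1, 8)
action = agent.policyAct(state)     # model.predict() should now see (None, 8)

But I'm not sure whether that is the actual cause, since the same surrounding code works with the Sequential model.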
Here is the model from the Sequential implementation, which runs with no issues (the rest of the code is the same):
def initialize_model(self):
    model = Sequential()
    model.add(Dense(self.config.layer_size * self.config.input_layer_mult,
                    input_dim=self.observation_space_dim, activation=relu))
    for i in range(self.config.deep_layers):
        model.add(Dense(self.config.layer_size, activation=relu))
    model.add(Dense(self.action_space_dim, activation=linear))
    model.compile(loss=mean_squared_error, optimizer=Adam(lr=self.config.learning_rate))
    print(model.summary())
    return model
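For reference, this is what I would expect the Functional-API equivalent of initialize_model to look like; it's only a sketch using the same config attributes and imports as above, and I haven't verified that it avoids the error:

def initialize_functional(self):
    # Sketch: Functional-API mirror of initialize_model above.
    # Assumes the same imports (Input, Dense, keras, Adam, relu, linear, mean_squared_error).
    inputs = Input(shape=(self.observation_space_dim,))
    x = Dense(self.config.layer_size * self.config.input_layer_mult, activation=relu)(inputs)
    for _ in range(self.config.deep_layers):
        x = Dense(self.config.layer_size, activation=relu)(x)
    outputs = Dense(self.action_space_dim, activation=linear)(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss=mean_squared_error, optimizer=Adam(lr=self.config.learning_rate))
    model.summary()
    return model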