我正在尝试使用 Ray Tune包对使用纯 Tensorflow 实现的 LSTM 进行超参数调整。为此,我使用了超频带调度程序和HyperOptSearch算法,并且还使用了可训练的类方法。当我尝试运行它时,我收到以下错误:
类型错误:ap_uniform_sampler() 缺少 1 个必需的位置参数:“高”
下面显示的是堆栈跟踪:
FutureWarning:不推荐将 issubdtype 的第二个参数从float
to转换np.floating
。将来,它将被视为np.float64 == np.dtype(float).type
. from ._conv import register_converters as _register_converters 处理 STDOUT 和 STDERR 被重定向到 /tmp/ray/session_2018-12-19_09-43-46_5469/logs。等待 127.0.0.1:14332 的 redis 服务器响应... 等待 127.0.0.1:25158 的 redis 服务器响应... 使用 /dev/shm 启动具有 3.220188364 GB 内存的 Plasma 对象存储。无法启动 UI,您可能需要运行“pip install jupyter”。== 状态 == 使用 HyperBand:num_stopped=0 total_brackets=0 第 0 轮:请求的资源:0/4 CPU,0/0 GPU 此节点上的内存使用量:3.7/8.1 GB
Traceback (most recent call last):
File "/home/suleka/Documents/sales_prediction/auto_LSTM_try3.py", line 398, in <module>
run_experiments(config, search_alg=algo, scheduler=hyperband)
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/tune.py", line 108, in run_experiments
runner.step()
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 114, in step
next_trial = self._get_next_trial()
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 254, in _get_next_trial
self._update_trial_queue(blocking=wait_for_trial)
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 330, in _update_trial_queue
trials = self._search_alg.next_trials()
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/suggest/suggestion.py", line 67, in next_trials
for trial in self._trial_generator:
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/suggest/suggestion.py", line 88, in _generate_trials
suggested_config = self._suggest(trial_id)
File "/home/suleka/anaconda3/lib/python3.6/site-packages/ray/tune/suggest/hyperopt.py", line 81, in _suggest
self.rstate.randint(2**31 - 1))
File "/home/suleka/anaconda3/lib/python3.6/site-packages/hyperopt/tpe.py", line 835, in suggest
= tpe_transform(domain, prior_weight, gamma)
File "/home/suleka/anaconda3/lib/python3.6/site-packages/hyperopt/tpe.py", line 816, in tpe_transform
s_prior_weight
File "/home/suleka/anaconda3/lib/python3.6/site-packages/hyperopt/tpe.py", line 690, in build_posterior
b_post = fn(*b_args, **dict(named_args))
TypeError: ap_uniform_sampler() missing 1 required positional argument: 'high'
我的代码如下所示:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import time
import ray
from ray.tune import grid_search, run_experiments, register_trainable, Trainable
from ray.tune.schedulers import HyperBandScheduler
from tensorflow.examples.tutorials.mnist import input_data
# from ray.tune import sample_from
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
from ray.tune.suggest import HyperOptSearch
import argparse
from hyperopt import hp
num_steps = 14
lstm_size = 32
batch_size = 8
init_learning_rate = 0.01
learning_rate_decay = 0.99
init_epoch = 5 # 5
max_epoch = 60 # 100 or 50
hidden1_nodes = 30
hidden2_nodes = 15
hidden1_activation = tf.nn.tanh
hidden2_activation = tf.nn.tanh
lstm_activation = tf.nn.relu
input_size = 1
num_layers = 1
column_min_max = [[0, 11000], [1, 7]]
columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo']
features = len(columns)
def segmentation(data):
seq = [price for tup in data[columns].values for price in tup]
seq = np.array(seq)
# split into items of features
seq = [np.array(seq[i * features: (i + 1) * features])
for i in range(len(seq) // features)]
# split into groups of num_steps
X = np.array([seq[i: i + num_steps] for i in range(len(seq) - num_steps)])
y = np.array([seq[i + num_steps] for i in range(len(seq) - num_steps)])
# get only sales value
y = [[y[i][0]] for i in range(len(y))]
y = np.asarray(y)
return X, y
def scale(data):
for i in range (len(column_min_max)):
data[columns[i]] = (data[columns[i]] - column_min_max[i][0]) / ((column_min_max[i][1]) - (column_min_max[i][0]))
return data
def rescle(test_pred):
prediction = [(pred * (column_min_max[0][1] - column_min_max[0][0])) + column_min_max[0][0] for pred in test_pred]
return prediction
def pre_process():
store_data = pd.read_csv('/home/suleka/salesPred/store2_1.csv')
store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
#
# store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)
# ---for segmenting original data --------------------------------
original_data = store_data.copy()
## train_size = int(len(store_data) * (1.0 - test_ratio))
validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
train_size = int(len(store_data) - (validation_len+test_len))
train_data = store_data[:train_size]
validation_data = store_data[(train_size-num_steps): validation_len+train_size]
test_data = store_data[((validation_len+train_size) - num_steps): ]
original_val_data = validation_data.copy()
original_test_data = test_data.copy()
# -------------- processing train data---------------------------------------
scaled_train_data = scale(train_data)
train_X, train_y = segmentation(scaled_train_data)
# -------------- processing validation data---------------------------------------
scaled_validation_data = scale(validation_data)
val_X, val_y = segmentation(scaled_validation_data)
# -------------- processing test data---------------------------------------
scaled_test_data = scale(test_data)
test_X, test_y = segmentation(scaled_test_data)
# ----segmenting original validation data-----------------------------------------------
nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)
# ----segmenting original test data-----------------------------------------------
nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)
return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y,nonescaled_val_y
def generate_batches(train_X, train_y, batch_size):
num_batches = int(len(train_X)) // batch_size
if batch_size * num_batches < len(train_X):
num_batches += 1
batch_indices = range(num_batches)
for j in batch_indices:
batch_X = train_X[j * batch_size: (j + 1) * batch_size]
batch_y = train_y[j * batch_size: (j + 1) * batch_size]
# assert set(map(len, batch_X)) == {num_steps}
yield batch_X, batch_y
def setupRNN(inputs):
cell = tf.contrib.rnn.LSTMCell(lstm_size, state_is_tuple=True, activation=lstm_activation)
val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
val = tf.transpose(val1, [1, 0, 2])
last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")
# hidden layer
hidden1 = tf.layers.dense(last, units=hidden1_nodes, activation=hidden2_activation)
hidden2 = tf.layers.dense(hidden1, units=hidden2_nodes, activation=hidden1_activation)
weight = tf.Variable(tf.truncated_normal([hidden2_nodes, input_size]))
bias = tf.Variable(tf.constant(0.1, shape=[input_size]))
prediction = tf.matmul(hidden2, weight) + bias
return prediction
class TrainMNIST(Trainable):
"""Example MNIST trainable."""
def _setup(self, config):
global num_steps, lstm_size, hidden2_nodes, hidden2_activation, hidden1_activation, hidden1_nodes, lstm_size, lstm_activation, init_learning_rate, init_epoch, max_epoch, learning_rate_decay
self.timestep = 0
self.train_X, self.train_y, self.test_X, self.test_y, self.val_X, self.val_y, self.nonescaled_test_y, self.nonescaled_val_y = pre_process()
self.inputs = tf.placeholder(tf.float32, [None, num_steps, features], name="inputs")
self.targets = tf.placeholder(tf.float32, [None, input_size], name="targets")
self.learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
num_steps = config["num_steps"]
lstm_size = config["lstm_size"]
hidden1_nodes = config["hidden1_nodes"]
hidden2_nodes = config["hidden2_nodees"]
batch_size = config["batch_size"]
init_learning_rate = getattr(config["learning_rate"])
learning_rate_decay = getattr(config["learning_rate_decay"])
max_epoch = getattr(config["max_epoch"])
init_epoch = getattr(config["init_epoch"])
self.prediction = setupRNN(self.inputs)
with tf.name_scope('loss'):
model_loss = tf.losses.mean_squared_error(self.targets, self.prediction)
with tf.name_scope('adam_optimizer'):
train_step = tf.train.AdamOptimizer(self.learning_rate).minimize(model_loss)
self.train_step = train_step
with tf.name_scope('accuracy'):
correct_prediction = tf.sqrt(tf.losses.mean_squared_error(self.prediction, self.targets))
self.accuracy = correct_prediction
self.sess = tf.Session()
self.sess.run(tf.global_variables_initializer())
self.iterations = 0
self.saver = tf.train.Saver()
def _train(self):
learning_rates_to_use = [
init_learning_rate * (
learning_rate_decay ** max(float(i + 1 - init_epoch), 0.0)
) for i in range(max_epoch)]
for epoch_step in range(max_epoch):
current_lr = learning_rates_to_use[epoch_step]
i = 0
for batch_X, batch_y in generate_batches(self.train_X, self.train_y, batch_size):
train_data_feed = {
self.inputs: batch_X,
self.targets: batch_y,
self.learning_rate: 0.01,
}
self.sess.run(self.train_step, train_data_feed)
val_data_feed = {
self.inputs: self.val_X,
self.targets: self.val_y,
self.learning_rate: 0.0,
}
pred = self.sess.run(self.prediction, val_data_feed)
pred_vals = rescle(pred)
pred_vals = np.array(pred_vals)
pred_vals = pred_vals.flatten()
pred_vals = pred_vals.tolist()
nonescaled_y = self.nonescaled_val_y.flatten()
nonescaled_y = nonescaled_y.tolist()
val_accuracy = sqrt(mean_squared_error(nonescaled_y, pred_vals))
})
self.iterations += 1
return {"RMSE_loss": val_accuracy}
def _save(self, checkpoint_dir):
return self.saver.save(
self.sess, checkpoint_dir + "/save", global_step=self.iterations)
def _restore(self, path):
return self.saver.restore(self.sess, path)
# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--smoke-test', action='store_true', help='Finish quickly for testing')
args, _ = parser.parse_known_args()
ray.init(redirect_output=True)
register_trainable("my_class", TrainMNIST)
space = {
'num_steps': hp.uniform('num_steps', 2, 14),
'lstm_size': hp.uniform('lstm_size', [8,16,32,64,128]),
'hidden1_nodes': hp.choice("hidden1_nodes", [4,8,16,32,64]),
'hidden2_nodees': hp.choice("hidden2_nodees", [2,4,8,16,32]),
'learning_rate': hp.choice("learning_rate", [0.01,0.1,0.5,0.05]),
'learning_rate_decay': hp.choice("learning_rate_decay", [0.99,0.8,0.7]),
'max_epoch': hp.choice("max_epoch", [60,50,100,120,200]),
'init_epoch': hp.choice("init_epoch", [5,10,15,20]),
'batch_size': hp.choice("batch_size", [5,8,16,30,31,64])
}
config = {
"my_exp": {
"run": "exp",
"num_samples": 10 if args.smoke_test else 1000,
"stop": {
'RMSE_loss': 400.00,
'time_total_s': 600,
},
}
}
algo = HyperOptSearch(space, max_concurrent=4, reward_attr="RMSE_loss")
hyperband = HyperBandScheduler(
time_attr="training_iteration", reward_attr="RMSE_loss", max_t=10)
run_experiments(config, search_alg=algo, scheduler=hyperband)