I am using a convolutional layer in TensorFlow on a GPU with 4 GB of memory (a GTX 980).
Everything worked fine before I used a convolutional layer, but as soon as I add one I get the following error:

could not enqueue convolution on stream: CUDNN_STATUS_NOT_SUPPORTED
I have heard that this problem is related to GPU memory.
I also know that a single TensorFlow op is limited to 2 GB because of the protobuf limit, but none of the ops in my network comes anywhere near 2 GB, so that should not be the issue.
What puzzles me is that with the convolutional layer the total size of my network (all the weight matrices together) is even smaller, yet the error keeps happening; see the parameter count sketch after my questions below.
When I change the batch size to a very small number the error does not occur, but SGD does not work well with such a tiny batch.
Can this be solved by using another framework such as PyTorch, or can I still run a batch size of 500000 with TensorFlow?
Or is it down to the small (4 GB) GPU memory?
Please help, I'm stuck.
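
For reference, here is a quick parameter count based on the model code below, which shows just how small the network actually is (a rough sketch; I'm taking neighbor = 5 so that input_size = neighbor * 3 matches my 15 features):

# Rough parameter count for the model below
# (neighbor = 5, conv1_size = 10, layer1_size = layer2_size = 100, output_size = 1)
neighbor, conv1_size, layer1, layer2, out = 5, 10, 100, 100, 1
params = (3 * 1 * conv1_size                # conv_W_1
          + conv1_size * neighbor           # conv_b_1
          + conv1_size * neighbor * layer1  # W_1
          + layer1                          # b_1
          + layer1 * layer2 + layer2        # W_2, b_2
          + layer2 * out + out)             # W_out, b_out
print(params, "parameters,", params * 4 / 1e6, "MB as float32")
# -> 15381 parameters, ~0.06 MB: nowhere near the 2 GB protobuf limit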
Network summary
- one 1-D convolutional layer
- FC layers
- a regression layer
Data summary
- batch size = 500000
- feature size = 15 (floats)
- input placeholder size: 15 (number of features) x 4 bytes (tf.float32, matching the placeholder in the code below) x 500000 (batch size) = 30 MB
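
A quick sanity check of these numbers, plus the largest intermediate tensor the convolution produces (back-of-the-envelope only; the conv output shape follows from the code below with neighbor = 5):

batch_size, features, bytes_f32 = 500000, 15, 4        # tf.float32
print(batch_size * features * bytes_f32 / 1e6, "MB")   # 30 MB input placeholder
# conv1d output is [batch, neighbor, conv1_size] = [500000, 5, 10]:
print(batch_size * 5 * 10 * bytes_f32 / 1e6, "MB")     # 100 MB of activations
# On top of this, cuDNN can request extra scratch workspace for the
# convolution algorithm it picks.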
Model code
As you can see, it is a very small network.
I tried a much larger network without a convolutional layer and it worked fine.
import tensorflow as tf


class MyModel:
    def __init__(self, learning_rate, batch_size, neighbor, weight_decay=0.9,
                 huber_delta=0.3, keep_prob_lst=[]):
        """Hyperparameters."""
        self.isConv = True
        self.batch_size = batch_size
        self.lr = learning_rate
        self.input_size = neighbor * 3
        self.output_size = 1
        self.neighbor = neighbor
        self.weight_decay = weight_decay
        self.conv1_size = 10
        self.layer1_size = 100
        self.layer2_size = 100
        self.huber_delta = huber_delta        # currently unused: the loss below is plain squared error
        self.keep_prob_lst_val = keep_prob_lst
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                       name='global_step')

    def _create_placeholders(self):
        """Define the placeholders for input and output."""
        with tf.name_scope("data"):
            # Note: the batch dimension is hard-coded, so every feed must
            # contain exactly batch_size rows.
            self.input = tf.placeholder(tf.float32,
                                        shape=[self.batch_size, self.input_size],
                                        name='input')
            self.output = tf.placeholder(tf.float32,
                                         shape=[self.batch_size, self.output_size],
                                         name='output')

    def _create_weights(self):
        """Define the weights."""
        # No explicit device placement here; by default TensorFlow puts these
        # on the GPU when one is available.
        with tf.name_scope("weights"):
            # 1-D conv filter: width 3, 1 input channel, conv1_size output channels
            self.conv_W_1 = tf.Variable(
                tf.random_normal([3, 1, self.conv1_size], stddev=0.01, mean=0.0, seed=0),
                name='conv_layer1_weight')
            self.conv_b_1 = tf.Variable(tf.zeros([1, self.conv1_size * self.neighbor]),
                                        name='conv_layer1_bias')
            self.W_1 = tf.Variable(
                tf.random_normal([self.conv1_size * self.neighbor, self.layer1_size],
                                 stddev=0.01, mean=0.0, seed=0),
                name='layer1_weight')
            self.b_1 = tf.Variable(tf.zeros([1, self.layer1_size]), name='layer1_bias')
            self.W_2 = tf.Variable(
                tf.random_normal([self.layer1_size, self.layer2_size],
                                 stddev=0.01, mean=0.0, seed=0),
                name='layer2_weight')
            self.b_2 = tf.Variable(tf.zeros([1, self.layer2_size]), name='layer2_bias')
            self.W_out = tf.Variable(
                tf.random_normal([self.layer2_size, self.output_size],
                                 stddev=0.01, mean=0.0, seed=0),
                name='layer_out_weight')
            self.b_out = tf.Variable(tf.zeros([1, self.output_size]), name='layer_out_bias')

    def _create_loss(self):
        """Define the inference graph and the loss function."""
        with tf.name_scope("loss"):
            # [batch, 15] -> [batch, 15, 1]: add a channel dimension for conv1d
            self.conv1_input = tf.reshape(self.input,
                                          [self.batch_size, self.neighbor * 3, 1])
            # stride 3 with 'VALID' padding gives one output position per
            # neighbor, so the result is [batch, neighbor, conv1_size]
            self.conv1_output = tf.nn.conv1d(self.conv1_input, self.conv_W_1, 3, 'VALID')
            # Flatten to [batch, neighbor * conv1_size] and add the bias
            self.conv1_output_reshape = (tf.reshape(self.conv1_output,
                                                    [self.batch_size, -1])
                                         + self.conv_b_1)
            self.layer1_output = tf.nn.relu(
                tf.matmul(self.conv1_output_reshape, self.W_1) + self.b_1)
            self.layer2_output = tf.nn.relu(
                tf.matmul(self.layer1_output, self.W_2) + self.b_2)
            self.layer_out_output = tf.matmul(self.layer2_output, self.W_out) + self.b_out
            self.se = 0.5 * tf.square(self.layer_out_output - self.output, name='square')
            self.loss = tf.reduce_mean(self.se)

    def _create_optimizer(self):
        """Define the optimizer."""
        self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(
            self.loss, global_step=self.global_step)

    def build_graph(self):
        """Build the graph for the model."""
        self._create_placeholders()
        self._create_weights()
        self._create_loss()
        self._create_optimizer()
        # self._create_summaries()
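
For completeness, this is roughly how I run it (a minimal sketch with random data and illustrative hyperparameter values; with batch_size = 500000 this is where I hit CUDNN_STATUS_NOT_SUPPORTED, and with a small batch_size the same script runs fine):

import numpy as np
import tensorflow as tf

batch_size, neighbor = 500000, 5
model = MyModel(learning_rate=1e-3, batch_size=batch_size, neighbor=neighbor)
model.build_graph()

x = np.random.rand(batch_size, neighbor * 3).astype(np.float32)
y = np.random.rand(batch_size, 1).astype(np.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The conv1d inside model.loss is what triggers the error for me
    _, loss_val = sess.run([model.optimizer, model.loss],
                           feed_dict={model.input: x, model.output: y})
    print(loss_val)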