1

我在将 RStudio 的 tfestimators 包与 TensorFlow Estimators 配合使用时遇到了问题:它似乎无法与 Google 提供的任何 DistributionStrategy(用于将 Estimator 自动扩展到多块 GPU)一起工作,运行时会报错 “C stack usage XXXX is too close to the limit”(C 堆栈使用量过于接近上限)。

最小可复现示例(由 https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/distribute/README.md 中的示例转换为 R):

library("tensorflow")
library("tfestimators")

# Model function for a custom estimator: a single Dense unit trained with
# mean squared error.
#
# Args:
#   features: input tensor fed to the dense layer.
#   labels:   target tensor; not used in PREDICT mode.
#   mode:     one of the tf$estimator$ModeKeys values.
#   params:   accepted for the estimator interface; unused here.
#   config:   accepted for the estimator interface; unused here.
#
# Returns:
#   An estimator_spec() for the requested mode. Stops with an error if the
#   mode is not PREDICT, EVAL or TRAIN.
model_fn <- function(features, labels, mode, params, config) {
  layer <- tf$layers$Dense(1L)
  logits <- layer(features)

  # Prediction needs no labels, so return before the loss is built.
  if (mode == tf$estimator$ModeKeys$PREDICT) {
    predictions <- list(logits = logits)
    return(estimator_spec(mode = mode, predictions = predictions))
  }

  # Reshaping to list() (an empty shape) turns the logits into a scalar so
  # they line up with the scalar labels.
  loss <- tf$losses$mean_squared_error(
    labels = labels,
    predictions = tf$reshape(logits, list())
  )

  if (mode == tf$estimator$ModeKeys$EVAL) {
    return(estimator_spec(mode = mode, loss = loss))
  }

  if (mode == tf$estimator$ModeKeys$TRAIN) {
    optimizer <- tf$train$GradientDescentOptimizer(0.2)
    # Passing the global step lets the estimator's step counter advance.
    train_op <- optimizer$minimize(
      loss,
      global_step = tf$train$get_global_step()
    )
    return(estimator_spec(mode = mode, loss = loss, train_op = train_op))
  }

  stop("Unsupported estimator mode: ", mode, call. = FALSE)
}


# Input function: builds a dataset of (features, labels) pairs by zipping two
# constant datasets, each repeated 100 times.
#
# Returns:
#   A tf$data$Dataset yielding (features, labels) tuples.
input_fn <- function() {
  # `repeat` is a reserved word in R, so it has to be quoted to be used as a
  # method name.
  feature_ds <- tf$data$Dataset$from_tensors(list(list(1.0)))$`repeat`(100L)
  label_ds <- tf$data$Dataset$from_tensors(1.0)$`repeat`(100L)

  tf$data$Dataset$zip(tuple(feature_ds, label_ds))
}

## MAIN PROGRAM LOOP ##

# Build a run configuration that trains with MirroredStrategy.
distribution <- tf$contrib$distribute$MirroredStrategy()
config <- tf$estimator$RunConfig(train_distribute = distribution)

# Wrap the custom model function in an estimator that uses that configuration.
classifier <- estimator(model_fn = model_fn, config = config)

# NOTE(review): the "C stack usage ... is too close to the limit" error is
# reported while this script runs; possibly the distribution strategy invokes
# the R model_fn from a non-main thread -- unverified, confirm before relying
# on this explanation.
train(classifier, input_fn = input_fn)
evaluate(classifier, input_fn = input_fn)

返回错误:

2018-11-18 13:56:02.190515: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2018-11-18 13:56:02.448221: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: 
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.582
pciBusID: 0000:04:00.0
totalMemory: 10.92GiB freeMemory: 10.76GiB
2018-11-18 13:56:02.609134: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 1 with properties: 
name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.582
pciBusID: 0000:0c:00.0
totalMemory: 10.92GiB freeMemory: 10.76GiB
2018-11-18 13:56:02.610086: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0, 1
2018-11-18 13:56:03.325110: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-11-18 13:56:03.325159: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 1 
2018-11-18 13:56:03.325169: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0:   N Y 
2018-11-18 13:56:03.325174: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 1:   Y N 
2018-11-18 13:56:03.325779: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10409 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:04:00.0, compute capability: 6.1)
2018-11-18 13:56:03.326896: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10409 MB memory) -> physical GPU (device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:0c:00.0, compute capability: 6.1)
2018-11-18 13:56:03.415536: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0, 1
2018-11-18 13:56:03.415606: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-11-18 13:56:03.415617: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988]      0 1 
2018-11-18 13:56:03.415623: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0:   N Y 
2018-11-18 13:56:03.415628: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 1:   Y N 
2018-11-18 13:56:03.416166: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/device:GPU:0 with 10409 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:04:00.0, compute capability: 6.1)
2018-11-18 13:56:03.416376: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/device:GPU:1 with 10409 MB memory) -> physical GPU (device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:0c:00.0, compute capability: 6.1)
Error: C stack usage  898515125908 is too close to the limit
4

0 回答 0