python - Tensorflow：使用多个 GPU

Question

我有一个非常大的矩阵 (A)。由于 GPU 的内存只有 16GB，我想先对它进行切片，然后在两块不同的 GPU 上分别运行矩阵乘法。问题是（在我看来）TensorFlow 会并行执行操作 1、3、5（参见代码示例），这同样会导致内存不足的错误。怎样才能让它们按顺序运行？我考虑过依次遍历各个设备，但我不确定这些设备是否会并行运行。我希望尽可能少地损失性能，因为计算时间对我的项目非常重要。

我希望你能解决我的问题。

问候

import matplotlib.pyplot as plt
import time
import tensorflow as tf
from tensorflow.python.client import timeline

# Plain Python scalars; defining them creates no graph ops.
riskfreeRate = 0.02  # fed into the `mu` placeholder when the session runs
simulation = 10000   # second dimension of the normal draw matrix `z`

# Graph-level seed, set before any random op is created.
tf.set_random_seed(42)

# Graph constants.
T = tf.constant(1, tf.float32, name='Maturity')
N = tf.constant(500, tf.int32, name='Issuer')

# Random per-issuer inputs, each of shape [N]:
# sigma is uniform on [0, 1), S0 is uniform on [5, 10).
sigma = tf.random.uniform([N], 0, 1)
S0 = tf.random.uniform([N], 5, 10)

# NOTE(review): `S` is never referenced again in this snippet (`S0` is used
# for the starting prices instead) -- confirm whether it is still needed.
S = tf.placeholder(tf.float32, name='StartingPrice')
z = tf.random_normal(
    [N, simulation],
    dtype=tf.float32,
    name='standardNormRandomValues',
)
mu = tf.placeholder(tf.float32, name='riskfreeRate')

# Simulated prices at time T:
#     ST = S0 * exp((mu - sigma^2 / 2) * T + sigma * z^T * sqrt(T))
# tf.transpose(z) is [simulation, N], and the [N]-shaped sigma / S0 broadcast
# over its rows, so with a scalar `mu` the result ST is [simulation, N].
drift_term = (mu - tf.square(sigma) / 2) * T
shock_term = sigma * tf.transpose(z) * tf.sqrt(T)
ST = S0 * tf.exp(drift_term + shock_term)


# A is created with numpy and has 500 x 1,000,000 entries.
# NOTE(review): `A`, `np` and `scenarios` are not defined anywhere in the
# visible snippet; they must be provided before this point (presumably
# `scenarios` is the number of columns of A -- confirm).
A2 = tf.convert_to_tensor(np.asmatrix(A), dtype=tf.float32)

# The columns of A2 are cut into five consecutive slices. Each slice is
# multiplied by ST inside an explicit device scope, alternating between
# GPU 0 (operations 1, 3, 5) and GPU 1 (operations 2, 4).

# Operation 1: columns [0, scenarios/5) on GPU 0.
with tf.device('/device:GPU:0'):
    col_stop = tf.cast(scenarios / 5, tf.int32)
    A3 = A2[:, 0:col_stop]
    SC1 = tf.matmul(ST, A3)

# Operation 2: columns [scenarios/5, 2*scenarios/5) on GPU 1.
with tf.device('/device:GPU:1'):
    col_start = tf.cast(scenarios / 5, tf.int32)
    col_stop = tf.cast(2 * scenarios / 5, tf.int32)
    A4 = A2[:, col_start:col_stop]
    SC2 = tf.matmul(ST, A4)

# Operation 3: columns [2*scenarios/5, 3*scenarios/5) on GPU 0.
with tf.device('/device:GPU:0'):
    col_start = tf.cast(2 * scenarios / 5, tf.int32)
    col_stop = tf.cast(3 * scenarios / 5, tf.int32)
    A5 = A2[:, col_start:col_stop]
    SC3 = tf.matmul(ST, A5)

# Operation 4: columns [3*scenarios/5, 4*scenarios/5) on GPU 1.
with tf.device('/device:GPU:1'):
    col_start = tf.cast(3 * scenarios / 5, tf.int32)
    col_stop = tf.cast(4 * scenarios / 5, tf.int32)
    A6 = A2[:, col_start:col_stop]
    SC4 = tf.matmul(ST, A6)

# Operation 5: columns [4*scenarios/5, scenarios) on GPU 0.
with tf.device('/device:GPU:0'):
    col_start = tf.cast(4 * scenarios / 5, tf.int32)
    col_stop = tf.cast(scenarios, tf.int32)
    A7 = A2[:, col_start:col_stop]
    SC5 = tf.matmul(ST, A7)


# Stitch the five partial products back together. tf.concat expects a list of
# tensors followed by the axis to join along; passing the tensors as separate
# positional arguments fails before the graph is even built. Each SCk comes
# from a distinct column range of A2, so the pieces are joined along the
# column axis (axis=1).
SC = tf.concat([SC1, SC2, SC3, SC4, SC5], axis=1)


# The `with` statement closes the session when the block exits, so no explicit
# sess.close() call is needed inside it.
with tf.Session() as sess:
    # `mu` is a placeholder and has to be fed a value for the run.
    priceTensor = sess.run(SC, {mu: riskfreeRate})

python - Tensorflow：使用多个 GPU

0 回答 0

Related

Reference