
The output feature map of a convolutional layer has shape (Batch, Height, Width, Channels). When we initialize a CNN in TensorFlow, we get a None value in place of Batch. I am trying to implement a Spatial Transformer Network as a custom layer, so I need to vectorize the layer over the convolutional layer's batch size. When I try to initialize the network, the spatial transformer layer raises an error saying it cannot perform the operation with a None value.
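
To illustrate where the None comes from, here is a minimal sketch (only the printed shapes matter; the layer parameters are illustrative):

    import tensorflow as tf

    x = tf.keras.Input((28, 28, 1))                              # symbolic tensor used for graph tracing
    print(x.shape)                                               # (None, 28, 28, 1) - batch size unknown
    u = tf.keras.layers.Conv2D(16, (3, 3), padding = "same")(x)
    print(u.shape[0])                                            # None - cannot be fed to tf.stack / tf.tile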

My code is shown below:

    class SpatialTransformer(Layer):
      def __init__(self):
        super(SpatialTransformer, self).__init__()

      def affine_transform(self, input_shape, theta):
        N = theta.shape[0]
        H, W = input_shape  #output dimensions of grid
        x_t, y_t = tf.meshgrid(tf.linspace(-1.0, 1.0, W), tf.linspace(-1.0, 1.0, H))
        x_t = tf.cast(tf.reshape(x_t, [-1]), dtype = tf.float32)
        y_t = tf.cast(tf.reshape(y_t, [-1]), dtype = tf.float32)
        ones = tf.ones(x_t.shape, dtype=tf.float32)
        sampling_grids = tf.stack([x_t, y_t, ones])
        sampling_grids = tf.expand_dims(sampling_grids, axis = 0)
        sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
        batch_grids = tf.matmul(theta, sampling_grids)
        batch_grids = tf.reshape(batch_grids, [N, 2, H, W])
        return batch_grids

      def get_pixel_value(self, feature_map, x_s, y_s):
        "Util Function to get the value of pixel from 4d image tensors given position vectors x_s and y_s"
        N, H, W = x_s.shape
        batch_idx = tf.range(0, N)
        batch_idx = tf.reshape(batch_idx, (N, 1, 1))
        b = tf.tile(batch_idx, (1, H, W))
        indices = tf.stack([b, y_s, x_s], 3)   #stacking into indices of shape (N, H, W, 3)
        return tf.gather_nd(feature_map, indices)   #extracting values corresponding to those indices

      def bilinear_sampler(self, feature_map, x, y):
        N, H, W, C = feature_map.shape
        max_y = tf.cast(H - 1, dtype = tf.int32)
        max_x = tf.cast(W - 1, dtype = tf.int32)
        zero = tf.zeros([], dtype= tf.int32)

        x = tf.cast(x, dtype = tf.float32)
        y = tf.cast(y, dtype = tf.float32)    

        #Reshaping the batch grid from [-1, 1] to [0, W-1] and [0, H-1]
        x = (x + 1.0) * tf.cast(max_x, dtype = tf.float32)/2.0
        y = (y + 1.0) * tf.cast(max_y, dtype = tf.float32)/2.0

        #Taking the 4 nearest points to the (x_i, y_i) to perform interpolation
        x0 = tf.cast(tf.floor(x), dtype=tf.int32)
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), dtype = tf.int32)
        y1 = y0 + 1

        #clipping the value to be between [0, W-1] or [0, H-1]
        x0 = tf.clip_by_value(x0, zero, max_x)
        x1 = tf.clip_by_value(x1, zero, max_x)
        y0 = tf.clip_by_value(y0, zero, max_y)
        y1 = tf.clip_by_value(y1, zero, max_y)

        #getting pixel values of the corner coordinates(x0,y0), (x0, y1), (x1, y0), (x1, y1)
        Ia = self.get_pixel_value(feature_map, x0, y0)
        Ib = self.get_pixel_value(feature_map, x0, y1)
        Ic = self.get_pixel_value(feature_map, x1, y0)
        Id = self.get_pixel_value(feature_map, x1, y1)

        #Changing the data type to float32
        x0 = tf.cast(x0, dtype = tf.float32)
        x1 = tf.cast(x1, dtype = tf.float32)
        y0 = tf.cast(y0, dtype = tf.float32)
        y1 = tf.cast(y1, dtype = tf.float32)

        #calculating delta (or simply area) weights for interpolation
        Wa = tf.expand_dims((x1-x)*(y1-y), axis=3)
        Wb = tf.expand_dims((x1-x)*(y-y0), axis=3)
        Wc = tf.expand_dims((x-x0)*(y1-y), axis=3)
        Wd = tf.expand_dims((x-x0)*(y-y0), axis=3)
        out = tf.add_n([Wa*Ia, Wb*Ib, Wc*Ic, Wd*Id])
        return out

      def call(self, feature_map, theta, out_size = None):
        N, H, W, _ = feature_map.shape

        if out_size:
          out_H = out_size[0]
          out_W = out_size[1]
          batch_grids = self.affine_transform([out_H, out_W], theta)
        else:
          batch_grids = self.affine_transform([H, W], theta)

        x_s = batch_grids[:,0,:,:]
        y_s = batch_grids[:,1,:,:]

        output_feature_map = self.bilinear_sampler(feature_map, x_s, y_s)
        return output_feature_map
        
    class Localisation_Network(Layer):
      def __init__(self):
        super(Localisation_Network, self).__init__()
        self.conv = Conv2D(4,(3, 3), padding = "valid", strides=2, activation="relu", kernel_initializer="he_normal")
        self.flatten = Flatten()
        self.dense_1 = Dense(64, activation="relu", kernel_initializer="he_normal")
        self.dense_2 = Dense(6, activation="linear")
        self.reshape = Reshape((2, 3))

      def call(self, input_tensor):
        x = self.conv(input_tensor)
        x = self.flatten(x)
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.reshape(x)
        return x    

    def get_model():
      x_input = Input((28, 28, 1))
      u = Conv2D(16, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(x_input)
      u = Conv2D(16, (3, 3), padding = "same", strides = 2, activation="relu", kernel_initializer="he_normal")(u)
      theta = Localisation_Network()(u)
      v = SpatialTransformer()(u, theta)
      v = Conv2D(32, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(v)
      x = Conv2D(32, (3, 3), padding = "same", strides = 2, activation= "relu", kernel_initializer="he_normal")(v)
      x = GlobalAveragePooling2D()(x)
      x = Flatten()(x)
      x = Dense(10,activation ="softmax")(x)
      model =  Model(inputs = x_input, outputs = x)
      return model

The error raised by the above code:

    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-47-d630585afd1d> in <module>()
          4 u = Conv2D(16, (3, 3), padding = "same", strides = 2, activation="relu", kernel_initializer="he_normal")(u)
          5 theta = Localisation_Network()(u)
    ----> 6 v = SpatialTransformer()(u, theta)
          7 v = Conv2D(32, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(v)
          8 x = Conv2D(32, (3, 3), padding = "same", strides = 2, activation= "relu", kernel_initializer="he_normal")(v)

                                          4 frames
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
        668       except Exception as e:  # pylint:disable=broad-except
        669         if hasattr(e, 'ag_error_metadata'):
    --> 670           raise e.ag_error_metadata.to_exception(e)
        671         else:
        672           raise

    ValueError: in user code:

        <ipython-input-7-910b0adb6eb7>:83 call  *
            batch_grids = self.affine_transform([H, W], theta)
        <ipython-input-45-eb5ac5f8f722>:14 affine_transform  *
            sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
            return target(*args, **kwargs)
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py:1405 stack
            value_shape = ops.convert_to_tensor(values[0], name=name)._shape_tuple()  # pylint: disable=protected-access
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/profiler/trace.py:163 wrapped
            return func(*args, **kwargs)
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1540 convert_to_tensor
            ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:339 _constant_tensor_conversion_function
            return constant(v, dtype=dtype, name=name)
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:265 constant
    allow_broadcast=True)
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:283 _constant_impl
    allow_broadcast=allow_broadcast))
        /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_util.py:445 make_tensor_proto
            raise ValueError("None values not supported.")

        ValueError: None values not supported.

2 Answers


I removed the tf.tile call, because the vectorized output of the localisation network, with shape (None, 2, 3), broadcasts against the shared sampling grid during the tf.matmul, which performs the vectorization trick for us. I also replaced every tf.reshape operation with the predefined Keras reshape layer tf.keras.layers.Reshape(), since those layers stay vectorized over the (unknown) batch dimension.
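
A minimal sketch of the broadcasting this relies on (the shapes are illustrative): the shared rank-2 grid broadcasts across the batched theta inside tf.matmul, so no explicit tf.tile is needed:

    import tensorflow as tf

    theta = tf.random.normal([8, 2, 3])    # batched affine parameters (N, 2, 3)
    grid = tf.random.normal([3, 100])      # one sampling grid shared by the whole batch (3, H*W)
    out = tf.matmul(theta, grid)           # grid broadcasts across the batch dimension
    print(out.shape)                       # (8, 2, 100)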

    class SpatialTransformer(Layer):
      def __init__(self, out_size, name = "spatial_transformer"):
        super(SpatialTransformer, self).__init__(name = name)
        self.out_size = out_size
        #replacing every reshape with a predefined Keras Reshape layer keeps the ops vectorized over the batch
        self.reshape_1 = Reshape([2, self.out_size[0], self.out_size[1]])
        self.reshape_2 = Reshape([self.out_size[0], self.out_size[1]])
        self.reshape_3 = Reshape([1, 1])

      def affine_transform(self, input_shape, theta):
        H, W = input_shape  #output dimensions of grid
        x_t, y_t = tf.meshgrid(tf.linspace(-1.0, 1.0, W), tf.linspace(-1.0, 1.0, H))
        x_t = tf.cast(tf.reshape(x_t, [-1]), dtype = tf.float32)
        y_t = tf.cast(tf.reshape(y_t, [-1]), dtype = tf.float32)
        ones = tf.ones(x_t.shape, dtype = tf.float32)
        sampling_grids = tf.stack([x_t, y_t, ones])
        # sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))   #removed: tf.matmul broadcasts instead
        batch_grids = tf.matmul(theta, sampling_grids)
        batch_grids = self.reshape_1(batch_grids)
        return batch_grids

      def get_pixel_value(self, feature_map, x_s, y_s):
        "Util function to get pixel values from a 4d feature map at the positions given by x_s and y_s"
        H, W = x_s.shape[1], x_s.shape[2]
        N = tf.shape(x_s)[0]                   #dynamic batch size; the static shape is None
        batch_idx = tf.range(0, N)
        batch_idx = self.reshape_3(batch_idx)
        b = tf.tile(batch_idx, (1, H, W))
        indices = tf.stack([b, y_s, x_s], 3)   #stacking into indices of shape (N, H, W, 3)
        return tf.gather_nd(feature_map, indices)   #extracting values corresponding to those indices

      def bilinear_sampler(self, feature_map, x, y):
        _, H, W, _ = feature_map.shape

        max_y = tf.cast(H - 1, dtype = tf.int32)
        max_x = tf.cast(W - 1, dtype = tf.int32)
        zero = tf.zeros([], dtype = tf.int32)

        x = tf.cast(x, dtype = tf.float32)
        y = tf.cast(y, dtype = tf.float32)

        #Rescaling the batch grid from [-1, 1] to [0, W-1] and [0, H-1]
        x = (x + 1.0) * tf.cast(max_x, dtype = tf.float32)/2.0
        y = (y + 1.0) * tf.cast(max_y, dtype = tf.float32)/2.0

        #Taking the 4 nearest points to (x_i, y_i) to perform interpolation
        x0 = tf.cast(tf.floor(x), dtype = tf.int32)
        x1 = x0 + 1
        y0 = tf.cast(tf.floor(y), dtype = tf.int32)
        y1 = y0 + 1

        #clipping the values to lie in [0, W-1] and [0, H-1]
        x0 = tf.clip_by_value(x0, zero, max_x)
        x1 = tf.clip_by_value(x1, zero, max_x)
        y0 = tf.clip_by_value(y0, zero, max_y)
        y1 = tf.clip_by_value(y1, zero, max_y)

        #getting pixel values at the corner coordinates (x0, y0), (x0, y1), (x1, y0), (x1, y1)
        Ia = self.get_pixel_value(feature_map, x0, y0)
        Ib = self.get_pixel_value(feature_map, x0, y1)
        Ic = self.get_pixel_value(feature_map, x1, y0)
        Id = self.get_pixel_value(feature_map, x1, y1)

        #Changing the data type to float32
        x0 = tf.cast(x0, dtype = tf.float32)
        x1 = tf.cast(x1, dtype = tf.float32)
        y0 = tf.cast(y0, dtype = tf.float32)
        y1 = tf.cast(y1, dtype = tf.float32)

        #calculating the area weights for interpolation
        Wa = tf.expand_dims((x1-x)*(y1-y), axis = 3)
        Wb = tf.expand_dims((x1-x)*(y-y0), axis = 3)
        Wc = tf.expand_dims((x-x0)*(y1-y), axis = 3)
        Wd = tf.expand_dims((x-x0)*(y-y0), axis = 3)
        out = tf.add_n([Wa*Ia, Wb*Ib, Wc*Ic, Wd*Id])
        return out

      def call(self, input_tensor):
        feature_map, theta = input_tensor
        _, H, W, _ = feature_map.shape

        if self.out_size:
          out_H = self.out_size[0]
          out_W = self.out_size[1]
          batch_grids = self.affine_transform([out_H, out_W], theta)
        else:
          batch_grids = self.affine_transform([H, W], theta)

        x_s = self.reshape_2(batch_grids[:, 0, :, :])
        y_s = self.reshape_2(batch_grids[:, 1, :, :])

        output_feature_map = self.bilinear_sampler(feature_map, x_s, y_s)
        return output_feature_map

    class Localisation_Network(Layer):
      def __init__(self):
        super(Localisation_Network, self).__init__()
        self.conv_1 = Conv2D(16, (3, 3), padding = "same", strides = 1, activation = "relu", kernel_initializer = "he_normal")
        self.conv_2 = Conv2D(32, (3, 3), padding = "same", strides = 1, activation = "relu", kernel_initializer = "he_normal")
        self.flatten = Flatten()
        self.dense_1 = Dense(32, activation = "relu", kernel_initializer = "he_normal")

        def bias_init(shape, dtype = None):
          #identity affine transform [[1, 0, 0], [0, 1, 0]], flattened to 6 values
          return tf.constant([1.0, 0.0, 0.0, 0.0, 1.0, 0.0], dtype = dtype)

        self.dense_2 = Dense(6, kernel_initializer = "zeros", bias_initializer = bias_init)
        self.reshape = Reshape((2, 3))

      def call(self, input_tensor):
        x = self.conv_1(input_tensor)
        x = self.conv_2(x)
        x = tf.reduce_mean(x, axis = [1, 2])   #global average pooling before the dense layers
        x = self.dense_1(x)
        x = self.dense_2(x)
        x = self.reshape(x)
        return x

    def transformer_model_2():
      x_input = Input((28, 28, 1))
      theta = Localisation_Network()(x_input)
      x = SpatialTransformer(x_input.shape[1:3], name = "transformer_output")([x_input, theta])
      x = Conv2D(16, (3, 3), padding = "same", activation = "relu", kernel_initializer = "he_normal")(x)
      x = Conv2D(16, (3, 3), padding = "same", strides = 2, activation = "relu", kernel_initializer = "he_normal")(x)
      x = Conv2D(32, (3, 3), padding = "same", activation = "relu", kernel_initializer = "he_normal")(x)
      x = Conv2D(32, (3, 3), padding = "same", strides = 2, activation = "relu", kernel_initializer = "he_normal")(x)
      x = GlobalAveragePooling2D()(x)
      x = Flatten()(x)
      x = Dense(10, activation = "softmax")(x)
      return Model(inputs = x_input, outputs = x)

The only thing I am still stuck on is the localisation network. Since it is a regression network, a linear activation is used, but the network's outputs become large; they are then clipped during bilinear sampling, which ultimately yields zero outputs, so no gradient can flow back through the localisation network.

I looked through Medium posts and GitHub for a solution. Many of them suggest initializing the last layer of the localisation network with weights set to zeros and the bias set to the identity transform [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], but it did not work for me.
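
For reference, the initialization those posts describe can also be written with TensorFlow's built-in constant initializer (a sketch of the same idea, not a confirmed fix for the problem above):

    import tensorflow as tf

    # start the localisation head at the identity warp:
    # zero weights, bias = [[1, 0, 0], [0, 1, 0]] flattened to 6 values
    dense_theta = tf.keras.layers.Dense(
        6,
        kernel_initializer = "zeros",
        bias_initializer = tf.constant_initializer([1.0, 0.0, 0.0, 0.0, 1.0, 0.0]))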

answered 2021-01-27T09:43:10.777

It is hard to say from here, but based on the stack trace this line seems to be the problem: sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1])) (a None is being passed where it is not expected).
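
For example, a common workaround for that particular line (a sketch) is to read the batch size at run time with tf.shape instead of the static .shape attribute, which is None while the graph is traced:

    N = tf.shape(theta)[0]                                           # dynamic batch size tensor
    sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))    # works with an unknown batch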

The second thing I noticed: I am not sure your call method override in SpatialTransformer should really take three arguments, def call(self, feature_map, theta, out_size = None):. Since it inherits from Layer, it seems it should take only the input tensor argument.
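
For instance, multiple inputs are usually passed to call packed in a single list or tuple (an illustrative sketch, not your layer's full logic):

    class SpatialTransformer(Layer):
      def call(self, inputs):
        feature_map, theta = inputs   # unpack the two input tensors
        return feature_map            # placeholder body for illustration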

I am also not sure whether you need to override build for your use case; the required initialization could perhaps be done there.

Apart from that, you could try logging extensively (adding print statements) to see exactly where the None value "enters".
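
For example (illustrative), a quick probe shows where the None comes from:

    import tensorflow as tf

    theta = tf.keras.Input((2, 3))        # stand-in for the localisation network output
    print("theta shape:", theta.shape)    # (None, 2, 3) while Keras traces the graph
    print("N:", theta.shape[0])           # None - the value tf.stack later chokes on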

Finally, you could also post a code excerpt that is just enough to reproduce the error; that might attract more help.

answered 2021-01-26T08:17:50.353