卷积层的输出特征图是(Batch, Height, Width, Channels)。当我们在 tensorflow 中初始化 CNN 时,我们得到 None 值来代替 Batch。我正在尝试在自定义层中实现空间变换器网络,因此需要将层矢量化为卷积层批量大小。当我尝试初始化网络时,空间转换器层给出了无法使用 None 值执行操作的错误。
我的代码如下所示
class SpatialTransformer(Layer):
def __init__(self):
super(SpatialTransformer, self).__init__()
def affine_transform(self, input_shape, theta):
N = theta.shape[0]
H, W = input_shape #output dimensions of grid
x_t, y_t = tf.meshgrid(tf.linspace(-1, 1, W), tf.linspace(-1, 1, H))
x_t = tf.cast(tf.reshape(x_t, [-1]), dtype = tf.float32)
y_t = tf.cast(tf.reshape(y_t, [-1]), dtype = tf.float32)
ones = tf.ones(x_t.shape, dtype=tf.float32)
sampling_grids = tf.stack([x_t, y_t, ones])
sampling_grids = tf.expand_dims(sampling_grids, axis = 0)
sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
batch_grids = tf.matmul(theta, sampling_grids)
batch_grids = tf.reshape(batch_grids, [N, 2, H, W])
return batch_grids
def get_pixel_value(self, feature_map, x_s, y_s):
"Util Function to get the value of pixel from 4d image tensors given position vectors x_s and y_s"
N, H, W = x_s.shape
batch_idx = tf.range(0, N)
batch_idx = tf.reshape(batch_idx, (N, 1, 1))
b = tf.tile(batch_idx, (1, H, W))
indices = tf.stack([b, y_s, x_s], 3) #creating indices of shape(N, H, W)
return tf.gather_nd(feature_map, indices) #extracting values corresponding to those indices
def bilinear_sampler(self, feature_map, x, y):
N, H, W, C = feature_map.shape
max_y = tf.cast(H - 1, dtype = tf.int32)
max_x = tf.cast(W - 1, dtype = tf.int32)
zero = tf.zeros([], dtype= tf.int32)
x = tf.cast(x, dtype = tf.float32)
y = tf.cast(y, dtype = tf.float32)
#Reshaping the batch grid from [-1, 1] to [0, W-1] and [0, H-1]
x = (x + 1.0) * tf.cast(max_x, dtype = tf.float32)/2.0
y = (y + 1.0) * tf.cast(max_y, dtype = tf.float32)/2.0
#Taking the 4 nearest points to the (x_i, y_i) to perform interpolation
x0 = tf.cast(tf.floor(x), dtype=tf.int32)
x1 = x0 + 1
y0 = tf.cast(tf.floor(y), dtype = tf.int32)
y1 = y0 + 1
#clipping the value to be between [0, W-1] or [0, H-1]
x0 = tf.clip_by_value(x0, zero, max_x)
x1 = tf.clip_by_value(x1, zero, max_x)
y0 = tf.clip_by_value(y0, zero, max_y)
y1 = tf.clip_by_value(y1, zero, max_y)
#getting pixel values of the corner coordinates(x0,y0), (x0, y1), (x1, y0), (x1, y1)
Ia = self.get_pixel_value(feature_map, x0, y0)
Ib = self.get_pixel_value(feature_map, x0, y1)
Ic = self.get_pixel_value(feature_map, x1, y0)
Id = self.get_pixel_value(feature_map, x1, y1)
#Changing the data type to float32
x0 = tf.cast(x0, dtype = tf.float32)
x1 = tf.cast(x1, dtype = tf.float32)
y0 = tf.cast(y0, dtype = tf.float32)
y1 = tf.cast(y1, dtype = tf.float32)
#calculating delta (or simply area) weights for interpolation
Wa = tf.expand_dims((x1-x)*(y1-y), axis=3)
Wb = tf.expand_dims((x1-x)*(y-y0), axis=3)
Wc = tf.expand_dims((x-x0)*(y1-y), axis=3)
Wd = tf.expand_dims((x-x0)*(y-y0), axis=3)
out = tf.add_n([Wa*Ia, Wb*Ib, Wc*Ic, Wd*Id])
return out
def call(self, feature_map, theta, out_size = None):
N, H, W, _ = feature_map.shape
if out_size:
out_H = out_size[0]
out_W = out_size[1]
batch_grids = self.affine_transform([out_H, out_W], theta)
else:
batch_grids = self.affine_transform([H, W], theta)
x_s = batch_grids[:,0,:,:]
y_s = batch_grids[:,0,:,:]
output_feature_map = self.bilinear_sampler(feature_map, x_s, y_s)
return output_feature_map
class Localisation_Network(Layer):
def __init__(self):
super(Localisation_Network, self).__init__()
self.conv = Conv2D(4,(3, 3), padding = "valid", strides=2, activation="relu", kernel_initializer="he_normal")
self.flatten = Flatten()
self.dense_1 = Dense(64, activation="relu", kernel_initializer="he_normal")
self.dense_2 = Dense(6, activation="linear")
self.reshape = Reshape((2, 3))
def call(self, input_tensor):
x = self.conv(input_tensor)
x = self.flatten(x)
x = self.dense_1(x)
x = self.dense_2(x)
x = self.reshape(x)
return x
def get_model():
x_input = Input((28, 28, 1))
u = Conv2D(16, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(x_input)
u = Conv2D(16, (3, 3), padding = "same", strides = 2, activation="relu", kernel_initializer="he_normal")(u)
theta = Localisation_Network()(u)
v = SpatialTransformer()(u, theta)
v = Conv2D(32, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(v)
x = Conv2D(32, (3, 3), padding = "same", strides = 2, activation= "relu", kernel_initializer="he_normal")(v)
x = GlobalAveragePooling2D()(x)
x = Flatten()(x)
x = Dense(10,activation ="softmax")(x)
model = Model(inputs = x_input, outputs = x)
return model
上述代码的错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-47-d630585afd1d> in <module>()
4 u = Conv2D(16, (3, 3), padding = "same", strides = 2, activation="relu", kernel_initializer="he_normal")(u)
5 theta = Localisation_Network()(u)
----> 6 v = SpatialTransformer()(u, theta)
7 v = Conv2D(32, (3, 3), padding = "same", activation= "relu", kernel_initializer="he_normal")(v)
8 x = Conv2D(32, (3, 3), padding = "same", strides = 2, activation= "relu", kernel_initializer="he_normal")(v)
4 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
668 except Exception as e: # pylint:disable=broad-except
669 if hasattr(e, 'ag_error_metadata'):
--> 670 raise e.ag_error_metadata.to_exception(e)
671 else:
672 raise
ValueError: in user code:
<ipython-input-7-910b0adb6eb7>:83 call *
batch_grids = self.affine_transform([H, W], theta)
<ipython-input-45-eb5ac5f8f722>:14 affine_transform *
sampling_grids = tf.tile(sampling_grids, tf.stack([N, 1, 1]))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py:1405 stack
value_shape = ops.convert_to_tensor(values[0], name=name)._shape_tuple() # pylint: disable=protected-access
/usr/local/lib/python3.6/dist-packages/tensorflow/python/profiler/trace.py:163 wrapped
return func(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py:1540 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:339 _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:265 constant
allow_broadcast=True)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/constant_op.py:283 _constant_impl
allow_broadcast=allow_broadcast))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_util.py:445 make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.