我想在 CNN 中使用注意力。注意是 (N,1) 而 N 是批量大小。我想将其更改为 (1,N) 然后使用 softmax。Pytorch 可以使用“转置”来做到这一点。但是当我在 keras 中使用“Permute”时,会出现错误:
Input 0 is incompatible with layer flatten_2: expected min_ndim=3, found ndim=2
我的代码在这里:
class AttentionModel:
def __init__(self):
self.L = 500
self.D = 128
self.K = 1
inputs = Input(shape=(28,28,1))
result1 = self.feature_extractor_part1(inputs)
result2 = self.feature_extractor_part2(result1) # (N,500)
attention=self.attention(result2) #(N,1)
attention=Permute(dims=(2,1))(attention) #(1,N) !!PROBLEM!!
attention=Flatten()(attention)
attention=Activation('softmax')(attention) #(1,N)
M=Dot()(attention,result2) #(K,L)
final_result=self.classifer(M)
self.model=Model(inputs=inputs,outputs=final_result)
self.model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
def feature_extractor_part1(self, inputs):
conv1 = Conv2D(20, kernel_size=5, activation='relu')(inputs)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(50, kernel_size=5, activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
return pool2
def feature_extractor_part2(self, inputs):
flat = Flatten()(inputs)
dense = Dense(self.L, activation='relu')(flat)
return dense
def attention(self, inputs):
flat1 = Dense(self.D, activation='tanh')(inputs)
flat2 = Dense(self.K)(flat1)
return flat2
def classifer(self,inputs):
result=Dense(1,activation='sigmoid')(inputs)
return result