I am trying to create a custom activation layer based on the MNIST example in NeuPy. However, as soon as I apply my custom layer to the network, it stops training. For my custom functions I want the inputs of the ReLU and Softmax functions to be converted from floating-point to fixed-point values, so I wrote a function called "float_limit" that converts a floating-point value to a fixed-point one. My first idea was to use int() inside float_limit, but that raises a type error because int() cannot be applied to a tensor variable. So I replaced int() with T.floor(), which does the same job. The ReLU function works fine after applying float_limit(), but as soon as float_limit() is applied to the softmax function the network stops training. How can I solve this problem?
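For context, this is what float_limit is meant to do: with b = 8 the value is quantized to steps of 1/256. A minimal plain-Python sketch (the scalar helper name is mine, for illustration only):

def float_limit_scalar(n, b):
    d = 2 ** b              # 2**8 = 256 quantization steps per unit
    return int(n * d) / d   # truncate down to the nearest multiple of 1/d

print(float_limit_scalar(0.123456, 8))  # 0.12109375, i.e. 31/256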
Here is my code:
from sklearn import datasets, model_selection
from sklearn.preprocessing import OneHotEncoder
from neupy import environment, algorithms, layers
import numpy as np
from sklearn.model_selection import train_test_split
import theano
import theano.tensor as T
# load data
mnist = datasets.fetch_mldata('MNIST original')
data, target = mnist.data, mnist.target
# normalize the input data
data = data / 255.
data = data - data.mean(axis=0)
# one-hot encode the target labels
target_scaler = OneHotEncoder()
target = target_scaler.fit_transform(target.reshape((-1, 1)))
target = target.todense()
# split data for training and testing
environment.reproducible()
x_train, x_test, y_train, y_test = train_test_split(
    data.astype(np.float32),
    target.astype(np.float32),
    train_size=(6. / 7)
)
# Theano is a main backend for the Gradient Descent based algorithms in NeuPy.
theano.config.floatX = 'float32'
#################### create new transfer function ###########################
################# float limit #####################
# # idea code
# def float_limit(n, b):
#     d = 2 ** b
#     return int(n * d) / d
def float_limit(n, b):
    d = 2.0 ** b               # number of fixed-point steps per unit (2**8 = 256)
    return T.floor(n * d) / d  # truncate to the nearest lower step
###################################################
################ custom function ##################
################## relu ##################
def relu(x, alpha=0):
    if alpha == 0:
        x = float_limit(x, 8)
        result = 0.5 * (x + abs(x))
        return result
    else:
        x = float_limit(x, 8)
        alpha = T.as_tensor_variable(alpha)
        f1 = 0.5 * (1 + alpha)
        f2 = 0.5 * (1 - alpha)
        return f1 * x + f2 * abs(x)
class custom_relu(layers.ActivationLayer):
    def activation_function(self, input_value):
        return relu(input_value)
#################### softmax ########################
class custom_softmax(layers.ActivationLayer):
    def activation_function(self, input_value):
        input_value = float_limit(input_value, 8)
        return T.nnet.softmax(input_value)
########### start the model architecture ############
network = algorithms.Momentum(
    [
        layers.Input(784),
        custom_relu(500),    # ReLU
        custom_relu(300),    # ReLU
        custom_softmax(10),  # Softmax
    ],
    error='categorical_crossentropy',
    step=0.01,
    verbose=True,
    shuffle_data=True,
    momentum=0.99,
    nesterov=True,
)
# print the architecture(Input shape, Layer Type, Output shape)
network.architecture()
# train the network
network.train(x_train, y_train, x_test, y_test, epochs=30)
# show the accuracy
from sklearn import metrics
y_predicted = network.predict(x_test).argmax(axis=1)
y_test = np.asarray(y_test.argmax(axis=1)).reshape(len(y_test))
print("y_predicted",y_predicted)
print("y_test",y_test)
print(metrics.classification_report(y_test, y_predicted))
score = metrics.accuracy_score(y_test, y_predicted)
print("Validation accuracy: {:.2%}".format(score))
# plot the training error
from neupy import plots
plots.error_plot(network)