0

我为 Iris 数据集实现了一个使用 sigmoid 激活函数和反向传播的 4 层神经网络。它有 4 个输入单元、2 个隐藏层(每个隐藏层 8 个单元),以及一个含 3 个单元的输出层。代码只使用 numpy 编写,因为我想练习自己动手实现。问题在于,训练完成后用测试集进行测试时,它对每个样本给出的输出几乎相同,因此把所有测试用例都分类为第二类。而这些输出又并非完全相同,这让我更加困惑。

这是我的导入和预处理代码:

# The dimensions of each matrix are noted in the comment next to it
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris features (X) and integer class labels (y).
X, y = load_iris(return_X_y=True)
m = X.shape[0]                  # number of examples
n_labels = np.unique(y).size    # number of distinct classes

# One-hot encode the labels: row i gets a 1 in column y[i], e.g.
#   0 -> [1, 0, 0]
#   1 -> [0, 1, 0]
#   2 -> [0, 0, 1]
y_new = np.zeros((m, n_labels))  # m * n_labels
y_new[np.arange(m), y] = 1

# Hold out 10% of the examples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y_new, test_size=0.1)

这是我的主要代码:

def random_init(n_in, n_out, epsilon):
    """Draw an initial weight matrix uniformly from [-epsilon, epsilon).

    The result has ``n_out`` rows and ``n_in + 1`` columns (one extra column
    on top of the ``n_in`` inputs). Random rather than constant values are
    used so that the units of a layer do not start out identical.
    """
    shape = (n_out, n_in + 1)
    uniform01 = np.random.rand(*shape)  # samples in [0, 1)
    # Stretch [0, 1) to [0, 2*epsilon) and shift it to be centred on zero.
    return uniform01 * (2 * epsilon) - epsilon


def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) element-wise to ``z``."""
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)



def forward_propagation(X, y, dict2):
    """Run a single example through the network and cache each layer's values.

    Layer sizes are taken from the weight matrices rather than hard-coded, so
    any architecture with three weight matrices works.

    Args:
        X: features of one example; flattened into a column vector.
        y: label of the example. Not used by the forward pass; kept so the
            signature stays compatible with existing callers.
        dict2: weights stored under "weight1", "weight2" and "weight3". Each
            matrix is multiplied with an activation vector whose first entry
            is the bias unit, so column 0 holds the bias weights.

    Returns:
        dict with the pre-activations "z2", "z3", "z4" and the activations
        "a2", "a3", "a4". "a2" and "a3" include the leading bias unit (1);
        "a4" is the network output.
    """
    bias = np.ones(1)

    # Input layer: prepend the bias unit to the flattened features.
    a1 = np.append(bias, np.asarray(X)).reshape((-1, 1))  # (n_in + 1) * 1

    # First hidden layer.
    z2 = np.dot(dict2["weight1"], a1)
    a2 = np.append(bias, sigmoid(z2)).reshape((-1, 1))

    # Second hidden layer.
    z3 = np.dot(dict2["weight2"], a2)
    a3 = np.append(bias, sigmoid(z3)).reshape((-1, 1))

    # Output layer: no bias unit is added to the final activations.
    z4 = np.dot(dict2["weight3"], a3)
    a4 = sigmoid(z4)

    return {"a2": a2, "a3": a3, "a4": a4, "z2": z2, "z3": z3, "z4": z4}



def back_propagation(X, y, dict1, dict2, dict3):
    """Accumulate one example's weight gradients into ``dict3`` (in place).

    Args:
        X: features of one example (the same input given to the forward pass).
        y: target column vector for the example, same shape as ``dict1["a4"]``.
        dict1: cached activations "a2", "a3" (each with a leading bias unit)
            and "a4" (network output) from the forward pass.
        dict2: weights; "weight2" and "weight3" are read to propagate the
            error backwards.
        dict3: gradient accumulators "Delta1", "Delta2", "Delta3", shaped like
            the corresponding weight matrices. They are incremented, not
            overwritten.

    Returns:
        None. The result is the side effect on ``dict3``.
    """
    # Rebuild the input activation with its bias unit; the size follows X.
    a1 = np.append(np.ones(1), X).reshape((-1, 1))  # (n_in + 1) * 1
    a2 = dict1["a2"]
    a3 = dict1["a3"]
    a4 = dict1["a4"]
    weight2 = dict2["weight2"]
    weight3 = dict2["weight3"]

    # Output-layer error: prediction minus target.
    delta4 = a4 - y
    # Hidden-layer errors. a * (1 - a) is the sigmoid derivative written in
    # terms of the activation; the first row belongs to the bias unit, which
    # has no incoming weights, so it is dropped.
    delta3 = (np.dot(weight3.T, delta4) * (a3 * (1 - a3)))[1:]
    delta2 = (np.dot(weight2.T, delta3) * (a2 * (1 - a2)))[1:]

    # Outer products of each layer's error with the activations feeding it.
    dict3['Delta3'] += np.dot(delta4, a3.T)
    dict3['Delta2'] += np.dot(delta3, a2.T)
    dict3['Delta1'] += np.dot(delta2, a1.T)
    
    
def cost_function(X, y, dict2):
    """Return the mean cross-entropy cost of the network on ``(X, y)``.

    Args:
        X: feature matrix with one example per row (m rows).
        y: target matrix with one row per example, same shape as the output
            of ``predict``.
        dict2: network weights, passed through to ``predict``.

    Returns:
        The cross-entropy summed over all outputs and examples, divided by m.
    """
    m = X.shape[0]
    h_x = predict(X, dict2)  # m * n_outputs

    # A saturated sigmoid can return exactly 0.0 or 1.0; log(0) would then
    # turn the cost into inf/NaN. Clipping by machine epsilon leaves every
    # non-saturated prediction untouched.
    eps = np.finfo(float).eps
    h_x = np.clip(h_x, eps, 1 - eps)

    J = (1 / m) * np.sum(-y * np.log(h_x) - (1 - y) * np.log(1 - h_x))
    return J


def fit(X, y, alpha, num_iter, dict2):
    """Train the network with full-batch gradient descent.

    Args:
        X: feature matrix with one example per row.
        y: target matrix with one row per example.
        alpha: learning rate.
        num_iter: number of passes over the whole training set.
        dict2: weights under "weight1", "weight2", "weight3". The matrices
            are updated in place.

    Returns:
        dict with "Cost" (list of the cost after every pass) and the gradient
        accumulators "Delta1", "Delta2", "Delta3" left over from the last pass.
    """
    dict3 = {'Cost': []}

    for _ in range(num_iter):
        # Fresh accumulators for every pass, shaped like the matrices they
        # will update so the layer sizes are not hard-coded here.
        dict3['Delta1'] = np.zeros(dict2["weight1"].shape)
        dict3['Delta2'] = np.zeros(dict2["weight2"].shape)
        dict3['Delta3'] = np.zeros(dict2["weight3"].shape)

        for row_x, row_y in zip(X, y):
            curr_x = row_x.reshape((-1, 1))  # n_in * 1
            curr_y = row_y.reshape((-1, 1))  # n_out * 1
            d_1 = forward_propagation(curr_x, curr_y, dict2)
            back_propagation(curr_x, curr_y, d_1, dict2, dict3)

        # The accumulators hold the SUM of per-example gradients (they are
        # not divided by the number of examples), so the effective step size
        # grows with the size of the training set.
        dict2["weight1"] -= alpha * dict3['Delta1']
        dict2["weight2"] -= alpha * dict3['Delta2']
        dict2["weight3"] -= alpha * dict3['Delta3']
        dict3['Cost'].append(cost_function(X, y, dict2))
    return dict3


def predict(X, dict2):
    """Compute the network output for every row of ``X`` in one batch.

    Args:
        X: feature matrix with one example per row (m rows).
        dict2: weights under "weight1", "weight2", "weight3", with the bias
            weights in column 0 of each matrix.

    Returns:
        Array with one row per example and one column per output unit.
    """
    features = np.asarray(X)
    m = features.shape[0]
    bias = np.ones((1, m))

    # Examples must become COLUMNS for the weight products below. This has to
    # be a transpose: reshaping an (m, n) array to (n, m) only re-reads the
    # same memory in a different layout, which mixes features of different
    # examples into each column.
    a1 = np.append(bias, features.T, axis=0)  # (n_in + 1) * m

    z2 = np.dot(dict2["weight1"], a1)
    a2 = np.append(bias, sigmoid(z2), axis=0)  # bias row + first hidden layer

    z3 = np.dot(dict2["weight2"], a2)
    a3 = np.append(bias, sigmoid(z3), axis=0)  # bias row + second hidden layer

    z4 = np.dot(dict2["weight3"], a3)
    a4 = sigmoid(z4)  # n_out * m
    return a4.T  # m * n_out

这是训练部分:

# Half-width of the interval the initial weights are drawn from.
epsilon = 0.012

# (n_in, n_out) of each weight matrix, in layer order.
layer_shapes = ((4, 8), (8, 8), (8, 3))

# Randomly initialise one weight matrix per layer: weight1, weight2, weight3.
dict2 = {}
for layer, (n_in, n_out) in enumerate(layer_shapes, start=1):
    dict2[f'weight{layer}'] = random_init(n_in, n_out, epsilon)

# NOTE(review): a very small init range combined with a very small learning
# rate may leave the network close to its starting point - worth tuning if
# the predictions barely differ between examples.
alpha = 0.0001  # learning rate
num_iter = 300  # passes over the training set
d_3 = fit(X_train, y_train, alpha, num_iter, dict2)

所以当我运行测试集时,它会产生如下结果:

# Print the network output next to the one-hot target for each test example.
pred = predict(X_test, dict2)
for predicted, actual in zip(pred, y_test):
    print(f'Predicted:{predicted}, Actual:{actual}')

输出:

Predicted:[0.33530749 0.34205935 0.34909822], Actual:[0. 0. 1.]
Predicted:[0.33529658 0.34204889 0.34908868], Actual:[0. 0. 1.]
Predicted:[0.33530866 0.34206106 0.34910007], Actual:[1. 0. 0.]
Predicted:[0.33530336 0.34205595 0.34909549], Actual:[0. 1. 0.]
Predicted:[0.33530603 0.34205787 0.34909684], Actual:[0. 0. 1.]
Predicted:[0.33529737 0.34204983 0.3490895 ], Actual:[0. 1. 0.]
Predicted:[0.33531099 0.34206343 0.34910228], Actual:[1. 0. 0.]
Predicted:[0.33530516 0.34205763 0.34909703], Actual:[1. 0. 0.]
Predicted:[0.33531216 0.34206447 0.34910327], Actual:[1. 0. 0.]
Predicted:[0.3353009  0.34205322 0.34909274], Actual:[0. 1. 0.]
Predicted:[0.3353099  0.34206301 0.34910225], Actual:[0. 0. 1.]
Predicted:[0.33530988 0.34206279 0.34910207], Actual:[0. 1. 0.]
Predicted:[0.335315   0.34206706 0.34910561], Actual:[1. 0. 0.]
Predicted:[0.33530064 0.34205326 0.34909282], Actual:[0. 1. 0.]
Predicted:[0.33530877 0.34206142 0.34910049], Actual:[1. 0. 0.]

我真的被难住了,想知道我到底是哪里出了错。

4

0 回答 0