
I am trying to build a 5-layer neural network to classify a dataset with 3 classes, 178 instances, and 13 features. I basically followed the guidelines given here. I wrote my own code in Matlab and it runs without errors; however, the training results are very poor: the model keeps predicting the same class for every input. I can't figure out where my code goes wrong, or whether the model is simply not suitable for the data. Could someone help me find the problem? Many thanks.

My Matlab training code is shown below:

    %% Initialization
    numclass = 3;                  % num of classes
    c = 13;                        % num of features
    r = size(X0,1);                % num of training instances (X0 is r-by-c)
    % for each layer, initialize each parameter w and each b
    % to a small random value near zero
    w1 = normrnd(0,0.01,[c,10]);   % Input layer -> layer 2 (10 nodes)
    b1 = normrnd(0,0.01,[1,10]);
    w2 = normrnd(0,0.01,[10,6]);   % layer 2 -> layer 3 (6 nodes)
    b2 = normrnd(0,0.01,[1,6]);
    w3 = normrnd(0,0.01,[6,4]);    % layer 3 -> layer 4 (4 nodes)
    b3 = normrnd(0,0.01,[1,4]);
    w4 = normrnd(0,0.01,[4,numclass]); % layer 4 -> Output layer (3 nodes/class label)
    b4 = normrnd(0,0.01,[1,numclass]);

    Iter = 0;
    lambda = 0.5;     % regularization coefficient

    %% Batch Training
    while Iter < 200
        Iter = Iter+1              % no semicolon, so the current iteration is displayed
        d_w1 = 0; d_w2 = 0; d_w3 = 0; d_b1 = 0; d_b2 = 0; d_b3 = 0;
        d_w4 = 0; d_b4 = 0;
        for i = 1:r
        % Forward propagation
            a1 = X0(i,:);     % X0 is the training data; each row represents an instance with 13 features
            % Input layer -> Layer 2
            z2 = a1*w1+b1;
            a2 = sigmoid(z2);
            % Layer 2 -> Layer 3
            z3 = a2*w2+b2;
            a3 = sigmoid(z3);
            % Layer 3 -> Layer 4
            z4 = a3*w3+b3;
            a4 = sigmoid(z4);   
            % Layer 4 -> Output Layer
            z5 = a4*w4+b4;
            a5 = sigmoid(z5);           
        % Backward propagation
            y = zeros(1,numclass);
            y(Y0(i)) = 1;    % Y0 is the training label ({1,2,3} in this case), each element indicates which class the instance belongs to 
            % Error at the output layer
            delta5 = (-(y-a5).*d_sigmoid(z5))';
            % Output layer -> Layer 4
            delta4 = (w4*delta5).*d_sigmoid(z4');
            % Layer 4 -> Layer 3
            delta3 = (w3*delta4).*d_sigmoid(z3');
            % Layer 3 -> Layer 2
            delta2 = (w2*delta3).*d_sigmoid(z2');
        % Compute the desired partial derivatives
            d_w1 = d_w1 + (delta2*a1)';
            d_b1 = d_b1 + delta2';
            d_w2 = d_w2 + (delta3*a2)';
            d_b2 = d_b2 + delta3';
            d_w3 = d_w3 + (delta4*a3)';
            d_b3 = d_b3 + delta4';
            d_w4 = d_w4 + (delta5*a4)';
            d_b4 = d_b4 + delta5';        
        end    
        eta = 0.8;     % learning rate
        % weights and bias updating
        w1 = w1 - eta*((1/r*d_w1)+ lambda*w1);
        b1 = b1 - eta*1/r*d_b1;
        w2 = w2 - eta*((1/r*d_w2)+ lambda*w2);
        b2 = b2 - eta*1/r*d_b2;
        w3 = w3 - eta*((1/r*d_w3)+ lambda*w3);
        b3 = b3 - eta*1/r*d_b3;
        w4 = w4 - eta*((1/r*d_w4)+ lambda*w4);
        b4 = b4 - eta*1/r*d_b4;    
    end
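
For reference, this is the kind of finite-difference check I have in mind for verifying the backpropagated derivatives against a numerical gradient (a rough sketch only: epsilon, the example index, and the fwd/loss anonymous functions below are illustrative, not part of my training code, and the regularization term is ignored):

    % Finite-difference check on one entry of w3 for a single training example.
    % epsilon, the example index i, and fwd/loss are illustrative only;
    % the lambda regularization term is not included here.
    epsilon = 1e-4;
    i = 1;                                    % pick one training example
    y = zeros(1,numclass); y(Y0(i)) = 1;      % same one-hot target as in the loop
    fwd  = @(W3) sigmoid(sigmoid(sigmoid(sigmoid(X0(i,:)*w1+b1)*w2+b2)*W3+b3)*w4+b4);
    loss = @(W3) 0.5*sum((y - fwd(W3)).^2);   % squared-error loss for this example
    Wp = w3; Wp(2,1) = Wp(2,1) + epsilon;     % perturb one weight up ...
    Wm = w3; Wm(2,1) = Wm(2,1) - epsilon;     % ... and down
    numGrad = (loss(Wp) - loss(Wm)) / (2*epsilon)
    % numGrad should be close to the (2,1) entry of (delta4*a3)' computed for
    % this single example in the backward pass above.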

The sigmoid and d_sigmoid functions are shown below:

    function y = sigmoid(x)
       L=1;
       k=10;
       x0=0;
       y = L./(1+exp(-k*(x-x0)));
    end

    function y = d_sigmoid(x)
       tmp = sigmoid(x);
       y = tmp.*(1-tmp);
    end
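
Just to illustrate how steep these functions are with k = 10, the values below are what I expect from the formula (not measured from my training run):

    % Quick numerical check of the sigmoid with k = 10 (illustrative values only)
    z = [-1 -0.5 -0.1 0 0.1 0.5 1];
    sigmoid(z)      % roughly [0.0000 0.0067 0.2689 0.5000 0.7311 0.9933 1.0000]
    d_sigmoid(z)    % s.*(1-s), which drops towards zero quickly away from z = 0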

The prediction code is shown below:

    %% Prediction: X1 is the testing data, and Y1 is a vector of testing labels
    [r,c] = size(X1);
    pred = zeros(1,r);          % preallocate the prediction vector
    for i = 1:r
        A1 = X1(i,:);
        % Input layer -> Layer 2
        Z2 = A1*w1+b1;
        A2 = sigmoid(Z2);
        % Layer 2 -> Layer 3
        Z3 = A2*w2+b2;
        A3 = sigmoid(Z3);
        % Layer 3 -> Layer 4
        Z4 = A3*w3+b3;
        A4 = sigmoid(Z4);
        % Layer 4 -> Output Layer
        Z5 = A4*w4+b4;
        A5 = sigmoid(Z5);
        [~, pred(i)] = max(A5);   % index of the largest output is the predicted class
    end
    numErrors = sum(pred(:) ~= Y1(:))   % number of misclassified test instances
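
To confirm whether the predictions really collapse onto a single class, I also tally the predicted labels on the test set like this (a quick sketch, assuming pred, Y1, and numclass are defined as above):

    % Count how often each class is predicted and compute the test accuracy
    counts = accumarray(pred(:), 1, [numclass 1])   % number of predictions per class
    accuracy = mean(pred(:) == Y1(:))               % fraction classified correctly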