I created a def function in Python that does simple gradient partial-derivative updates to train a very basic function-approximation neural network with no activation function. It takes a single input x1 and tries to estimate the output y.
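To be concrete, the per-sample update I am doing is essentially the sketch below (a stripped-down version of the full script further down; it leaves out the running 1/sum_counter averaging, and the x1/y_target numbers are made up):

m, b, alpha = -0.5, 0.0, 0.1      # same starting values as in my script
x1, y_target = 0.3, 0.7           # one made-up training pair on a standardized scale
y_hat = b + m * x1                # no activation, just a straight line
e = y_target - y_hat              # plain error for this sample, e = y_actual - y_estimate
m = m + alpha * 2 * e * x1        # step along the partial derivative of the squared error w.r.t. m
b = b + alpha * 2 * e             # step along the partial derivative of the squared error w.r.t. b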
I also have a standardization def function. When I standardize the input data set first and then run my code, everything works fine. But when I run the raw data set through the same gradient partial-derivative function, the values of m and b become huge as soon as the updates start, and I get overflow errors.
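The standardization function itself is just a per-column z-score. A vectorized one-liner that should be equivalent to the loop version in my script is:

import numpy as np
# z-score each column: subtract the column mean, divide by the column standard deviation
x_standard = (data - data.mean(axis=0)) / data.std(axis=0)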
Does anyone know how I can fix this? I have figured out that it happens because the updates to m and b feed back into y, but I am not sure how to stop it. From some quick googling I have not found a solution, only people saying that overflow means a number has exceeded the limits of the data type you are using. How do I stop the overflow from happening?
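Here is a tiny standalone sketch of what I think is happening with the raw data (x_val and y_val are made-up numbers on roughly the raw scale of the marks, not taken from the real CSV):

x_val, y_val = 80.0, 75.0                   # made-up raw-scale sample
m, b, alpha = -0.5, 0.0, 0.1
for step in range(1, 6):
    y_hat = b + m * x_val
    e = y_val - y_hat
    m = m + alpha * (2 / step) * e * x_val  # same update as in my function
    b = b + alpha * (2 / step) * e
    print(step, m, b)
# Each pass multiplies m by roughly (1 - 2*alpha*x_val**2/step), which is far larger
# than 1 in magnitude when x is on the raw scale, so m grows explosively instead of converging.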
import os
import numpy as np
import random
import csv
import urllib.request
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
########################## Setting the working directory #####################
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
########## Downloading the data set to the current working directory #########
url = 'https://raw.githubusercontent.com/tofighi/MachineLearning/master/datasets/student_marks.csv'
urllib.request.urlretrieve(url,filename = 'data.csv')
data = np.genfromtxt(fname = 'data.csv', dtype = float, delimiter = ',', skip_header=0)
data = np.delete(data,(0),axis=0)
def standardization(data):
    x = np.zeros((len(data), 2))
    mean_data1 = np.mean(data[:, 0])
    std_data1 = np.std(data[:, 0])
    mean_data2 = np.mean(data[:, 1])
    std_data2 = np.std(data[:, 1])
    for i in range(0, len(x)):
        x[i, 0] = (data[i, 0] - mean_data1) / std_data1
        x[i, 1] = (data[i, 1] - mean_data2) / std_data2
    return x
def gradient_Partial_Derivatives(nEpoch, N, b, m, x):
    #m_tracker = np.zeros(nEpoch*N)
    #b_tracker = np.zeros(nEpoch*N)
    error = np.zeros(nEpoch * N)
    counter = 0
    error_sum = 0
    sum_counter = 1
    #Training m and b; alpha is read from the module-level variable defined below
    for epoch in range(0, nEpoch):
        a = range(0, len(x))
        sp = random.sample(a, len(x))
        for j in range(0, N):
            #Calculate new final grade based on midterm. Training estimate for y.
            y = b + m * x[sp[j], 0]
            #Find the error between the estimated final y and the target final x[j,1]
            #This is not the error function, just e = y_actual - y_estimate
            e = x[sp[j], 1] - y
            #Update m and b using the partial derivatives
            m = m + alpha * (2 / sum_counter) * e * x[sp[j], 0]
            b = b + alpha * (2 / sum_counter) * e
            er = (x[sp[j], 1] - y) ** 2
            error_sum = error_sum + er
            error[counter] = error_sum / sum_counter
            #m_tracker[counter] = m
            #b_tracker[counter] = b
            counter = counter + 1
            sum_counter = sum_counter + 1
    return m, b, error
########################### Initializing Variables ###########################
m = -0.5
b = 0
alpha = 0.1
##############################################################################
##############################################################################
############################## Standardization ###############################
#Standardizing the input
x_standard = standardization(data)
#Calculating partial derivatives and updating m and b
m_final, b_final, er = gradient_Partial_Derivatives(1, 100, b, m, x_standard)
#Calculating partial derivatives and updating m and b for 2000 iterations
m_final1, b_final1, er1 = gradient_Partial_Derivatives(20, 100, b, m, x_standard)
##############################################################################
##############################################################################
############################ No Standardization ##############################
#Calculating partial derivatives and updating m and b
m_final2, b_final2, er2 = gradient_Partial_Derivatives(1, 100, b, m, data)
#Calculating partial derivatives and updating m and b for 2000 iterations
m_final3, b_final3, er3 = gradient_Partial_Derivatives(20, 100, b, m, data)