我知道这看起来与之前就同一主题提出的许多问题相似。我对他们中的大多数人进行了调查,但他们并没有完全回答我的问题。我的问题是我的梯度没有收敛到最优值,它甚至在非常低的 alpha 值下发散和振荡。
我的数据生成功能如下
X = [[float(np.random.randn(1)) for i in range(0,100)] for j in range(0,5)]
X = np.array(X).transpose()
Y = [float(0) for i in range(0,100)]
Y = 2*X[:,0] + 3*X[:,1] + 1*X[:,2] + 4*X[:,3] + 1*X[:,4] + 5
fig, ax = plt.subplots(1,5)
fig.set_size_inches(20,5)
k = 0
for j in range(0,5):
sns.scatterplot(X[:,k],Y,ax=ax[j])
k += 1
我的 SGD 实现如下
def multilinreg(X,Y,epsilon = 0.000001,alpha = 0.01,K = 20):
Xnot = [[1] for i in range(0,len(X))]
Xnot = np.array(Xnot)
X = np.append(Xnot,X, axis = 1)
vars = X.shape[1]
W = []
W = [np.random.normal(1) for i in range(vars)]
W = np.array(W)
J = 0
for i in range(len(X)):
Yunit = 0
for j in range(vars):
Yunit = Yunit + X[i,j] * W[j]
J = J + (0.5/(len(X)))*((Y[i]-Yunit)**2)
err = 1
iter = 0
Weights = []
Weights.append(W)
Costs = []
while err > epsilon:
index = [np.random.randint(len(Y)) for i in range(K)]
Xsample, Ysample = X[index,:], Y[index]
m =len(Xsample)
Ypredsample = []
for i in range(len(Xsample)):
Yunit = 0
for j in range(vars):
Yunit = Yunit + X[i,j] * W[j]
Ypredsample.append(Yunit)
Ypredsample = np.array(Ypredsample)
for i in range(len(Xsample)):
for j in range(vars):
gradJunit = (-1)*(Xsample[i,j]*(Ysample[i] - Ypredsample[i]))
W[j] = W[j] - alpha*gradJunit
Jnew = 0
for i in range(len(Xsample)):
Yunit = 0
for j in range(vars):
Yunit = Yunit + Xsample[i,j]*W[j]
Jnew = Jnew + (0.5/(len(Xsample)))*((Ysample[i]-Yunit)**2)
Weights.append(W)
err = abs(float(Jnew - J))
J = Jnew
Costs.append(J)
iter += 1
if iter % 1000 == 0:
print(iter)
print(J)
Costs = np.array(Costs)
Ypred = []
for i in range(len(X)):
Yunit = 0
for j in range(vars):
Yunit = Yunit + X[i,j] * W[j]
Ypred.append(Yunit)
Ypred = np.array(Ypred)
return Ypred, iter, Costs, W
超参数如下
epsilon = 1*(10)**(-20)
alpha = 0.0000001
K = 50
我不认为这是一个数据问题。我使用的是一个相当简单的线性函数。
我认为这是方程式,但我也仔细检查了它们,它们对我来说似乎很好。