我曾尝试在 python 中实现梯度下降,但无论 lambda 和 alpha 值如何,成本 J 似乎都在增加,我无法弄清楚这里的问题是什么。如果有人可以帮助我解决这个问题,那就太好了。输入是具有相同维度的矩阵 Y 和 R。Y 是电影 x 用户的矩阵,R 只是表示用户是否对电影评分。
#Recommender system ML
import numpy
import scipy.io
def gradientDescent(y,r):
(nm,nu) = numpy.shape(y)
x = numpy.mat(numpy.random.randn(nm,10))
theta = numpy.mat(numpy.random.randn(nu,10))
for i in range(1,10):
(x,theta) = costFunc(x,theta,y,r)
def costFunc(x,theta,y,r):
X_tmp = numpy.power(x , 2)
Theta_tmp = numpy.power(theta , 2)
lmbda = 0.1
reg = ((lmbda/2) * numpy.sum(Theta_tmp))+ ((lmbda/2)*numpy.sum(X_tmp))
ans = numpy.multiply(numpy.power(((theta * x.T).T - y),2) , r)
res = (0.5 * numpy.sum(ans))+reg
print "J:",res
print "reg:",reg
(nm,nu) = numpy.shape(y)
X_grad = numpy.mat(numpy.zeros((nm,10)));
Theta_grad = numpy.mat(numpy.zeros((nu,10)));
alpha = 0.1
# [m f] = size(X);
(m,f) = numpy.shape(x);
for i in range(0,m):
for k in range(0,f):
tmp = 0
# X_grad(i,k) += (((theta * x'(:,i)) - y(i,:)').*r(i,:)')' * theta(:,k);
tmp += ((numpy.multiply(((theta * x.T[:,i]) - y[i,:].T),r[i,:].T)).T) * theta[:,k];
tmp += (lmbda*x[i,k]);
X_grad[i,k] -= (alpha*tmp)
# X_grad(i,k) += (lambda*X(i,k));
# [m f] = size(Theta);
(m,f) = numpy.shape(theta);
for i in range(0,m):
for k in range(0,f):
tmp = 0
# Theta_grad(i,k) += (((theta(i,:) * x') - y(:,i)').*r(:,i)') * x(:,k);
tmp += (numpy.multiply(((theta[i,:] * x.T) - y[:,i].T),r[:,i].T)) * x[:,k];
tmp += (lmbda*theta[i,k]);
Theta_grad[i,k] -= (alpha*tmp)
# Theta_grad(i,k) += (lambda*Theta(i,k));
return(X_grad,Theta_grad)
def main():
mat1 = scipy.io.loadmat("C:\Users\ROHIT\Machine Learning\Coursera\mlclass-ex8\ex8_movies.mat")
Y = mat1['Y']
R = mat1['R']
r = numpy.mat(R)
y = numpy.mat(Y)
gradientDescent(y,r)
#if __init__ == '__main__':
main()