我正在尝试使用潜在因素模型构建玩具推荐系统。所以我只是从( http://www.quuxlabs.com/blog/2010/09/matrix-factorization-a-simple-tutorial-and-implementation-in-python/ )复制代码并运行它。一切都很好。然后我用字典重写了代码,出了点问题,我想不通。
原始代码在这里:
import numpy
def matrix_factorization(R, P, Q, K, steps=10000, alpha=0.0002, beta=0.02):
Q = Q.T
for step in xrange(steps):
for i in xrange(len(R)):
for j in xrange(len(R[i])):
if R[i][j] > 0:
eij = R[i][j] - numpy.dot(P[i,:],Q[:,j])
for k in xrange(K):
P_temp = P[i][k]
Q_temp = Q[k][j]
P[i][k] = P_temp + alpha * (2 * eij * Q_temp - beta * P_temp)
Q[k][j] = Q_temp + alpha * (2 * eij * P_temp - beta * Q_temp)
eR = numpy.dot(P,Q)
e = 0
for i in xrange(len(R)):
for j in xrange(len(R[i])):
if R[i][j] > 0:
e = e + pow(R[i][j] - numpy.dot(P[i,:],Q[:,j]), 2)
for k in xrange(K):
e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2))
if e < 0.001:
break
print 'step',step,'error:',e
step += 1
print e
return P, Q.T
R = [
[5,3,0,1],
[4,0,0,1],
[1,1,0,5],
[1,0,0,4],
[0,1,5,4],
]
R = numpy.array(R)
N = len(R)
M = len(R[0])
K = 2
P = numpy.random.rand(N,K)
Q = numpy.random.rand(M,K)
nP, nQ = matrix_factorization(R, P, Q, K)
nR = numpy.dot(nP, nQ.T)
您可以看到每一步的误差都较小。
step 4976 error: 1.26505397722
step 4977 error: 1.26497866139
step 4978 error: 1.26490338911
step 4979 error: 1.26482816036
step 4980 error: 1.2647529751
step 4981 error: 1.26467783333
step 4982 error: 1.264602735
step 4983 error: 1.26452768009
step 4984 error: 1.26445266858
step 4985 error: 1.26437770044
step 4986 error: 1.26430277565
我的代码:
import random
def matrix_factorization(R, P, Q, K,steps=5000, alpha=0.0002, beta=0.02):
for step in xrange(steps):
for i in R.keys():
for j in R[i].keys():
eij = R[i][j] - sum([x * y for x in P[i] for y in Q[j]])
for k in xrange(K):
P_temp = P[i][k]
Q_temp = Q[j][k]
P[i][k] = P_temp + alpha * (2 * eij * Q_temp - beta * P_temp)
Q[j][k] = Q_temp + alpha * (2 * eij * P_temp - beta * Q_temp)
#print 'P,Q',P[i][k],Q[k][j]
e = 0
for i in R.keys():
for j in R[i].keys():
e += pow(R[i][j] - sum([x * y for x in P[i] for y in Q[j]]), 2)
for k in xrange(K):
e += (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2))
if e < 0.001:
break
print 'step',step,'error:',e
step += 1
print e
return P,Q
R = {0:{0:5,1:3,3:1},
1:{0:4,3:1},
2:{0:1,1:1,3:5},
3:{0:1,3:4},
4:{1:1,2:5,3:4}
}
N = 5
M = 4
K = 4
P = dict()
Q = dict()
for i in xrange(N):
P[i] = [random.random() for x in xrange(K)]
for j in xrange(M):
Q[j] = [random.random() for x in xrange(K)]
P,Q = matrix_factorization(R,P,Q,K)
与上一节几乎相同。唯一的区别是我用字典写的。但它显示:
step 4944 error: 12786002.1942
step 4945 error: 12838370.3896
step 4946 error: 12890953.0588
step 4947 error: 12943751.0801
step 4948 error: 12996765.3355
step 4949 error: 13049996.7107
step 4950 error: 13103446.0947
step 4951 error: 13157114.3803
step 4952 error: 13211002.4639
step 4953 error: 13265111.2458
step 4954 error: 13319441.6297
step 4955 error: 13373994.5232
step 4956 error: 13428770.8375
step 4957 error: 13483771.4875
step 4958 error: 13538997.392
step 4959 error: 13594449.4735
step 4960 error: 13650128.6582
step 4961 error: 13706035.8761
step 4962 error: 13762172.0611
step 4963 error: 13818538.1509
step 4964 error: 13875135.0871
step 4965 error: 13931963.8149
step 4966 error: 13989025.2837
step 4967 error: 14046320.4465
step 4968 error: 14103850.2604
step 4969 error: 14161615.6864
step 4970 error: 14219617.6893
step 4971 error: 14277857.2379
step 4972 error: 14336335.3052
step 4973 error: 14395052.8678
step 4974 error: 14454010.9066
错误在每一步都变得越来越大。我对此感到很困惑。
非常感谢您的宝贵时间!