我正在尝试实现以下论文的算法 6(最终是算法 7) - https://arxiv.org/pdf/1610.03045.pdf
其中大部分看起来都很好,但我不明白为什么我没有在 $z$ 变量中实现收敛。我的代码似乎符合 Alg 6 的伪代码中所写的内容,所以我真的不确定这里发生了什么,但我认为这是变量 [z_{new}] 或 $z_{hat}$ 的问题因为这些应该实现越来越好的近似,但误差实际上正在增加。
def iterative_primal_dual_sketch_new(data, targets,
left_sketch, right_sketch,
offset, tolerance,
max_iterations=10):
'''Perform the vanilla iterative_primal_dual_sketching.'''
X = data
y = targets
y = y[:,None] # change to (row, column) index not flat array
_lambda = offset
n, d = X.shape
# initialise sketches nb. I have switched notation from the R in paper
feature_sketch = right_sketch
p = feature_sketch.shape[1]
# same as in paper for Pi but needs to be transposed for right multiplication
sample_sketch = left_sketch
m = left_sketch.shape[1]
# sketch products and parameters for iteration
X_R = np.dot(X,feature_sketch) # just renaming to avoid unnecessary compute
double_sketch = (0.5*n)*np.dot(sample_sketch, X_R)
w_hat = np.zeros(shape=(d,1))
has_z_converged = False
for t in range(max_iterations):
k = 0
z = np.zeros(shape=(p,1))
while not has_z_converged:
z0 = z
print("iteration on (t,k) ({},{})".format(t,k))
print("start of loop: norm(z) = {}".format(norm(z0,2)))
temp1 = (1/n)*(np.dot(X_R.T, y) -\
np.dot(X_R.T, np.dot(X,w_hat)) -\
np.dot(X_R.T, np.dot(X_R,z0)))
temp2 = _lambda*np.dot(feature_sketch.T, w_hat) + _lambda*z0
temp3 = temp1 - temp2
print("temp1 shape {}".format(temp1.shape))
print("temp2 shape {}".format(temp2.shape))
print("temp3 shape {}".format(temp3.shape))
assert temp3.shape[0] == p
# solve the linear system for z
z_hat = np.linalg.solve(2*np.dot(double_sketch.T,double_sketch) \
+_lambda*np.eye(p),temp3)
print("z_hat shape {}".format(z_hat.shape))
print("Norm(z_hat) = {}".format(norm(z_hat)))
z_new = z0 + z_hat
print("z_new shape {}".format(z_new.shape))
print("norm(z_new) = {}".format(norm(z_new)))
# convergence criterion: z_n+1 - z_n = z_hat
if norm(z_hat,2) < tolerance:
z = z_new
print("Residual: {}".format(norm(z_hat,2)))
print("Exiting loop.")
has_z_converged = True
else:
z = z_new # flip the labelling so can just plug z in above code
k += 1
print("Looping again.")
has_z_converged = False # reset so the loop is entered next time over
# Update dual
alpha_dual = y - np.dot(X, w_hat) - np.dot(X_R, z)
#print('Alpha shape {}'.format(alpha_dual.shape))
# Update primal
w_hat = (1/(n*_lambda))*np.dot(X.T, alpha_dual)
print(w_hat.shape)
return w_hat, alpha_dual
左右草图被定义为高斯矩阵,如: 1. 左是大小n/2 x n
除以的高斯矩阵n
。2.右是大小d x d/2
除以d的高斯矩阵
非常感谢任何帮助!