0

我正在尝试使用 S4 在 R 中创建隐式居中/缩放矩阵(以期为大型稀疏矩阵执行此操作)。

我可以创建一个隐式缩放矩阵,它可以正确地与向量进行左右乘法:

N = 500
P = 100
X = matrix(runif(N * P), N)

setClass("scaled_matrix", contains="matrix", slots=c(scale="numeric"))
setMethod("%*%", signature(x="scaled_matrix", y="numeric"),
          function(x, y) x@.Data %*% (y / x@scale))
setMethod("%*%", signature(x="numeric", y="scaled_matrix"),
          function(x, y) (x %*% y@.Data) / y@scale)

get_scaled = function(A) {
  rmsd = sqrt(apply(A*A, 2, sum)/(nrow(A)-1))
  new("scaled_matrix", A, scale = rmsd)
}

X_scaled = get_scaled(X)
left_test = runif(N)
max(abs(left_test %*% X_scaled - left_test %*% scale(X, center = F))) # small, yay!
right_test = runif(P)
max(abs(X_scaled %*% right_test - scale(X, center = F) %*% right_test )) # small, yay!

和一个隐式居中的矩阵:

setClass("centered_matrix", 
         contains="matrix", 
         slots=c(center="numeric"))
setMethod("%*%", signature(x="centered_matrix", y="numeric"),
          function(x, y) (x@.Data %*% y - as.numeric(x@center %*% y)))
setMethod("%*%", signature(x="numeric", y="centered_matrix"),
          function(x, y) (x %*% y@.Data - sum(x) * y@center ))
get_centered = function(A) {
  new("centered_matrix", A, center = apply(A, 2, mean))
}

X_centered = get_centered(X)
max(abs(left_test %*% X_centered - left_test %*% scale(X, scale = F))) # small, yay!
max(abs(X_centered %*% right_test - scale(X, scale = F) %*% right_test )) # small, yay!

但是如果我想结合这些呢?我认为以下会起作用

X_centered_scaled = get_scaled(X_centered)

max(abs(left_test %*% X_centered_scaled - left_test %*% scale(X))) # not small, oh no! 
max(abs(X_centered_scaled %*% right_test - scale(X) %*% right_test )) # not small, oh no! 

据我所知,问题在于

class(X_centered_scaled@.Data) # should be centered_matrix but is matrix

即当X_centered_scaled被创建时X_centered被向上转换为 amatrix而不是保持 a centered_matrix。有什么办法可以避免这种情况发生吗?当然,我可以制作一个matrix_centered_scaled类,但我喜欢将这两个链接在一起的优雅,它提供了只使用一个或另一个的选项。

4

1 回答 1

0

好的,我想通了。诀窍是使用显式data插槽并使其具有特殊类ANY



N = 500
P = 100
X = matrix(runif(N * P), N)

setClass("scaled_matrix", 
         slots=c(data = "ANY", scale="numeric"))
setMethod("%*%", signature(x="scaled_matrix", y="numeric"),
          function(x, y) (x@data %*% (y / x@scale)))
setMethod("%*%", signature(x="numeric", y="scaled_matrix"),
          function(x, y) ((x %*% y@data) / y@scale))

get_scaled = function(A, scale = sqrt(apply(A*A, 2, sum)/(nrow(A)-1))) {
  new("scaled_matrix", data = A, scale = scale)
}

X_scaled = get_scaled(X)
left_test = runif(N)
max(abs(left_test %*% X_scaled - left_test %*% scale(X, center = F))) # small, yay!
right_test = runif(P)
max(abs(X_scaled %*% right_test - scale(X, center = F) %*% right_test )) # small, yay!

setClass("centered_matrix",  
         slots=c(data = "ANY", center="numeric"))
setMethod("%*%", signature(x="centered_matrix", y="numeric"),
          function(x, y) ( x@data %*% y - as.numeric(x@center %*% y)))
setMethod("%*%", signature(x="numeric", y="centered_matrix"),
          function(x, y) (x %*% y@data - sum(x) * y@center ))
get_centered = function(A) {
  new("centered_matrix", data = A, center = apply(A, 2, mean))
}

X_centered = get_centered(X)
max(abs(left_test %*% X_centered - left_test %*% scale(X, scale = F))) # small, yay!
max(abs(X_centered %*% right_test - scale(X, scale = F) %*% right_test )) # small, yay!

X_centered_scaled = get_scaled(X_centered, scale = apply(X, 2, sd)) # doesnt' work either

max(abs(left_test %*% X_centered_scaled - left_test %*% scale(X))) # small, yay! 
max(abs(X_centered_scaled %*% right_test - scale(X) %*% right_test )) # small, yay! 
于 2021-10-26T02:58:04.963 回答