这是您的数据:
# Six vertex IDs, used as both row and column labels of the adjacency matrix.
vertex_names <- as.character(153:158)
# Random graph from sna::rgraph(); no seed is set, so each run draws a new one.
testmat <- sna::rgraph(length(vertex_names))
rownames(testmat) <- vertex_names
colnames(testmat) <- vertex_names
testmat
#> 153 154 155 156 157 158
#> 153 0 0 0 1 1 0
#> 154 1 0 0 1 0 1
#> 155 1 1 0 0 0 1
#> 156 1 0 1 0 1 1
#> 157 1 0 1 1 0 0
#> 158 0 1 1 1 1 0
# Symmetrize the adjacency matrix with the "weak" rule; in the printed result
# a tie is present whenever either direction was present in `testmat`.
maxsymmetrizedfile <- sna::symmetrize(testmat, rule = "weak")
# Re-apply the vertex labels from `testmat` to the symmetrized matrix.
dimnames(maxsymmetrizedfile) <- dimnames(testmat)
maxsymmetrizedfile
#> 153 154 155 156 157 158
#> 153 0 1 1 1 1 0
#> 154 1 0 1 1 0 1
#> 155 1 1 0 1 1 1
#> 156 1 1 1 0 1 1
#> 157 1 0 1 1 0 1
#> 158 0 1 1 1 1 0
`maxsymm_edge` 有一个名为 `"vnames"` 的属性,而您的示例中缺少这个属性。
# Convert the labelled adjacency matrix to an sna edge list: a matrix with
# columns snd / rec / val plus "n" and "vnames" attributes (see output below).
maxsymm_edge <- sna::as.edgelist.sna(maxsymmetrizedfile)
maxsymm_edge
#> snd rec val
#> [1,] 2 1 1
#> [2,] 3 1 1
#> [3,] 4 1 1
#> [4,] 5 1 1
#> [5,] 1 2 1
#> [6,] 3 2 1
#> [7,] 4 2 1
#> [8,] 6 2 1
#> [9,] 1 3 1
#> [10,] 2 3 1
#> [11,] 4 3 1
#> [12,] 5 3 1
#> [13,] 6 3 1
#> [14,] 1 4 1
#> [15,] 2 4 1
#> [16,] 3 4 1
#> [17,] 5 4 1
#> [18,] 6 4 1
#> [19,] 1 5 1
#> [20,] 3 5 1
#> [21,] 4 5 1
#> [22,] 6 5 1
#> [23,] 2 6 1
#> [24,] 3 6 1
#> [25,] 4 6 1
#> [26,] 5 6 1
#> attr(,"n")
#> [1] 6
#> attr(,"vnames")
#> [1] "153" "154" "155" "156" "157" "158" # *********
我们可以对边列表中包含的 `"vnames"`(与 `vertex_names` 相同)进行索引。
# The vertex labels travel with the edge list as its "vnames" attribute.
(vnames <- attr(maxsymm_edge, "vnames"))
#> [1] "153" "154" "155" "156" "157" "158"
# Sender column: vertex positions that index into `vnames`.
(snd_indices <- maxsymm_edge[, "snd"])
#> [1] 2 3 4 5 1 3 4 6 1 2 4 5 6 1 2 3 5 6 1 3 4 6 2 3 4 5
vnames[snd_indices]
#> [1] "154" "155" "156" "157" "153" "155" "156" "158" "153" "154" "156" "157"
#> [13] "158" "153" "154" "155" "157" "158" "153" "155" "156" "158" "154" "155"
#> [25] "156" "157"
# Receiver column: read "rec" so the lookup yields the receiving vertex of
# each edge rather than repeating the sender.
(rec_indices <- maxsymm_edge[, "rec"])
#> [1] 1 1 1 1 2 2 2 2 3 3 3 3 3 4 4 4 4 4 5 5 5 5 6 6 6 6
vnames[rec_indices]
#> [1] "153" "153" "153" "153" "154" "154" "154" "154" "155" "155" "155" "155"
#> [13] "155" "156" "156" "156" "156" "156" "157" "157" "157" "157" "158" "158"
#> [25] "158" "158"
所以我们可以像这样直接构建一个数据框:
# Build the labelled edge list: translate the snd/rec positions into vertex
# names via the "vnames" attribute and keep the numeric "val" column as is.
el_df <- local({
  labels <- attr(maxsymm_edge, "vnames")
  data.frame(
    snd = labels[maxsymm_edge[, "snd"]],
    rec = labels[maxsymm_edge[, "rec"]],
    val = maxsymm_edge[, "val"],
    stringsAsFactors = FALSE # already the default when R.Version()$major >= 4
  )
})
el_df
#> snd rec val
#> 1 154 153 1
#> 2 155 153 1
#> 3 156 153 1
#> 4 157 153 1
#> 5 153 154 1
#> 6 155 154 1
#> 7 156 154 1
#> 8 158 154 1
#> 9 153 155 1
#> 10 154 155 1
#> 11 156 155 1
#> 12 157 155 1
#> 13 158 155 1
#> 14 153 156 1
#> 15 154 156 1
#> 16 155 156 1
#> 17 157 156 1
#> 18 158 156 1
#> 19 153 157 1
#> 20 155 157 1
#> 21 156 157 1
#> 22 158 157 1
#> 23 154 158 1
#> 24 155 158 1
#> 25 156 158 1
#> 26 157 158 1
为什么用数据框而不是矩阵?因为顶点名称和 `"val"` 的类型不同(`character` 与 `double`),如果硬要把它们放进同一个矩阵,(充其量)会把 `"val"` 强制转换为一堆字符串。
# Inspect the column types: character vertex names next to a numeric "val".
str(el_df)
#> 'data.frame': 26 obs. of 3 variables:
#> $ snd: chr "154" "155" "156" "157" ...
#> $ rec: chr "153" "153" "153" "153" ...
#> $ val: num 1 1 1 1 1 1 1 1 1 1 ...
但是,只有在您需要使用 `"val"` 时这一点才重要。该网络没有加权,因此您也可以直接索引 `"vnames"` 来构建矩阵形式的边列表(或者使用 `as.matrix(el_df[, 1:2])` 删除该列,把数据框转换为矩阵)。
#' Convert an adjacency matrix to an edge-list data frame
#'
#' Every non-zero cell `adj_mat[i, j]` becomes one row holding the sender
#' (row `i`), the receiver (column `j`) and the cell value. Rows are emitted
#' in column-major order, i.e. grouped by receiver.
#'
#' @param adj_mat A matrix whose cell `[i, j]` is the value of the edge from
#'   vertex `i` to vertex `j`; `0` means "no edge".
#' @param use_vertex_names If `TRUE` (the default) and `adj_mat` has
#'   dimnames, `snd` and `rec` hold the vertex names; otherwise they hold the
#'   row/column positions.
#' @return A data frame with columns `snd`, `rec` and `val`, one row per
#'   non-zero cell (zero rows if there is none). Name columns are character
#'   vectors, never factors.
as_edge_list_df <- function(adj_mat, use_vertex_names = TRUE) {
  # 3-column matrix: row index, column index, cell value (column-major order).
  melted <- cbind(
    as.vector(row(adj_mat)),
    as.vector(col(adj_mat)),
    as.vector(adj_mat)
  )
  # Keep only existing edges. `drop = FALSE` keeps the matrix shape even when
  # exactly one edge remains; without it the column indexing below fails.
  filtered <- melted[melted[, 3L] != 0, , drop = FALSE]

  if (use_vertex_names && !is.null(dimnames(adj_mat))) {
    # identical() also rejects the case where only one of the two name sets
    # is present, or where names are NA, which `all(a == b)` lets through or
    # turns into an unrelated error.
    if (!identical(rownames(adj_mat), colnames(adj_mat))) {
      stop("row names do not match column names.")
    }
    vertex_names <- rownames(adj_mat)
    data.frame(
      snd = vertex_names[filtered[, 1L]],
      rec = vertex_names[filtered[, 2L]],
      val = filtered[, 3L],
      stringsAsFactors = FALSE # keep names as character on R < 4 as well
    )
  } else {
    data.frame(
      snd = filtered[, 1L],
      rec = filtered[, 2L],
      val = filtered[, 3L],
      stringsAsFactors = FALSE
    )
  }
}
然后带它去试驾...
# Run the helper on the symmetrized adjacency matrix and print the result.
el_df2 <- as_edge_list_df(maxsymmetrizedfile)
el_df2
#> snd rec val
#> 1 154 153 1
#> 2 155 153 1
#> 3 156 153 1
#> 4 157 153 1
#> 5 153 154 1
#> 6 155 154 1
#> 7 156 154 1
#> 8 158 154 1
#> 9 153 155 1
#> 10 154 155 1
#> 11 156 155 1
#> 12 157 155 1
#> 13 158 155 1
#> 14 153 156 1
#> 15 154 156 1
#> 16 155 156 1
#> 17 157 156 1
#> 18 158 156 1
#> 19 153 157 1
#> 20 155 157 1
#> 21 156 157 1
#> 22 158 157 1
#> 23 154 158 1
#> 24 155 158 1
#> 25 156 158 1
#> 26 157 158 1
……并验证其结果与我们之前手动构建的 `el_df` 完全相同。
# Fails loudly unless the helper's result matches the hand-built data frame
# exactly (values, column types and attributes).
stopifnot(identical(el_df, el_df2))