r - 使用 tidygraph 将来自相同两个节点的两条边合并为一条

Question

我正在努力弄清楚如何将相同的 2 个节点之间的 2 条边折叠成 1 条，然后计算这些边的总和。

我相信有一种方法可以做到igraph：

simplify(gcon, edge.attr.comb = list(weight = "sum", function(x)length(x)))

但如果可能的话，我想这样做，tidygraph因为到目前为止我已经成功地实施了，tidygraph而且我对tidyverse工作方式更加熟悉。

我的数据如下所示：

  from to Strength Dataframe Question                Topic
1    0 32        4    weekly        1 Connection Frequency
2    0 19        5    weekly        1 Connection Frequency
3    0  8        3    weekly        1 Connection Frequency
4    0  6        5    weekly        1 Connection Frequency
5    0  2        4    weekly        1 Connection Frequency
6    0 14        5    weekly        1 Connection Frequency

'from' 和 'to' 包含相同的 id（例如 from-to；0-1 和 1-0）。我想浓缩，以便只存在 0-1 关系的一次迭代，并Strength计算总和。

到目前为止，这是我的代码：

graph <- data %>%
  filter(Dataframe == "weekly" & Question == 1) %>%
  as_tbl_graph(directed = FALSE) %>%
  activate(edges) %>% # first manipulate edges
  filter(!edge_is_loop()) %>% # remove any loops
  activate(nodes) %>% # now manipulate nodes
  left_join(node.group, by = "name") %>% 
  mutate(
    Popularity = centrality_degree(mode = 'in'),
    Centre = node_is_center(),
    Keyplayer = node_is_keyplayer(k = 5))

是否可以将两条对应的边合并为一条边？我搜索了论坛，但只遇到了相同节点在相同列中重复的引用（即跨多行的 0-1）。

score 3 · Accepted Answer

library(tidygraph) # v1.2.0
library(dplyr) # v0.8.5
library(purrr) # v0.3.4

dat <- data.frame(
  from = c("a", "a", "b", "c"),
  to = c("b", "b", "a", "b"),
  n = 1:4
)

调用to_simple()insideconvert()折叠平行边。相应的边和权重.orig_data作为小标题列表存储。然后，从中提取折叠边的权重总和.orig_data。

dat %>% 
  as_tbl_graph() %>% 
  convert(to_simple) %>% 
  activate(edges) %>% 
  mutate(n_sum = map_dbl(.orig_data, ~ sum(.x$n)))

# A tbl_graph: 3 nodes and 3 edges
#
# A directed simple graph with 1 component
#
# Edge Data: 3 x 5 (active)
   from    to .tidygraph_edge_index .orig_data       n_sum
  <int> <int> <list>                <list>           <dbl>
1     1     2 <int [2]>             <tibble [2 x 3]>     3
2     2     1 <int [1]>             <tibble [1 x 3]>     3
3     3     2 <int [1]>             <tibble [1 x 3]>     4
#
# Node Data: 3 x 2
  name  .tidygraph_node_index
  <chr>                 <int>
1 a                         1
2 b                         2
3 c                         3

score 1 · Accepted Answer

您可以通过跳转到加权邻接矩阵并返回到 igraph 图来折叠图 g 中的多个边，如下所示：

gg <- graph.adjacency(get.adjacency(g), mode="undirected", weighted=TRUE)

现在gg将包含与$weight中每个顶点对之间出现的边数相对应的边属性g。

我对 tidygraph 不太熟悉，但我制作了这个教学代码来简化你的路径。

# A graph from sample data
sample_el <- cbind(c(1,1,1,2,2,2,3,3,3,4,4,5,5,6,6,6,7,7,7,7,8,8),
                   c(2,2,3,6,6,4,4,6,8,5,5,6,8,7,7,2,6,8,3,6,4,4))
g <- graph_from_edgelist(sample_el, directed=F)

# Always plot graphs with this same layout
mylaoyt <- layout_(g, as_star())
plot(g, layout = mylaoyt)

# Merge all duplicate edges using a weighted adjacency matric that
# is converted back to a graph
gg <- graph.adjacency(get.adjacency(g), mode="undirected", weighted=TRUE)

# function to return a weighted edgelist from a graph
get.weighted.edgelist <- function(graph){cbind(get.edgelist(gg), E(gg)$weight)}

# compare your two edge-lists. el has duplicates, wel is weighted
el <- get.edgelist(g)
wel<- get.weighted.edgelist(gg)
el
wel

# Plot the two graphs to see what el and wel would look like:
par(mfrow=c(1,2))
plot(g, layout=mylaoyt, vertex.label=NA, vertex.size=10)
plot(gg, layout=mylaoyt, vertex.label=NA, vertex.size=10, edge.width=E(gg)$weight * 3)

el和中的输出wel可视化如下：

希望你能找到你需要的东西。

score 1 · Accepted Answer

我也在努力解决这个问题。到目前为止，我的解决方案是折叠每个节点对，然后将权重相加。像这样的东西：

require(dplyr)
require(tidyr)

pasteCols = function(x, y, sep = ":"){
  stopifnot(length(x) == length(y))
  return(lapply(1:length(x), function(i){paste0(sort(c(x[i], y[i])), collapse = ":")}) %>% unlist())
}
data = data %>% 
  mutate(col_pairs = pasteCols(from, to, sep = ":")) %>% 
  group_by(col_pairs) %>% summarise(sum_weight = sum(weight)) %>% 
  tidyr::separate(col = col_pairs, c("from", "to"), sep = ":")

score 0 · Accepted Answer

tidygraphmorph通过调用可以在ed 状态下简化图形simplify_to，但在unmorphing 时会恢复为原始状态。

这是一个整洁的解决方法：

data <- read.table(header=TRUE, text="
from to weight
0 14 5 
0  1 1 
1  0 1
")

original <- as_tbl_graph(data)

输入：

> original
# A tbl_graph: 3 nodes and 3 edges
#
# A directed simple graph with 1 component
#
# Node Data: 3 x 1 (active)
  name 
  <chr>
1 0    
2 1    
3 14   
#
# Edge Data: 3 x 3
   from    to weight
  <int> <int>    <int>
1     1     3        5
2     1     2        1
3     2     1        1

解决方案：

modified <- original %>% activate(edges) %>% 
    # create a temporary grouping & filtering variable by sorting from/to IDs
    mutate(temp = ifelse(from > to, paste0(to, from), paste0(from, to))) %>% 
    group_by(temp) %>% 
    mutate(weight = sum(weight)) %>% 
    ungroup() %>% 
    dplyr::distinct(temp, .keep_all = TRUE) %>% 
    select(-temp)

输出：

> modified   
# A tbl_graph: 3 nodes and 2 edges
#
# A rooted tree
#
# Edge Data: 2 x 3 (active)
   from    to weight
  <int> <int>    <int>
1     1     3        5
2     1     2        2
#
# Node Data: 3 x 1
  name 
  <chr>
1 0    
2 1    
3 14

score 0 · Accepted Answer

这是一种方法。它使用tidygraph,igraph在引擎盖下使用。

library(tidygraph)
#> 
#> Attaching package: 'tidygraph'
#> The following object is masked from 'package:stats':
#> 
#>     filter
library(igraph)
#> 
#> Attaching package: 'igraph'
#> The following object is masked from 'package:tidygraph':
#> 
#>     groups
#> The following objects are masked from 'package:stats':
#> 
#>     decompose, spectrum
#> The following object is masked from 'package:base':
#> 
#>     union
library(ggraph)
#> Loading required package: ggplot2
library(tidyverse)


g <- tibble(from = sample(letters[1:5], 25, T), 
       to = sample(letters[1:5], 25, T)) %>% 
  as_tbl_graph()



ggraph(g)+
  geom_edge_parallel(arrow = arrow(type = 'closed'), 
                     start_cap = circle(7.5, 'mm'), 
                     end_cap = circle(7.5, 'mm'))+
  geom_node_label(aes(label = name))+
  labs(title = 'Multiple edges shown between node pairs')
#> Using `stress` as default layout

# Add the weigths as counts in the original dataframe

g_weights <- g %>% 
  activate(edges) %>% 
  as_tibble() %>% 
  mutate(link = glue::glue('{from}_{to}')) %>% 
  add_count(link) %>% 
  distinct(link, n, .keep_all = T) %>% 
  select(from, to, n) %>% 
  as_tbl_graph()

ggraph(g_weights)+
  geom_edge_parallel(arrow = arrow(type = 'closed'), 
           start_cap = circle(7.5, 'mm'), 
           end_cap = circle(7.5, 'mm'), 
           aes(width = n))+
  geom_node_label(aes(label = name))+
  labs(title = 'Single edges shown between node pairs', 
       subtitle = 'Weights used as edge width')+
  scale_edge_width(range = c(.5, 2), name = 'Weight')
#> Using `stress` as default layout

^{由reprex 包（v0.3.0）于 2019 年 9 月 3 日创建}

r - 使用 tidygraph 将来自相同两个节点的两条边合并为一条

5 回答 5

Related

Reference