r - 在 ggplot2 中绘制一个重新调整的成对距离矩阵

Question

加载库并创建可重现的示例

#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Creating a fake pairwise matrix: only the strict lower triangle keeps its
# random values; the diagonal and the upper triangle are masked with NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Converting into a wide data frame: the row labels move into a "locA" column
# and the matrix columns keep their own names.
tmp1 <- data.frame(locA = rownames(tmp), tmp, check.names = FALSE)
rownames(tmp1) <- NULL
head(tmp1)

# Reshape wide -> long so that every row is one (locA, locB, value) triple,
# then keep only the rows that actually carry a value (drop the NA cells).
tmp1 <- gather(data = tmp1, key = "locB", value = "value", -locA)
tmp1 <- tmp1[!is.na(tmp1[["value"]]), , drop = FALSE]
tmp1

# Tile plot with the default axis ordering: one black-bordered tile per
# (locA, locB) pair, filled by value and labelled with the value rounded to
# three decimals.
ggplot(
  data = tmp1,
  mapping = aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(colour = "black") +
  geom_text(colour = "white", size = 5)

但出于更具生物学意义的原因，我想更改这些比较的排序顺序

# biological order in which the locations should appear on both axes
my.order <- c("A", "C", "D", "B", "E")
my.order

# re-leveling: turn both location columns into ordered factors so ggplot2
# lays the axes out in my.order instead of the default order
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)
tmp1

# the releveled plot: same tiles as before, only the axis order changes, so
# pairs whose stored (locA, locB) order disagrees with my.order end up on the
# opposite side of the diagonal
ggplot(tmp1, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

我想找到一种方法，让“BC”和“BD”这两组比较也显示在下三角区域（对角线下方）中。

我尝试过利用完整矩阵和 lower.tri() 来解决这个问题，但到目前为止都没有成功。

# Build the full symmetric matrix: zero-fill the NA cells of tmp and of its
# transpose, then add the two so each value appears on both sides of the
# diagonal.
x <- replace(tmp, is.na(tmp), 0)
y <- replace(t(tmp), is.na(t(tmp)), 0)
full.matrix <- x + y
full.matrix

# lower.tri() returns a logical mask of the cells strictly below the
# diagonal, which might be useful in this context
lower.tri(full.matrix)

score 1 · Accepted Answer

在创建 tmp 之后，先运行到生成 full.matrix 为止的代码，然后执行：

# Put the rows and columns of the symmetric matrix into the order given by
# my.order.
reordered_mat <- full.matrix[
  match(my.order, rownames(full.matrix)),
  match(my.order, colnames(full.matrix))
]

# Mask the diagonal and everything above it, so only the strict lower
# triangle of the reordered matrix keeps its values.
lt_reordered_mat <- reordered_mat
lt_reordered_mat[upper.tri(lt_reordered_mat, diag = TRUE)] <- NA

# Wide data frame whose first column holds the row labels.
tmp1 <- cbind(rownames(lt_reordered_mat), as.data.frame(lt_reordered_mat))

然后重新运行其余处理 tmp1 的代码（重命名列、转换为长格式、去除 NA）以及因子重新排序（re-leveling）的代码，就应该能得到你想要的结果：

完整的可重现代码：

#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Creating a fake pairwise matrix: only the strict lower triangle keeps its
# random values; the diagonal and the upper triangle are masked with NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]

# Full symmetric matrix: zero-fill the NA cells of tmp and of its transpose,
# then add them so every pair is available in both orientations.
x <- tmp
x[is.na(x)] <- 0
y <- t(tmp)
y[is.na(y)] <- 0
full.matrix <- x + y


# biological order in which the locations should appear
my.order <- c("A", "C", "D", "B", "E")

# Reorder rows and columns first, and only then cut out the lower triangle,
# so the triangle is taken with respect to my.order rather than the original
# alphabetical order.
reordered_mat <- full.matrix[match(my.order, rownames(full.matrix)),
                             match(my.order, colnames(full.matrix))]
lt_reordered_mat <- replace(reordered_mat, !lower.tri(reordered_mat), NA)

# Wide data frame with the row labels in a "locA" column
tmp1 <- data.frame(locA = rownames(lt_reordered_mat), lt_reordered_mat,
                   check.names = FALSE)
rownames(tmp1) <- NULL

# Changing it to long form (one row per locA/locB pair) and getting rid of
# the NA rows left by the masked diagonal and upper triangle
tmp1 <- gather(tmp1, key = "locB", value = "value", -locA)
tmp1 <- tmp1[!is.na(tmp1$value), ]


# re-leveling: ordered factors make ggplot2 lay out both axes in my.order
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)

# The releveled plot: one black-bordered tile per (locA, locB) pair, filled
# by value and labelled with the value rounded to three decimals.
ggplot(
  data = tmp1,
  mapping = aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(colour = "black") +
  geom_text(colour = "white", size = 5)

score 0 · Accepted Answer

当 Mike H. 提供他的答案时，我创建了一个稍微不同的解决方案。我认为他的答案更好，因为它更简洁并且不使用 for 循环。

#Load libraries
set.seed(123)
library(tidyr)
library(ggplot2)

# Creating a fake pairwise matrix: only the strict lower triangle keeps its
# random values; the diagonal and the upper triangle are masked with NA.
locs <- 5
tmp <- matrix(runif(n = locs * locs), nrow = locs, ncol = locs)
tmp[upper.tri(tmp, diag = TRUE)] <- NA
colnames(tmp) <- LETTERS[seq_len(locs)]
rownames(tmp) <- LETTERS[seq_len(locs)]
tmp

# Converting into a wide data frame: the row labels move into a "locA" column
# and the matrix columns keep their own names.
tmp1 <- data.frame(locA = rownames(tmp), tmp, check.names = FALSE)
rownames(tmp1) <- NULL
head(tmp1)

# Reshape wide -> long so that every row is one (locA, locB, value) triple,
# then keep only the rows that actually carry a value (drop the NA cells).
tmp1 <- gather(data = tmp1, key = "locB", value = "value", -locA)
tmp1 <- tmp1[!is.na(tmp1[["value"]]), , drop = FALSE]
tmp1

# Tile plot with the default axis ordering: one black-bordered tile per
# (locA, locB) pair, filled by value and labelled with the value rounded to
# three decimals.
ggplot(
  data = tmp1,
  mapping = aes(x = locA, y = locB, fill = value, label = round(value, 3))
) +
  geom_tile(colour = "black") +
  geom_text(colour = "white", size = 5)

# biological order in which the locations should appear on both axes
my.order <- c("A", "C", "D", "B", "E")
my.order

# re-leveling: turn both location columns into ordered factors so ggplot2
# lays the axes out in my.order instead of the default order
tmp1$locA <- factor(tmp1$locA, levels = my.order, ordered = TRUE)
tmp1$locB <- factor(tmp1$locB, levels = my.order, ordered = TRUE)
tmp1

# the releveled plot: same tiles as before, only the axis order changes, so
# pairs whose stored (locA, locB) order disagrees with my.order end up on the
# opposite side of the diagonal
ggplot(tmp1, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

# reordering tmp by my.order and replacing NAs with zero, so every stored
# pair shows up as a non-zero cell at its position in the new order
x <- tmp
x <- x[my.order, my.order]
x[is.na(x)] <- 0
x

# identifying which values switch from the lower matrix to the upper matrix:
# y is a 0/1 indicator of the cells of x that hold a value
y <- x
y[y != 0] <- 1

# figuring out which side of the matrix needs to be switched: the triangle
# holding more pairs (ties go to the lower one) is treated as the "home" side
# and cleared, leaving 1s only on the pairs whose locA and locB must be
# swapped.
# The branches have to be mutually exclusive. With three independent `if`s,
# clearing the lower triangle makes the later "lower < upper" test true, which
# then clears the upper triangle as well and leaves nothing flagged.
n_lower <- sum(y[lower.tri(y)])
n_upper <- sum(y[upper.tri(y)])
if (n_lower >= n_upper) {
  y[lower.tri(y)] <- 0
} else {
  y[upper.tri(y)] <- 0
}

# Converting y into a long form data frame: row labels go into "locA", then
# the remaining columns are stacked into locB/value pairs.
fm <- cbind(rownames(y), as.data.frame(y))
colnames(fm)[1] <- "locA"
rownames(fm) <- NULL
fm <- gather(data = fm, key = "locB", value = "value", -locA)

# Flag the pairs that need to be switched and build an identifier
# (locA and locB pasted together) to merge on.
fm[["action"]] <- ifelse(fm[["value"]] == 1, yes = "switch", no = "keep")
fm[["both"]] <- with(fm, paste0(locA, locB))
fm

# Build the same pasted locA+locB identifier in tmp1 so the two tables share
# a key.
tmp1[["both"]] <- with(tmp1, paste0(locA, locB))
head(tmp1)

# Attach the action flag to each row of tmp1 by joining on that key; only the
# "action" and "both" columns of fm are carried over.
tmp2 <- merge(x = fm[, c("action", "both")], y = tmp1, by = "both")
tmp2

# using a for loop to make the necessary switches: for every row flagged
# "switch", exchange its locA and locB labels.
# seq_len() is used instead of 1:nrow() so that an empty tmp2 produces zero
# iterations rather than the indices 1 and 0.
for (i in seq_len(nrow(tmp2))) {
  if (tmp2$action[i] == "switch") {
    # read both labels as plain strings before overwriting either column
    A <- as.character(tmp2$locA[i])
    B <- as.character(tmp2$locB[i])
    tmp2$locA[i] <- B
    tmp2$locB[i] <- A
  }
}
tmp2

# re-leveling to my order: ordered factors make ggplot2 lay out both axes in
# my.order
tmp2$locA <- factor(tmp2$locA, levels = my.order, ordered = TRUE)
tmp2$locB <- factor(tmp2$locB, levels = my.order, ordered = TRUE)
tmp2

# now the graphic: one black-bordered tile per pair, filled by value and
# labelled with the value rounded to three decimals
ggplot(tmp2, aes(x = locA, y = locB, fill = value, label = round(value, 3))) +
  geom_tile(color = "black") +
  geom_text(size = 5, color = "white")

r - 在 ggplot2 中绘制一个重新调整的成对距离矩阵

2 回答 2

Related

Reference