我有单细胞基因表达数据,这意味着我有data.frame
数千个细胞,其中每个细胞都映射到一个cluster
(即,cluster
s 代表细胞类型)并具有实验设计背景(例如,细胞被应用了某种treatment
)。在这个特定示例中,我有两个级别的cluster
s (top
和converged
),因此在data.frame
映射到cluster
s 的单元格中,每个单元格出现两次,每个集群级别出现一次。此外,还有一个 sparse gene x cell
matrix
,它具有每个细胞中每个基因的表达单元。
为简单起见,在我的示例中,每个都由cluster
来自所有级别的单元格组成treatment
:
以下是数据:
library(dplyr)
library(Matrix)
#create the experimental design
factors <- "treatment"
contrasts <- c("treatment1 vs. ctrl","treatment2 vs. ctrl","treatment2 vs. treatment1")
top.clusters = paste0("cluster",1:9)
converged.clusters = paste0("cluster",1:27)
design.df <- rbind(expand.grid(factors,contrasts,top.clusters) %>% dplyr::mutate(cluster.level = "top"),
expand.grid(factors,contrasts,converged.clusters) %>% dplyr::mutate(cluster.level = "converged"))
colnames(design.df)[1:3] <- c("factor","contrast","cluster")
design.df$factor <- as.character(design.df$factor)
design.df$contrast <- as.character(design.df$contrast)
design.df$cluster <- as.character(design.df$cluster)
design.df$cluster.level <- as.character(design.df$cluster.level)
#create the gene x cell expression sparse matrix
gbm <- as(matrix(0,nrow = 15000,ncol = 9000,dimnames = list(paste0("gene",1:15000),paste0("cell",1:9000))),"dgCMatrix")
set.seed(1)
gbm[sample(15000*9000,0.1*15000*9000,replace = F)] <- rnorm(0.1*15000*9000,1.5, 0.8)
#create the data.frame mapping cells to clusters
observations.df <- rbind(data.frame(cell = paste0("cell",1:9000), cluster = sample(paste0("cluster",1:9),9000,replace=T),
treatment = sample(c("ctrl","treatment1","treatment2"), 9000, replace=T), stringsAsFactors = F) %>% dplyr::mutate(cluster.level = "top"),
data.frame(cell = paste0("cell",1:9000), cluster = sample(paste0("cluster",1:27),9000,replace=T),
treatment = sample(c("ctrl","treatment1","treatment2"), 9000, replace=T), stringsAsFactors = F) %>% dplyr::mutate(cluster.level = "converged"))
observations.df$treatment <- factor(observations.df$treatment, levels = c("ctrl","treatment1","treatment2"))
我构建了一个shiny
应用程序,该应用程序具有两个绘图函数,对应于test
数据的两个差分表达式,并且我还有一组renderUI
用于将单元格子集为cluster
(对于选定的cluster.level
)factor
、 和contrast
。
差异表达测试(应用于选定的基因,但对本文而言并不重要)是:
Gamma
glm
A:将 a与 a拟合log
link
function
到所选基因的非零值细胞中的表达水平
logistic
glm
B:将a与alogit
link
function
与所选基因在细胞中表达(即表达或不表达)的数量进行拟合。
我的问题更多与renderUI
我允许用户对单元格进行子集化的 's 集有关。因为它们存在依赖关系,所以我将它们中的!is.null()
条件用于input
它们所依赖的 s,但我希望获得有关更好方法的帮助来实现这一目标,希望这会更快。此外,我还在!is.null()
绘图功能中使用了这些条件,以及actionButton
防止由于进行不同选择时发生的不需要的处理而发生滞后,但我也希望在改进该代码方面获得帮助也。
这是shiny
我对上述数据的代码:
library(shiny)
library(ggplot2)
library(ggpmisc)
server <- function(input, output)
{
output$selected.cluster.level <- renderUI({
radioButtons("selected.cluster.level", "Select Cluster Level", choices = c("top","converged"), inline=TRUE)
})
output$selected.clusters <- renderUI({
all.clusters <- unique(dplyr::filter(design.df,cluster.level %in% input$selected.cluster.level)$cluster)
selectInput("selected.clusters", "Select Clusters", choices = all.clusters,multiple = T,selected = all.clusters)
})
output$selected.factor <- renderUI({
if(!is.null(input$selected.clusters)){
all.factors <- unique(dplyr::filter(design.df,cluster %in% input$selected.clusters)$factor)
selectInput("selected.factor", "Select Factor", choices = all.factors,multiple = F,selected = all.factors[1])
}
})
output$selected.contrast <- renderUI({
if(!is.null(input$selected.clusters) & !is.null(input$selected.factor)){
all.contrasts <- unique(dplyr::filter(design.df,factor == input$selected.factor & cluster %in% input$selected.clusters)$contrast)
selectInput("selected.contrast", "Select Contrast", choices = all.contrasts,multiple = F,selected = all.contrasts[1])
}
})
output$selected.gene <- renderUI({
selectInput("selected.gene", "Select Gene", choices = rownames(gbm),multiple = F,selected = rownames(gbm)[1])
})
test.A.plot <- reactive({
test.A.plot <- NULL
if(input$goButton > 0){
if(!is.null(input$selected.gene) & !is.null(input$selected.clusters) & !is.null(input$selected.factor) & !is.null(input$selected.contrast)){
selected.gene <- isolate(input$selected.gene)
selected.clusters <- isolate(input$selected.clusters)
selected.factor <- isolate(input$selected.factor)
selected.contrast <- isolate(input$selected.contrast)
factor.levels <- strsplit(selected.contrast,split="\\s+?vs\\.\\s+?")[[1]] %>% rev()
selected.observations.df <- dplyr::filter(observations.df,cluster.level %in% input$selected.cluster.level) %>%
dplyr::filter(cluster %in% selected.clusters) %>%
dplyr::select_(.dots = c("cell","cluster",selected.factor)) %>%
dplyr::filter(!!as.symbol(selected.factor) %in% factor.levels)
colnames(selected.observations.df)[which(colnames(selected.observations.df) == selected.factor)] <- "selected.factor"
selected.observations.df$selected.factor <- factor(selected.observations.df$selected.factor,levels = factor.levels)
selected.gbm.vals <- gbm[which(rownames(gbm) == selected.gene),which(colnames(gbm) %in% selected.observations.df$cell)]
plot.df <- selected.observations.df %>%
dplyr::left_join(data.frame(cell=names(selected.gbm.vals),value=unname(selected.gbm.vals)),by=c("cell"="cell")) %>%
dplyr::filter(value > 0)
plot.df$cluster <- factor(plot.df$cluster)
test.A.plot <- ggplot(plot.df,aes(x=selected.factor,y=value)) +
geom_violin(aes(fill=selected.factor,color=selected.factor),alpha=0.3) +
geom_boxplot(width=0.1,aes(color=selected.factor),fill=NA) +
geom_smooth(mapping=aes(x=selected.factor,y=value,group=cluster),color="black",method="glm",method.args=list(family=Gamma(link='log')),size=1,se=T) +
stat_poly_eq(mapping=aes(x=selected.factor,y=value,group=cluster,label=stat(p.value.label)),formula=y~x,method="glm",method.args=list(family=Gamma(link='log')),parse=T,npcx="center",npcy="bottom") +
scale_fill_manual(limits=factor.levels,values=scales::hue_pal()(length(factor.levels)),drop=F) +
scale_color_manual(limits=factor.levels,values=scales::hue_pal()(length(factor.levels)),drop=F) +
facet_wrap(as.formula("~ cluster")) + theme_minimal() + ylab(paste0("#",selected.gene," value"))+theme(plot.title=element_text(hjust=0.5),legend.title=element_blank(),axis.ticks.x=element_blank(),axis.text.x=element_blank(),axis.title.x=element_blank())
}
}
return(test.A.plot)
})
test.B.plot <- reactive({
test.B.plot <- NULL
if(input$goButton > 0){
if(!is.null(input$selected.gene) & !is.null(input$selected.clusters) & !is.null(input$selected.factor) & !is.null(input$selected.contrast)){
selected.gene <- isolate(input$selected.gene)
selected.clusters <- isolate(input$selected.clusters)
selected.factor <- isolate(input$selected.factor)
selected.contrast <- isolate(input$selected.contrast)
factor.levels <- strsplit(selected.contrast,split="\\s+?vs\\.\\s+?")[[1]] %>% rev()
selected.observations.df <- dplyr::filter(observations.df,cluster.level %in% input$selected.cluster.level) %>%
dplyr::filter(cluster %in% selected.clusters) %>%
dplyr::select_(.dots = c("cell","cluster",selected.factor)) %>%
dplyr::filter(!!as.symbol(selected.factor) %in% factor.levels)
colnames(selected.observations.df)[which(colnames(selected.observations.df) == selected.factor)] <- "selected.factor"
selected.observations.df$selected.factor <- factor(selected.observations.df$selected.factor,levels = factor.levels)
selected.gbm.vals <- gbm[which(rownames(gbm) == selected.gene),which(colnames(gbm) %in% selected.observations.df$cell)]
expected.fractions.df <- selected.observations.df %>%
dplyr::group_by(selected.factor) %>%
dplyr::summarise(f.expressed.cells=n()/nrow(selected.observations.df)) %>%
dplyr::mutate(expected.f.expressed.cells = qlogis(f.expressed.cells)) %>%
dplyr::select(-f.expressed.cells)
plot.df <- selected.observations.df %>%
dplyr::left_join(data.frame(cell=names(selected.gbm.vals),value=unname(selected.gbm.vals)),by=c("cell"="cell")) %>%
dplyr::mutate(is.expressed = ifelse(value > 0,1,0)) %>% dplyr::select(-value) %>% dplyr::left_join(expected.fractions.df)
plot.df$cluster <- factor(plot.df$cluster)
plot.summary.df <- plot.df %>%
dplyr::group_by(cluster,selected.factor) %>%
dplyr::tally() %>%
dplyr::rename(total.cells=n) %>%
dplyr::left_join(plot.df %>% dplyr::group_by(cluster,selected.factor) %>% dplyr::mutate(n.expressed.cells=sum(is.expressed)) %>% dplyr::select(-is.expressed) %>% unique()) %>%
dplyr::mutate(f.expressed.cells=n.expressed.cells/total.cells) %>%
dplyr::select(f.expressed.cells,cluster,selected.factor) %>%
unique() %>% dplyr::left_join(expected.fractions.df)
#figure out n.col from facet_wrap
facet.wrapped.plot <- ggplot(data = plot.summary.df, aes(x = selected.factor, y = f.expressed.cells, fill = selected.factor)) +
geom_bar(stat = 'identity') +
scale_x_discrete(name = NULL,labels = levels(plot.summary.df$selected.factor), breaks = sort(unique(plot.summary.df$selected.factor))) +
facet_wrap(as.formula("~ cluster")) + theme_minimal()+theme(legend.position="none",plot.title=element_text(hjust=0.5)) + ylab("Fraction of cells")
n.cols <- wrap_dims(length(facet.wrapped.plot))[2]
plot.list <- lapply(levels(plot.df$cluster),function(l){
p.df <- dplyr::filter(plot.summary.df,cluster == l)
n.df <- dplyr::filter(plot.df,cluster == l)
fit <- glm(is.expressed ~ selected.factor + offset(expected.f.expressed.cells), data = n.df, family = binomial(link = 'logit'))
fit.text <- paste0(format(round(summary(fit)$coefficients[2,1],3),scientiffic=T)," (P = ",format(round(summary(fit)$coefficients[2,4],3),scientiffic=T),")")
cluster.plot <- ggplot(data = p.df, aes(x = selected.factor, y = f.expressed.cells, fill = selected.factor)) +
geom_bar(stat = 'identity') +
stat_function(fun = logitTrendLine,args=list(ests=coef(fit)),size=2,color="black") +
annotate("text",size=4,vjust=0,hjust=0.5,x=1.5,y=0,label=fit.text,color="black") +
scale_x_discrete(name = NULL,labels = factor.levels, breaks = factor.levels) + ylim(0,1) +
theme_minimal()+theme(legend.position="none",plot.title=element_text(hjust=0.5)) + ylab("Fraction of cells") + ggtitle(l)
})
plot.list <- plot.list[which(sapply(plot.list,function(i) !is.null(i)))]
if(length(plot.list) > 0) test.B.plot <- ggpubr::as_ggplot(gridExtra::grid.arrange(grobs=plot.list,ncol=n.cols,top=grid::textGrob(selected.gene)))
}
}
return(test.B.plot)
})
output$out.plot <- renderPlot({
if(input$outputType == "A"){
test.A.plot()
} else if(input$outputType == "B"){
test.B.plot()
}
})
}
ui <- fluidPage(
titlePanel("Data Explorer"),
sidebarLayout(
sidebarPanel(
tags$head(
tags$style(HTML(".multicol {-webkit-column-count: 3; /* Chrome, Safari, Opera */-moz-column-count: 3; /* Firefox */column-count: 3;}")),
tags$style(type="text/css", "#loadmessage {position: fixed;top: 0px;left: 0px;width: 100%;padding: 5px 0px 5px 0px;text-align: center;font-weight: bold;font-size: 100%;color: #000000;background-color: #CCFF66;z-index: 105;}"),
tags$style(type="text/css",".shiny-output-error { visibility: hidden; }",".shiny-output-error:before { visibility: hidden; }")),
selectInput("outputType", "Output Type", choices = c("A","B")),
uiOutput("selected.cluster.level"),
uiOutput("selected.clusters"),
uiOutput("selected.factor"),
uiOutput("selected.contrast"),
uiOutput("selected.gene"),
actionButton(icon=icon("chart-line"),"goButton","Render figure"),
),
mainPanel(
plotOutput("out.plot")
)
)
)
shinyApp(ui = ui, server = server)