0

我有单细胞基因表达数据,这意味着我有data.frame数千个细胞,其中每个细胞都映射到一个cluster(即,clusters 代表细胞类型)并具有实验设计背景(例如,细胞被应用了某种treatment)。在这个特定示例中,我有两个级别的clusters (topconverged),因此在data.frame映射到clusters 的单元格中,每个单元格出现两次,每个集群级别出现一次。此外,还有一个 sparse gene x cell matrix,它具有每个细胞中每个基因的表达单元。

为简单起见,在我的示例中,每个都由cluster来自所有级别的单元格组成treatment

以下是数据:

library(dplyr)
library(Matrix)

#create the experimental design
factors <- "treatment"
contrasts <- c("treatment1 vs. ctrl","treatment2 vs. ctrl","treatment2 vs. treatment1")
top.clusters = paste0("cluster",1:9)
converged.clusters = paste0("cluster",1:27)
design.df <- rbind(expand.grid(factors,contrasts,top.clusters) %>% dplyr::mutate(cluster.level = "top"),
                   expand.grid(factors,contrasts,converged.clusters) %>% dplyr::mutate(cluster.level = "converged"))
colnames(design.df)[1:3] <- c("factor","contrast","cluster")
design.df$factor <- as.character(design.df$factor)
design.df$contrast <- as.character(design.df$contrast)
design.df$cluster <- as.character(design.df$cluster)
design.df$cluster.level <- as.character(design.df$cluster.level)

#create the gene x cell expression sparse matrix
gbm <- as(matrix(0,nrow = 15000,ncol = 9000,dimnames = list(paste0("gene",1:15000),paste0("cell",1:9000))),"dgCMatrix")
set.seed(1)
gbm[sample(15000*9000,0.1*15000*9000,replace = F)] <- rnorm(0.1*15000*9000,1.5, 0.8)

#create the data.frame mapping cells to clusters
observations.df <- rbind(data.frame(cell = paste0("cell",1:9000), cluster = sample(paste0("cluster",1:9),9000,replace=T),
                              treatment = sample(c("ctrl","treatment1","treatment2"), 9000, replace=T), stringsAsFactors = F) %>% dplyr::mutate(cluster.level = "top"),
                         data.frame(cell = paste0("cell",1:9000), cluster = sample(paste0("cluster",1:27),9000,replace=T),
                                    treatment = sample(c("ctrl","treatment1","treatment2"), 9000, replace=T), stringsAsFactors = F) %>% dplyr::mutate(cluster.level = "converged"))
observations.df$treatment <- factor(observations.df$treatment, levels = c("ctrl","treatment1","treatment2")) 

我构建了一个shiny应用程序,该应用程序具有两个绘图函数,对应于test数据的两个差分表达式,并且我还有一组renderUI用于将单元格子集为cluster(对于选定的cluster.levelfactor、 和contrast

差异表达测试(应用于选定的基因,但对本文而言并不重要)是:

Gamma glmA:将 a与 a拟合log link function到所选基因的非零值细胞中的表达水平

logistic glmB:将a与alogit link function与所选基因在细胞中表达(即表达或不表达)的数量进行拟合。

我的问题更多与renderUI我允许用户对单元格进行子集化的 's 集有关。因为它们存在依赖关系,所以我将它们中的!is.null()条件用于input它们所依赖的 s,但我希望获得有关更好方法的帮助来实现这一目标,希望这会更快。此外,我还在!is.null()绘图功能中使用了这些条件,以及actionButton防止由于进行不同选择时发生的不需要的处理而发生滞后,但我也希望在改进该代码方面获得帮助也。

这是shiny我对上述数据的代码:

library(shiny)
library(ggplot2)
library(ggpmisc)

server <- function(input, output)
{
  output$selected.cluster.level <- renderUI({
    radioButtons("selected.cluster.level", "Select Cluster Level", choices = c("top","converged"), inline=TRUE)
  })
  
  output$selected.clusters <- renderUI({
    all.clusters <- unique(dplyr::filter(design.df,cluster.level %in% input$selected.cluster.level)$cluster)
    selectInput("selected.clusters", "Select Clusters", choices = all.clusters,multiple = T,selected = all.clusters)
  })

  output$selected.factor <- renderUI({
    if(!is.null(input$selected.clusters)){
      all.factors <- unique(dplyr::filter(design.df,cluster %in% input$selected.clusters)$factor)
      selectInput("selected.factor", "Select Factor", choices = all.factors,multiple = F,selected = all.factors[1])
    }
  })

  output$selected.contrast <- renderUI({
    if(!is.null(input$selected.clusters) & !is.null(input$selected.factor)){
      all.contrasts <- unique(dplyr::filter(design.df,factor == input$selected.factor & cluster %in% input$selected.clusters)$contrast)
      selectInput("selected.contrast", "Select Contrast", choices = all.contrasts,multiple = F,selected = all.contrasts[1])
    }
  })

  output$selected.gene <- renderUI({
    selectInput("selected.gene", "Select Gene", choices = rownames(gbm),multiple = F,selected = rownames(gbm)[1])
  })
  
  test.A.plot <- reactive({
    test.A.plot <- NULL
    if(input$goButton > 0){
      if(!is.null(input$selected.gene) & !is.null(input$selected.clusters) & !is.null(input$selected.factor) & !is.null(input$selected.contrast)){
        selected.gene <- isolate(input$selected.gene)
        selected.clusters <- isolate(input$selected.clusters)
        selected.factor <- isolate(input$selected.factor)
        selected.contrast <- isolate(input$selected.contrast)
        factor.levels <- strsplit(selected.contrast,split="\\s+?vs\\.\\s+?")[[1]] %>% rev()
        selected.observations.df <- dplyr::filter(observations.df,cluster.level %in% input$selected.cluster.level) %>%
          dplyr::filter(cluster %in% selected.clusters) %>%
          dplyr::select_(.dots = c("cell","cluster",selected.factor)) %>%
          dplyr::filter(!!as.symbol(selected.factor) %in% factor.levels)
        colnames(selected.observations.df)[which(colnames(selected.observations.df) == selected.factor)] <- "selected.factor"
        selected.observations.df$selected.factor <- factor(selected.observations.df$selected.factor,levels = factor.levels)
        selected.gbm.vals <- gbm[which(rownames(gbm) == selected.gene),which(colnames(gbm) %in% selected.observations.df$cell)]
        plot.df <- selected.observations.df %>%
          dplyr::left_join(data.frame(cell=names(selected.gbm.vals),value=unname(selected.gbm.vals)),by=c("cell"="cell")) %>%
          dplyr::filter(value > 0)
        plot.df$cluster <- factor(plot.df$cluster)
        test.A.plot <- ggplot(plot.df,aes(x=selected.factor,y=value)) +
          geom_violin(aes(fill=selected.factor,color=selected.factor),alpha=0.3) +
          geom_boxplot(width=0.1,aes(color=selected.factor),fill=NA) +
          geom_smooth(mapping=aes(x=selected.factor,y=value,group=cluster),color="black",method="glm",method.args=list(family=Gamma(link='log')),size=1,se=T) +
          stat_poly_eq(mapping=aes(x=selected.factor,y=value,group=cluster,label=stat(p.value.label)),formula=y~x,method="glm",method.args=list(family=Gamma(link='log')),parse=T,npcx="center",npcy="bottom") + 
          scale_fill_manual(limits=factor.levels,values=scales::hue_pal()(length(factor.levels)),drop=F) +
          scale_color_manual(limits=factor.levels,values=scales::hue_pal()(length(factor.levels)),drop=F) +
          facet_wrap(as.formula("~ cluster")) + theme_minimal() + ylab(paste0("#",selected.gene," value"))+theme(plot.title=element_text(hjust=0.5),legend.title=element_blank(),axis.ticks.x=element_blank(),axis.text.x=element_blank(),axis.title.x=element_blank())
      }
    }
    return(test.A.plot)
  })
  
  test.B.plot <- reactive({
    test.B.plot <- NULL
    if(input$goButton > 0){
      if(!is.null(input$selected.gene) & !is.null(input$selected.clusters) & !is.null(input$selected.factor) & !is.null(input$selected.contrast)){
        selected.gene <- isolate(input$selected.gene)
        selected.clusters <- isolate(input$selected.clusters)
        selected.factor <- isolate(input$selected.factor)
        selected.contrast <- isolate(input$selected.contrast)
        factor.levels <- strsplit(selected.contrast,split="\\s+?vs\\.\\s+?")[[1]] %>% rev()
        selected.observations.df <- dplyr::filter(observations.df,cluster.level %in% input$selected.cluster.level) %>%
          dplyr::filter(cluster %in% selected.clusters) %>%
          dplyr::select_(.dots = c("cell","cluster",selected.factor)) %>%
          dplyr::filter(!!as.symbol(selected.factor) %in% factor.levels)
        colnames(selected.observations.df)[which(colnames(selected.observations.df) == selected.factor)] <- "selected.factor"
        selected.observations.df$selected.factor <- factor(selected.observations.df$selected.factor,levels = factor.levels)
        selected.gbm.vals <- gbm[which(rownames(gbm) == selected.gene),which(colnames(gbm) %in% selected.observations.df$cell)]
        expected.fractions.df <- selected.observations.df %>%
          dplyr::group_by(selected.factor) %>%
          dplyr::summarise(f.expressed.cells=n()/nrow(selected.observations.df)) %>%
          dplyr::mutate(expected.f.expressed.cells = qlogis(f.expressed.cells)) %>%
          dplyr::select(-f.expressed.cells)
        plot.df <- selected.observations.df %>%
          dplyr::left_join(data.frame(cell=names(selected.gbm.vals),value=unname(selected.gbm.vals)),by=c("cell"="cell")) %>%
          dplyr::mutate(is.expressed = ifelse(value > 0,1,0)) %>% dplyr::select(-value) %>% dplyr::left_join(expected.fractions.df)
        plot.df$cluster <- factor(plot.df$cluster)
        plot.summary.df <- plot.df %>%
          dplyr::group_by(cluster,selected.factor) %>%
          dplyr::tally() %>%
          dplyr::rename(total.cells=n) %>%
          dplyr::left_join(plot.df %>% dplyr::group_by(cluster,selected.factor) %>% dplyr::mutate(n.expressed.cells=sum(is.expressed)) %>% dplyr::select(-is.expressed) %>% unique()) %>%
          dplyr::mutate(f.expressed.cells=n.expressed.cells/total.cells) %>%
          dplyr::select(f.expressed.cells,cluster,selected.factor) %>%
          unique() %>% dplyr::left_join(expected.fractions.df)
        #figure out n.col from facet_wrap
        facet.wrapped.plot <- ggplot(data = plot.summary.df, aes(x = selected.factor, y = f.expressed.cells, fill = selected.factor)) +
          geom_bar(stat = 'identity') +
          scale_x_discrete(name = NULL,labels = levels(plot.summary.df$selected.factor), breaks = sort(unique(plot.summary.df$selected.factor))) +
          facet_wrap(as.formula("~ cluster")) + theme_minimal()+theme(legend.position="none",plot.title=element_text(hjust=0.5)) + ylab("Fraction of cells")
        n.cols <- wrap_dims(length(facet.wrapped.plot))[2]
        plot.list <- lapply(levels(plot.df$cluster),function(l){
          p.df <- dplyr::filter(plot.summary.df,cluster == l)
          n.df <- dplyr::filter(plot.df,cluster == l)
          fit <- glm(is.expressed ~ selected.factor + offset(expected.f.expressed.cells), data = n.df, family = binomial(link = 'logit'))
          fit.text <- paste0(format(round(summary(fit)$coefficients[2,1],3),scientiffic=T)," (P = ",format(round(summary(fit)$coefficients[2,4],3),scientiffic=T),")")
          cluster.plot <- ggplot(data = p.df, aes(x = selected.factor, y = f.expressed.cells, fill = selected.factor)) +
            geom_bar(stat = 'identity') +
            stat_function(fun = logitTrendLine,args=list(ests=coef(fit)),size=2,color="black") +
            annotate("text",size=4,vjust=0,hjust=0.5,x=1.5,y=0,label=fit.text,color="black") + 
            scale_x_discrete(name = NULL,labels = factor.levels, breaks = factor.levels) + ylim(0,1) +
            theme_minimal()+theme(legend.position="none",plot.title=element_text(hjust=0.5)) + ylab("Fraction of cells") + ggtitle(l)
        })
        plot.list <- plot.list[which(sapply(plot.list,function(i) !is.null(i)))]
        if(length(plot.list) > 0) test.B.plot <- ggpubr::as_ggplot(gridExtra::grid.arrange(grobs=plot.list,ncol=n.cols,top=grid::textGrob(selected.gene)))
      }
    }
    return(test.B.plot)
  })

  output$out.plot <- renderPlot({
    if(input$outputType == "A"){
      test.A.plot()
    } else if(input$outputType == "B"){
      test.B.plot()
    }
  })
}

ui <- fluidPage(
  titlePanel("Data Explorer"),
  sidebarLayout(
    sidebarPanel(
      tags$head(
        tags$style(HTML(".multicol {-webkit-column-count: 3; /* Chrome, Safari, Opera */-moz-column-count: 3; /* Firefox */column-count: 3;}")),
        tags$style(type="text/css", "#loadmessage {position: fixed;top: 0px;left: 0px;width: 100%;padding: 5px 0px 5px 0px;text-align: center;font-weight: bold;font-size: 100%;color: #000000;background-color: #CCFF66;z-index: 105;}"),
        tags$style(type="text/css",".shiny-output-error { visibility: hidden; }",".shiny-output-error:before { visibility: hidden; }")),
      selectInput("outputType", "Output Type", choices = c("A","B")),
      uiOutput("selected.cluster.level"),
      uiOutput("selected.clusters"),
      uiOutput("selected.factor"),
      uiOutput("selected.contrast"),
      uiOutput("selected.gene"),
      actionButton(icon=icon("chart-line"),"goButton","Render figure"),
    ),
    mainPanel(
      plotOutput("out.plot")
    )
  )
)

shinyApp(ui = ui, server = server)
4

0 回答 0