0
structure(list(drug = c("Chlorambucil", "Fludarabine", "FludarabineMafosfamide", 
"NDI031301", "CMPB", "Tofacitinib", "Peficitinib", "FludarabineMafosfamide", 
"PDB", "Filgotinib", "Dexamethasone", "CMPA", "Lenalidomide", 
"Dexamethasone", "Gandotinib", "NDI031301", "Filgotinib", "PDB", 
"CMPB", "Ruxolitinib", "CC122", "Atovaquone", "CC122", "SAR20347", 
"Momelotinib", "Momelotinib", "Tofacitinib", "Fludarabine", "Fludarabine", 
"Cerdulatinib", "Lenalidomide", "Atovaquone", "Chlorambucil", 
"CMPA", "FludarabineMafosfamide", "FludarabineMafosfamide", "Fludarabine", 
"Atovaquone", "Momelotinib", "PDB", "Filgotinib", "Chlorambucil", 
"Dexamethasone", "Tofacitinib", "SAR20347", "CMPB", "Momelotinib", 
"Fludarabine", "Cerdulatinib", "Peficitinib", "Atovaquone", "CC122", 
"CMPA", "NDI031301", "PDB", "CMPA", "Lenalidomide", "SAR20347", 
"Tofacitinib", "Gandotinib", "Lenalidomide", "Peficitinib", "CMPB", 
"CC122", "Dexamethasone", "FludarabineMafosfamide", "Ruxolitinib", 
"CMPB", "Peficitinib", "Tofacitinib", "FludarabineMafosfamide", 
"Filgotinib", "Dexamethasone", "CMPA", "Dexamethasone", "Gandotinib", 
"NDI031301", "Filgotinib", "SAR20347", "CMPB", "Ruxolitinib", 
"Peficitinib", "Atovaquone", "CC122", "SAR20347", "Momelotinib", 
"Momelotinib", "Tofacitinib", "Fludarabine", "Fludarabine", "Cerdulatinib", 
"Atovaquone", "Chlorambucil", "CMPA", "NDI031301"), dose = c(1, 
1, 10, 1, 0.1, 1, 1, 1, 100, 1, 10, 1, 10, 100, 1, 10, 10, 10, 
1, 1, 0.1, 3, 1, 1, 1, 0.1, 10, 1, 10, 1, 1, 30, 30, 0.1, 0.01, 
0.1, 0.01, 0.3, 0.001, 1, 0.01, 0.3, 0.1, 0.01, 0.1, 0.001, 0.01, 
0.1, 0.01, 0.1, 0.03, 0.01, 0.01, 0.01, 0.1, 0.001, 0.01, 0.01, 
0.1, 0.01, 0.1, 0.01, 0.01, 0.001, 1, 10, 10, 0.1, 1, 1, 1, 1, 
10, 1, 100, 1, 10, 10, 10, 1, 1, 10, 3, 1, 1, 1, 0.1, 10, 10, 
1, 1, 30, 30, 0.1, 1), drug.dose = c("Chlorambucil_1uM", "Fludarabine_1uM", 
"FludarabineMafosfamide_10ug/mlplus1ug/ml", "NDI031301_1uM", 
"CMPB_0.1uM", "Tofacitinib_1uM", "Peficitinib_1uM", "FludarabineMafosfamide_1ug/mlplus1ug/ml", 
"PDB_100ng/ml", "Filgotinib_1uM", "Dexamethasone_10uM", "CMPA_1uM", 
"Lenalidomide_10uM", "Dexamethasone_100uM", "Gandotinib_1uM", 
"NDI031301_10uM", "Filgotinib_10uM", "PDB_10ng/ml", "CMPB_1uM", 
"Ruxolitinib_1uM", "CC122_0.1uM", "Atovaquone_3uM", "CC122_1uM", 
"SAR20347_1uM", "Momelotinib_1uM", "Momelotinib_0.1uM", "Tofacitinib_10uM", 
"Fludarabine_1ug/ml", "Fludarabine_10ug/ml", "Cerdulatinib_1uM", 
"Lenalidomide_1uM", "Atovaquone_30uM", "Chlorambucil_30uM", "CMPA_0.1uM", 
"FludarabineMafosfamide_0.01ug/mlplus1ug/ml", "FludarabineMafosfamide_0.1ug/mlplus1ug/ml", 
"Fludarabine_0.01ug/ml", "Atovaquone_0.3uM", "Momelotinib_0.001uM", 
"PDB_1ng/ml", "Filgotinib_0.01uM", "Chlorambucil_0.3uM", "Dexamethasone_0.1uM", 
"Tofacitinib_0.01uM", "SAR20347_0.1uM", "CMPB_0.001uM", "Momelotinib_0.01uM", 
"Fludarabine_0.1ug/ml", "Cerdulatinib_0.01uM", "Peficitinib_0.1uM", 
"Atovaquone_0.03uM", "CC122_0.01uM", "CMPA_0.01uM", "NDI031301_0.01uM", 
"PDB_0.1ng/ml", "CMPA_0.001uM", "Lenalidomide_0.01uM", "SAR20347_0.01uM", 
"Tofacitinib_0.1uM", "Gandotinib_0.01uM", "Lenalidomide_0.1uM", 
"Peficitinib_0.01uM", "CMPB_0.01uM", "CC122_0.001uM", "Dexamethasone_1uM", 
"FludarabineMafosfamide_10ug/mlplus1ug/ml", "Ruxolitinib_10uM", 
"CMPB_0.1uM", "Peficitinib_1uM", "Tofacitinib_1uM", "FludarabineMafosfamide_1ug/mlplus1ug/ml", 
"Filgotinib_1uM", "Dexamethasone_10uM", "CMPA_1uM", "Dexamethasone_100uM", 
"Gandotinib_1uM", "NDI031301_10uM", "Filgotinib_10uM", "SAR20347_10uM", 
"CMPB_1uM", "Ruxolitinib_1uM", "Peficitinib_10uM", "Atovaquone_3uM", 
"CC122_1uM", "SAR20347_1uM", "Momelotinib_1uM", "Momelotinib_0.1uM", 
"Tofacitinib_10uM", "Fludarabine_10ug/ml", "Fludarabine_1ug/ml", 
"Cerdulatinib_1uM", "Atovaquone_30uM", "Chlorambucil_30uM", "CMPA_0.1uM", 
"NDI031301_1uM"), combo = c("none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none", "none", "none", "none", "none", "none", "none", 
"none", "none"), cluster = c(3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L), dosage = c("1uM", "1uM", "10ug/mlplus1ug/ml", 
"1uM", "0.1uM", "1uM", "1uM", "1ug/mlplus1ug/ml", "100ng/ml", 
"1uM", "10uM", "1uM", "10uM", "100uM", "1uM", "10uM", "10uM", 
"10ng/ml", "1uM", "1uM", "0.1uM", "3uM", "1uM", "1uM", "1uM", 
"0.1uM", "10uM", "1ug/ml", "10ug/ml", "1uM", "1uM", "30uM", "30uM", 
"0.1uM", "0.01ug/mlplus1ug/ml", "0.1ug/mlplus1ug/ml", "0.01ug/ml", 
"0.3uM", "0.001uM", "1ng/ml", "0.01uM", "0.3uM", "0.1uM", "0.01uM", 
"0.1uM", "0.001uM", "0.01uM", "0.1ug/ml", "0.01uM", "0.1uM", 
"0.03uM", "0.01uM", "0.01uM", "0.01uM", "0.1ng/ml", "0.001uM", 
"0.01uM", "0.01uM", "0.1uM", "0.01uM", "0.1uM", "0.01uM", "0.01uM", 
"0.001uM", "1uM", "10ug/mlplus1ug/ml", "10uM", "0.1uM", "1uM", 
"1uM", "1ug/mlplus1ug/ml", "1uM", "10uM", "1uM", "100uM", "1uM", 
"10uM", "10uM", "10uM", "1uM", "1uM", "10uM", "3uM", "1uM", "1uM", 
"1uM", "0.1uM", "10uM", "10ug/ml", "1ug/ml", "1uM", "30uM", "30uM", 
"0.1uM", "1uM")), row.names = c(NA, -95L), class = "data.frame")

对不起菜鸟问题,我有这个复杂的药物集群数据,如屏幕截图所示。

我想将它们显示成一个堆叠的 geom_col 类型的图,x 轴是“药物”,Y 轴是出现的计数,并按集群分面。

到这一步都很容易。但我还想用填充颜色来表示剂量,以便查看这些药物及其剂量在每个集群中的分布。实际剂量的单位各不相同。

我已将数值剂量提取到单独的一列中。我想分配一个因子向量(“min”、“low”、“high”、“max”)来反映剂量水平,因为我知道每种药物都有 4 种不同的剂量。

问题是不同药物的数字剂量不同,所以我不能简单地使用等级

例如,有些药物的剂量范围是 0.03 到 30,有些是 0.3 到 300,还有些是 0.01 到 10。

那么,如何利用这一数值剂量列,为每种药物的各个剂量分配剂量水平呢?

4

1 回答 1

2

这是一种使用 rank() 和连接(join)的方法。我们可以利用这样一个事实:同一种药物的各个剂量使用相同的单位。

library(dplyr)
# Build a per-drug lookup table of dose levels. Doses are ranked within each
# drug, so drugs with different absolute dose ranges share the same labels.
# NOTE: labels are picked by rank, which assumes four distinct doses per drug;
# a drug with fewer distinct doses only receives the lowest labels.
df %>%
  arrange(drug) %>% # for visualization
  distinct(drug, dose) %>% # one row per drug/dose pair, extra columns dropped
  group_by(drug) %>% # rank within each drug
  mutate(
    rank = rank(dose), # rank doses, mostly for visualization of results
    category = c("min", "low", "high", "max")[rank] # assign category
  )
# A tibble: 67 x 4
# Groups:   drug [19]
   drug           dose  rank category
   <chr>         <dbl> <dbl> <chr>   
 1 Atovaquone    3         3 high    
 2 Atovaquone   30         4 max     
 3 Atovaquone    0.3       2 low     
 4 Atovaquone    0.03      1 min     
 5 CC122         0.1       3 high    
 6 CC122         1         4 max     
 7 CC122         0.01      2 low     
 8 CC122         0.001     1 min     
 9 Cerdulatinib  1         2 low     
10 Cerdulatinib  0.01      1 min     
# … with 57 more rows

现在我们可以把结果连接(join)回原来的 data.frame:

# Compute the per-drug dose-level lookup and attach it to every row of the
# original data. Doses are ranked within each drug; labels are picked by rank,
# which assumes four distinct doses per drug.
df %>%
  arrange(drug) %>%
  distinct(drug, dose) %>% # one row per drug/dose pair
  group_by(drug) %>%
  mutate(
    rank = rank(dose), # rank doses within each drug
    category = c("min", "low", "high", "max")[rank]
  ) %>%
  # Name the join keys explicitly instead of relying on whichever columns the
  # two tables happen to share; this also silences the "Joining, by" message.
  right_join(df, by = c("drug", "dose"))
# A tibble: 95 x 8
# Groups:   drug [19]
   drug        dose dosage  rank category drug.dose         combo cluster
   <chr>      <dbl> <chr>  <dbl> <chr>    <chr>             <chr>   <int>
 1 Atovaquone  3    3uM        3 high     Atovaquone_3uM    none        4
 2 Atovaquone  3    3uM        3 high     Atovaquone_3uM    none        6
 3 Atovaquone 30    30uM       4 max      Atovaquone_30uM   none        4
 4 Atovaquone 30    30uM       4 max      Atovaquone_30uM   none        6
 5 Atovaquone  0.3  0.3uM      2 low      Atovaquone_0.3uM  none        5
 6 Atovaquone  0.03 0.03uM     1 min      Atovaquone_0.03uM none        5
 7 CC122       0.1  0.1uM      3 high     CC122_0.1uM       none        4
 8 CC122       1    1uM        4 max      CC122_1uM         none        4
 9 CC122       1    1uM        4 max      CC122_1uM         none        6
10 CC122       0.01 0.01uM     2 low      CC122_0.01uM      none        5
# … with 85 more rows
于 2021-03-30T17:09:31.197 回答