0

使用以下数据:

    # Console transcript: read the comma-separated file (first row is the header).
    > mytable<-read.delim("mytable.csv",sep=",",header=T)
    # Check the class of the loaded object.
    > class(mytable)
   [1] "data.frame"
    # Print the table; its columns are count, lang1 and lang2.
    > mytable

         count lang1 lang2
    1   908446    ar    ar
    2       96    ar    bg
    3       73    ar    bo
    4        2    ar   chr
    5       61    ar    da
    6     1282    ar    de
    7       84    ar    el
    8    28067    ar    en
    9     1178    ar    es
    10     962    ar    et
    11   25945    ar    fa
    12     100    ar    fi
    13     765    ar    fr
    14      18    ar    he
    15       1    ar    hi
    16    1036    ar    ht
    17     267    ar    hu
    18      17    ar    hy
    19    3306    ar    id
    20      23    ar    is
    21     262    ar    it
    22       1    ar    iu
    23     265    ar    ja
    24      46    ar    ka
    25     400    ar    ko
    26      43    ar    lt
    27     160    ar    lv
    28       1    ar    my
    29    1539    ar    nl
    30      28    ar    no
    31  558362    ar  none
    32     507    ar    pl
    33     847    ar    pt
    34     577    ar    ru
    35     369    ar    sk
    36     309    ar    sl
    37     127    ar    sv
    38       1    ar    ta
    39       9    ar    th
    40     911    ar    tl
    41     585    ar    tr
    42       3    ar    uk
    43   46861    ar   und
    44    6499    ar    ur
    45    2245    ar    vi
    46      17    ar    zh
    47      13    ca    ar
    48       1    ca    bg
    49      27    ca    da
    50     100    ca    de
    51     946    ca    en
    52    8840    ca    es
    53      56    ca    et
    54      15    ca    fi
    55     912    ca    fr
    56      97    ca    ht
    57      64    ca    hu
    58      96    ca    id
    59       8    ca    is
    60     556    ca    it
    61      12    ca    ja
    62       2    ca    ko
    63      13    ca    lt
    64      58    ca    lv
    65      47    ca    nl
    66       6    ca    no
    67    7729    ca  none
    68      26    ca    pl
    69    1032    ca    pt
    70      10    ca    ru
    71      62    ca    sk
    72      57    ca    sl
    73      32    ca    sv
    74      93    ca    tl
    75      39    ca    tr
    76     275    ca   und
    77      53    ca    vi
    78      14    cs    ar
    79      33    cs    bg
    80       1    cs    da
    81      64    cs    de
    82    1729    cs    en
    83     162    cs    es
    84      47    cs    et
    85       6    cs    fi
    86      39    cs    fr
    87      27    cs    ht
    88      28    cs    hu
    89      30    cs    id
    90       2    cs    is
    91      30    cs    it
    92       5    cs    ja
    93      12    cs    lt
    94      26    cs    lv
    95      18    cs    nl
    96     790    cs  none
    97      77    cs    pl
    98      86    cs    pt
    99     366    cs    ru
    100   1497    cs    sk
    101     83    cs    sl
    102      2    cs    sv
    103     26    cs    tl
    104     16    cs    tr
    105      1    cs    uk
    106    186    cs   und
    107     60    cs    vi
    108      3    cs    zh

我希望在下面这种热图做法中,让数值相近的计数聚类得更紧密:

# Cross-tabulate count by lang1 and lang2, then plot the table with heatmap().
> Xmytable<-xtabs(mytable$count ~ mytable$lang1 + mytable$lang2, mytable)
> heatmap(Xmytable) 

聚类不良的热图

所以这是我的问题:

1.) 是否有另一种方法来操作此数据集以根据计数生成具有色谱的热图?(我想创建一个类似于我展示的热图)

2.)可以改进聚类以将相似的颜色分组在一起吗?

谢谢!

4

2 回答 2

1

你可以试试这个。

library(ggplot2)
# Draw one tile per (lang1, lang2) pair, coloured by the count column.
# The data frame is the question's `mytable`; geom_tile() is used because
# each row already holds the value for its cell, so no binning is needed.
ggplot(mytable, aes(x = lang1, y = lang2, fill = count)) +
  geom_tile()

要添加树状图,请参考此帖子和/或另外发帖提问。帖子标题:用 ggplot2 重现 lattice 树状图

于 2013-06-08T07:04:11.957 回答
0

这是迄今为止我发现的最佳选择:

在此处输入图像描述

# Load the comma-separated counts (first row is the header).
mytable <- read.delim("mytable.csv", sep = ",", header = TRUE)
# Work on a natural-log scale: the counts shown range from 1 to ~900,000.
mytable$ln <- log(mytable$count)
# Drop the raw counts; `$` is required here (a `#` would start a comment
# and leave the column in place).
mytable$count <- NULL
mytable

"bio","twit","ln"
"ar","ar",13.7194907264167
"ar","bg",4.56434819146784
"ar","bo",4.29045944114839
"ar","chr",0.693147180559945
"ar","da",4.11087386417331
"ar","de",7.15617663748062
"ar","el",4.43081679884331
"ar","en",10.2423497879763
"ar","es",7.07157336421153
"ar","et",6.86901445066571
"ar","fa",10.1637341918018
"ar","fi",4.60517018598809
"ar","fr",6.63987583382654
"ar","he",2.89037175789616
"ar","hi",0
"ar","ht",6.94312242281943
"ar","hu",5.58724865840025
"ar","hy",2.83321334405622
"ar","id",8.10349427838097
"ar","is",3.13549421592915
"ar","it",5.5683445037611
"ar","iu",0
"ar","ja",5.57972982598622
"ar","ka",3.8286413964891
"ar","ko",5.99146454710798
"ar","lt",3.76120011569356
"ar","lv",5.07517381523383
"ar","my",0
"ar","nl",7.33888813383888
"ar","no",3.3322045101752
"ar","NONE",13.2327627765388
"ar","pl",6.22851100359118
"ar","pt",6.74170069465205
"ar","ru",6.3578422665081
"ar","sk",5.91079664404053
"ar","sl",5.73334127689775
"ar","sv",4.84418708645859
"ar","ta",0
"ar","th",2.19722457733622
"ar","tl",6.81454289725996
"ar","tr",6.37161184723186
"ar","uk",1.09861228866811
"ar","und",10.7549410519963
"ar","ur",8.77940359789435
"ar","vi",7.71646080017636
"ar","zh",2.83321334405622
"ca","ar",2.56494935746154
"ca","bg",0
"ca","da",3.29583686600433
"ca","de",4.60517018598809
"ca","en",6.85224256905188
"ca","es",9.08704215563169
"ca","et",4.02535169073515
"ca","fi",2.70805020110221
"ca","fr",6.81563999007433
"ca","ht",4.57471097850338
"ca","hu",4.15888308335967
"ca","id",4.56434819146784
"ca","is",2.07944154167984
"ca","it",6.32076829425058
"ca","ja",2.484906649788
"ca","ko",0.693147180559945
"ca","lt",2.56494935746154
"ca","lv",4.06044301054642
"ca","nl",3.85014760171006
"ca","no",1.79175946922805
"ca","NONE",8.95273476710687
"ca","pl",3.25809653802148
"ca","pt",6.93925394604151
"ca","ru",2.30258509299405
"ca","sk",4.12713438504509
"ca","sl",4.04305126783455
"ca","sv",3.46573590279973
"ca","tl",4.53259949315326
"ca","tr",3.66356164612965
"ca","und",5.61677109766657
"ca","vi",3.97029191355212
"cs","ar",2.63905732961526
"cs","bg",3.49650756146648
"cs","da",0
"cs","de",4.15888308335967
"cs","en",7.45529848568329
"cs","es",5.08759633523238
"cs","et",3.85014760171006
"cs","fi",1.79175946922805
"cs","fr",3.66356164612965
"cs","ht",3.29583686600433
"cs","hu",3.3322045101752
"cs","id",3.40119738166216
"cs","is",0.693147180559945
"cs","it",3.40119738166216
"cs","ja",1.6094379124341
"cs","lt",2.484906649788
"cs","lv",3.25809653802148
"cs","nl",2.89037175789616
"cs","NONE",6.67203294546107
"cs","pl",4.34380542185368
"cs","pt",4.45434729625351
"cs","ru",5.90263333340137
"cs","sk",7.31121838441963
"cs","sl",4.4188406077966
"cs","sv",0.693147180559945
"cs","tl",3.25809653802148
"cs","tr",2.77258872223978
"cs","uk",0
"cs","und",5.2257466737132
"cs","vi",4.0943445622221
"cs","zh",1.09861228866811


library(pheatmap)
# Sum the log-counts into a lang1 x lang2 contingency table. The formula
# uses bare column names because `data = mytable` already supplies them.
Xmytable <- xtabs(ln ~ lang1 + lang2, data = mytable)
# Plot the table with row clustering enabled.
pheatmap(Xmytable, cluster_rows = TRUE)

我还想补充一个使用 ggplot() 的方案,但这似乎需要用到 kmeans。然而,由于数据中含有非数值列,我无法对该数据集应用 kmeans,因此上面分享的链接并不能真正解决这种情况下的问题(不过就一般的热图而言,它仍然是一个有用的链接)。

于 2013-06-14T00:07:37.863 回答