3

我有一个需要旋转的数据框,但该数据框有重复的标识符,因此spread函数会出错Error: Duplicate identifiers for rows (5, 6)

Dimension = c("A","A","B","B","A","A")   
Date = c("Mon","Tue","Mon","Wed","Fri","Fri")    
Metric = c(23,25,7,9,7,8)
df = data.frame(Dimension,Date,Metric)
df

  Dimension Date Metric
1         A  Mon     23
2         A  Tue     25
3         B  Mon      7
4         B  Wed      9
5         A  Fri      7
6         A  Fri      8

library(tidyr)
df1 = spread(df, Date, Metric, fill = " ")

Error: Duplicate identifiers for rows (5, 6)

然后我合并行并粘贴Metric

dfa = aggregate(df[3], df[-3], 
                FUN = function(X) paste(unique(X), collapse=", "))

  Dimension Date Metric
1         A  Fri   7, 8
2         A  Mon     23
3         B  Mon      7
4         A  Tue     25
5         B  Wed      9

然后重复,当然它现在可以工作了:

df1 = spread(dfa, Date, Metric, fill = " ")
df1
  Dimension  Fri Mon Tue Wed
1         A 7, 8  23  25    
2         B        7       9

问题:有没有一种“更简单”的方法可以做到这一点,或者我的方法是否足够有效,所以我不需要为此失眠?谢谢!

编辑。所有代码 - 我的和 2 akrun 的代码都可以很好地处理这个小数据集。但是,akrun 的dplyr版本在我的真实数据集上中断了。这是dput.

structure(list(Dimension = c(10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12303485675, 12303485675, 12303485675, 12303485675, 12303485675, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437), Date = structure(c(1L, 3L, 5L, 7L, 9L, 10L, 11L, 
12L, 13L, 14L, 16L, 18L, 19L, 20L, 22L, 23L, 24L, 26L, 27L, 28L, 
30L, 32L, 33L, 34L, 40L, 41L, 42L, 47L, 48L, 49L, 51L, 52L, 53L, 
54L, 55L, 58L, 59L, 60L, 61L, 62L, 63L, 65L, 66L, 68L, 69L, 70L, 
74L, 75L, 76L, 2L, 3L, 5L, 7L, 8L, 10L, 11L, 15L, 17L, 20L, 21L, 
24L, 25L, 28L, 30L, 31L, 34L, 36L, 42L, 43L, 46L, 48L, 49L, 53L, 
54L, 56L, 65L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 7L, 24L, 30L, 
38L, 65L, 4L, 6L, 7L, 24L, 28L, 29L, 30L, 35L, 37L, 39L, 44L, 
45L, 50L, 57L, 64L, 65L), .Label = c("16", "analog tuner", "aspect ratio", 
"assembled in country of origin", "backlight technology", "battery type", 
"brand", "brightness", "color class", "component video", "composite video", 
"country of origin", "depth w/ stand", "digital audio output", 
"digital tuner", "display technology", "features", "green compliance certificate/authority", 
"green compliant", "hdmi", "headphone jack", "height w/ stand", 
"limited warranty", "manufacturer", "maximum resolution", "media player", 
"motion interpolation technology", "mpn", "multi pack indicator", 
"name", "native contrast ratio", "number of hdmi ports", "number of usb ports", 
"operating power consumption", "origin of components", "package contents", 
"primary color", "product dimensions", "product in in (l x w x h)", 
"product model", "product series", "product type", "remote control incl", 
"remote included", "resolution", "response time", "rms output power", 
"scan format", "screen size", "shipping weight (in lb)", "sound system", 
"speaker output power (w)", "speakers", "standard refresh rate", 
"standby power consumption", "total number of hdmi ports", "tv definition", 
"tv features", "tv refresh rate (hz)", "tv resolution", "tv screen size (in)", 
"tv screen size range", "tv speakers", "tv technology", "unspsc", 
"usb", "vertical viewing angle", "vesa mount standard", "vga", 
"video signal standard", "viewing angle", "warranty length", 
"wattage", "weight (approx)", "weight w/ stand (approx)", "width w/ stand"
), class = "factor"), Metric = structure(c(40L, 13L, 57L, 69L, 
43L, 72L, 72L, 45L, 38L, 72L, 55L, 44L, 72L, 72L, 15L, 3L, 69L, 
72L, 46L, 26L, 70L, 27L, 1L, 29L, 26L, 54L, 58L, 12L, 39L, 25L, 
42L, 11L, 72L, 37L, 28L, 52L, 36L, 39L, 24L, 19L, 72L, 33L, 72L, 
18L, 72L, 49L, 6L, 10L, 23L, 62L, 13L, 48L, 64L, 31L, 72L, 72L, 
41L, 66L, 72L, 72L, 64L, 16L, 63L, 65L, 4L, 32L, 21L, 58L, 71L, 
35L, 8L, 20L, 72L, 37L, 17L, 33L, 14L, 7L, 72L, 50L, 14L, 2L, 
34L, 59L, 59L, 60L, 5L, 33L, 51L, 47L, 67L, 67L, 53L, 61L, 68L, 
51L, 43L, 30L, 72L, 9L, 22L, 49L, 56L, 33L), .Label = c("1", 
"1-year limited", "1 Year", "1,000:1", "1,140 x 145 x 705 in ; 65.6 lb", 
"10.40 lb", "100 x 100", "1080p", "1080p (HDTV)", "11.20 lb", 
"14", "14 W", "16:9", "178 degrees", "18.30 in", "1920 x 1080", 
"2", "200 x 100", "21", "22 in", "22 in FHD LED TV; Remote Control", 
"25.4", "26.20 in", "29", "29 in", "29L1350U", "3", "300 mW", 
"33.80 W", "36.5 x 6.5 x 23.0", "365 Nit", "50 W", "52161505", 
"6 W", "6.50 ms", "60", "60 Hz", "7.10 in", "720p", "9", "ATSC", 
"Audyssey EQ", "Black", "CEC", "China", "ClearScan 120 Hz", "Does Not Contain a Battery", 
"Edge LED", "HDTV", "HDTV 1080p", "Imported", "Internet Apps", 
"KDL40W600B", "L1350U", "LCD", "LCD, Internet Connected, LED", 
"LED", "LED-LCD TV", "LG", "LG 47LY340C - 47 in - commercial use LED-backlit L", 
"No", "NTSC", "PLED2243A", "ProScan", "PROSCAN PLED2243A 22 in 1080p 60 Hz LED HDTV - PTR", 
"Sleep Timer; Auto Program", "Sony", "Sony KDL40W600B 40 in 1080p 60 Hz Smart LED TV (20", 
"Toshiba", "Toshiba 29L1350U 29 in 720p LED-LCD TV - 16:9 - HD", 
"yes", "Yes"), class = "factor")), .Names = c("Dimension", "Date", 
"Metric"), class = c("data.table", "data.frame"), row.names = c(NA, 
-104L), .internal.selfref = <pointer: 0x00000000003d0788>)
4

2 回答 2

6

您可以dcastdata.tableie 的开发版本中使用。v1.9.5. 安装说明是here

library(data.table)#v1.9.5+
dcast(setDT(df), Dimension~Date, value.var='Metric', 
               fun.aggregate=function(x) toString(unique(x)))
#   Dimension  Fri Mon Tue Wed
#1:         A 7, 8  23  25    
#2:         B        7       9

或者

library(dplyr)
library(tidyr)
df %>%
   group_by(Dimension, Date) %>% 
   summarise(Metric=toString(unique(Metric))) %>% 
   spread(Date, Metric, fill='')
#   Dimension  Fri Mon Tue Wed
#1         A 7, 8  23  25    
#2         B        7       9

更新

使用来自 `OP's post 的新数据集

 setDF(df2)
 df2 %>% 
     group_by(Dimension, Date) %>% 
     summarise(Metric=toString(unique(Metric))) %>%
     spread(Date, Metric, fill='') %>%
     head(2) %>%
     select(1:3)
 #    Dimension 16 analog tuner
 #1 10994030020  9             
 #2 12300245685            NTSC
于 2015-07-03T15:04:24.820 回答
0

我解决了所有问题,现在所有 3 个解决方案都运行了:1 是我的,2 和 3 是 @akrun 的。该代码是完全可重现的,如下所示。事实上,正如@akrun 所设想的那样,带有 dplyr 和 tidyr 的版本 3 在更大的数据集上运行得最快(测试一个是 300 行以适应正文中的 30000 个字符),差异在更大的数据集上更加明显。我自己的版本“1”在较小的数据集(100 行左右)上最快,至少在我的机器上是这样。希望这对某人有帮助!

library(microbenchmark)
library(data.table)#v1.9.5+
library(reshape2)
library(tidyr)
library(dplyr)

df = structure(list(GTIN = c(10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12303485675, 12303485675, 12303485675, 12303485675, 12303485675, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 13201067215, 13201067215, 13201067215, 13201067215, 
                            13201067215, 13964253832, 13964253832, 13964253832, 13964253832, 
                            13964253832, 14818899589, 14818899589, 14818899589, 14818899589, 
                            14818899589, 14818899589, 19748359455, 19748359455, 19748359455, 
                            19748359455, 19748359455, 19748359455, 19748359455, 19748359455, 
                            19748359455, 19748383566, 19748383566, 19748383566, 19748383566, 
                            19748383566, 19748383566, 19748383566, 19748383566, 19748383566, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012), 
                   Key = structure(c(1L, 10L, 12L, 14L, 
                                     16L, 20L, 21L, 24L, 26L, 29L, 35L, 43L, 44L, 45L, 49L, 56L, 57L, 
                                     59L, 61L, 62L, 64L, 67L, 68L, 69L, 77L, 78L, 79L, 84L, 85L, 87L, 
                                     92L, 93L, 96L, 99L, 100L, 110L, 111L, 112L, 113L, 114L, 115L, 
                                     118L, 119L, 122L, 123L, 125L, 129L, 131L, 134L, 6L, 9L, 12L, 
                                     14L, 15L, 20L, 21L, 30L, 41L, 45L, 47L, 57L, 58L, 62L, 64L, 65L, 
                                     69L, 71L, 79L, 80L, 83L, 85L, 86L, 96L, 99L, 108L, 118L, 121L, 
                                     122L, 123L, 125L, 126L, 127L, 128L, 14L, 57L, 64L, 75L, 118L, 
                                     11L, 13L, 14L, 57L, 62L, 63L, 64L, 70L, 74L, 76L, 81L, 82L, 89L, 
                                     109L, 116L, 118L, 14L, 57L, 62L, 64L, 118L, 14L, 57L, 62L, 64L, 
                                     118L, 14L, 17L, 52L, 57L, 64L, 118L, 14L, 17L, 53L, 57L, 62L, 
                                     62L, 64L, 75L, 118L, 14L, 17L, 52L, 53L, 57L, 62L, 64L, 75L, 
                                     118L, 5L, 7L, 14L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 34L, 38L, 
                                     42L, 46L, 48L, 50L, 57L, 62L, 64L, 72L, 73L, 79L, 82L, 88L, 91L, 
                                     95L, 94L, 98L, 97L, 106L, 117L, 118L, 120L, 124L, 130L, 133L, 
                                     5L, 7L, 14L, 18L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 34L, 36L, 
                                     39L, 42L, 46L, 48L, 50L, 57L, 60L, 62L, 64L, 72L, 73L, 79L, 82L, 
                                     88L, 91L, 95L, 94L, 98L, 97L, 106L, 117L, 118L, 120L, 124L, 130L, 
                                     133L, 5L, 7L, 8L, 14L, 18L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 
                                     34L, 38L, 39L, 42L, 46L, 48L, 55L, 57L, 62L, 64L, 66L, 72L, 73L, 
                                     79L, 88L, 91L, 95L, 98L, 97L, 101L, 102L, 117L, 118L, 120L, 124L, 
                                     132L, 133L, 2L, 3L, 4L, 5L, 7L, 14L, 18L, 19L, 22L, 23L, 25L, 
                                     27L, 28L, 31L, 33L, 32L, 34L, 36L, 37L, 39L, 40L, 42L, 46L, 48L, 
                                     50L, 51L, 54L, 55L, 57L, 60L, 62L, 64L, 72L, 73L, 79L, 82L, 88L, 
                                     90L, 91L, 98L, 97L, 102L, 103L, 104L, 105L, 107L, 117L, 118L), 
                                   .Label = c("16","16:9 Mode", "24p Technology", "3D", "Additional Features", "Analog Tuner", 
                                              "Analog TV Tuner", "Analog Video Input Signals", "aspect ratio", 
                                              "Aspect Ratio", "Assembled in Country of Origin", "Backlight Technology", 
                                              "Battery Type", "Brand", "Brightness", "Color Class", "Color Name", 
                                              "Color Temperature Control", "Compatible with Windows 7", "Component Video", 
                                              "Composite Video", "Connectivity", "Connector Type", "Country of Origin", 
                                              "Depth (Shipping)", "Depth with Stand", "Diagonal Size", "Diagonal Size (cm)", 
                                              "Digital Audio Output", "Digital Tuner", "Digital TV Tuner", 
                                              "Dimensions", "Dimensions & Weight Details", "Display Format", 
                                              "Display Technology", "DLNA", "Dynamic Contrast Ratio", "Enclosure Color", 
                                              "ENERGY STAR Qualified", "Expansion Slots", "Features", "Flat Panel Mount Interface", 
                                              "Green Compliance Certificate/Authority", "Green Compliant", 
                                              "HDMI", "HDMI Ports Qty", "headphone jack", "Height (Shipping)", 
                                              "Height with Stand", "Image Aspect Ratio", "Internet Streaming Services", 
                                              "Item Package Quantity", "Item Weight", "LAN Protocol", "LCD Backlight Technology", 
                                              "Limited Warranty", "Manufacturer", "maximum resolution", "Media Player", 
                                              "Motion Enhancement Technology", "Motion Interpolation Technology", 
                                              "MPN", "Multi Pack Indicator", "Name", "Native Contrast Ratio", 
                                              "Nominal Voltage", "Number of HDMI Ports", "Number of USB Ports", 
                                              "Operating Power Consumption", "Origin of Components", "package contents", 
                                              "PC Interface", "Power Device", "Primary Color", "Product Dimensions", 
                                              "Product in Inches (L x W x H)", "Product Model", "Product Series", 
                                              "Product Type", "remote control included", "Remote Included", 
                                              "Resolution", "response time", "RMS Output Power", "Scan Format", 
                                              "screen size", "Screen Size", "Series", "Shipping Weight (in pounds)", 
                                              "Sound Effects", "Sound Output Mode", "Sound System", "Speaker Output Power (W)", 
                                              "Speaker System", "Speaker(s)", "Speakers", "Stand", "Stand Design", 
                                              "Standard Refresh Rate", "Standby Power Consumption", "Stereo Reception System", 
                                              "Supported Audio Formats", "Supported Memory Cards", "Supported Pictures Formats", 
                                              "Supported Video Formats", "Surround Mode", "Timer Functions", 
                                              "Total Number of HDMI Ports", "Tv Definition", "TV Features", 
                                              "TV Refresh Rate (Hz)", "TV Resolution", "TV Screen Size (inches)", 
                                              "TV Screen Size Range", "TV Speakers", "Tv Technology", "TV Tuner", 
                                              "UNSPSC", "USB", "USB Port", "Vertical Viewing Angle", "VESA Mount Standard", 
                                              "VGA", "Video Interface", "Video Signal Standard", "viewing angle", 
                                              "warranty length", "wattage", "Weight (Approximate)", "Weight (Shipping)", 
                                              "Weight with Stand (Approximate)", "Widescreen Modes", "Width (Shipping)", 
                                              "Width with Stand"), class = "factor"), 
                   Value = structure(c(83L, 
                                       19L, 118L, 156L, 90L, 176L, 176L, 92L, 78L, 176L, 115L, 91L, 
                                       176L, 176L, 21L, 5L, 156L, 176L, 97L, 39L, 157L, 40L, 1L, 46L, 
                                       39L, 114L, 120L, 17L, 80L, 38L, 88L, 16L, 176L, 77L, 42L, 109L, 
                                       76L, 80L, 37L, 30L, 176L, 68L, 176L, 28L, 176L, 105L, 8L, 14L, 
                                       35L, 133L, 19L, 103L, 141L, 51L, 176L, 176L, 86L, 150L, 176L, 
                                       176L, 141L, 22L, 138L, 142L, 6L, 66L, 33L, 120L, 175L, 74L, 11L, 
                                       32L, 176L, 77L, 24L, 68L, 20L, 9L, 176L, 106L, 20L, 2L, 72L, 
                                       122L, 122L, 123L, 7L, 68L, 107L, 100L, 151L, 151L, 113L, 132L, 
                                       152L, 107L, 90L, 50L, 176L, 13L, 34L, 105L, 116L, 68L, 122L, 
                                       122L, 60L, 124L, 68L, 146L, 146L, 165L, 147L, 68L, 170L, 90L, 
                                       1L, 170L, 171L, 68L, 146L, 90L, 75L, 146L, 166L, 167L, 163L, 
                                       47L, 68L, 146L, 89L, 1L, 73L, 146L, 164L, 162L, 48L, 68L, 110L, 
                                       133L, 156L, 55L, 81L, 56L, 10L, 87L, 137L, 173L, 12L, 90L, 29L, 
                                       53L, 36L, 19L, 156L, 59L, 159L, 169L, 140L, 117L, 22L, 143L, 
                                       153L, 27L, 26L, 154L, 108L, 176L, 23L, 68L, 178L, 99L, 62L, 61L, 
                                       129L, 133L, 156L, 176L, 54L, 81L, 63L, 15L, 87L, 134L, 174L, 
                                       12L, 176L, 176L, 57L, 53L, 45L, 19L, 156L, 95L, 64L, 160L, 169L, 
                                       140L, 117L, 22L, 144L, 153L, 27L, 26L, 154L, 108L, 176L, 23L, 
                                       68L, 178L, 99L, 71L, 67L, 111L, 133L, 133L, 156L, 176L, 3L, 65L, 
                                       43L, 82L, 87L, 136L, 173L, 80L, 90L, 176L, 29L, 25L, 31L, 121L, 
                                       156L, 44L, 158L, 85L, 169L, 140L, 119L, 145L, 153L, 125L, 154L, 
                                       108L, 131L, 126L, 23L, 68L, 178L, 99L, 155L, 52L, 176L, 94L, 
                                       177L, 101L, 133L, 156L, 176L, 98L, 172L, 41L, 84L, 69L, 18L, 
                                       87L, 135L, 173L, 12L, 176L, 79L, 176L, 4L, 58L, 53L, 49L, 19L, 
                                       130L, 104L, 121L, 156L, 96L, 70L, 161L, 168L, 139L, 119L, 22L, 
                                       93L, 102L, 153L, 154L, 108L, 127L, 148L, 112L, 128L, 149L, 23L, 
                                       68L), .Label = c("1", "1-year limited", "1 x composite video/audio input ( RCA phono x 3 ) - rear 1 x USB ( 4 pin USB Type A ) - side 1 x component video input ( RCA phono x 3 ) - rear 2 x audio line-in ( RCA phono x 2 ) 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) 2 x HDMI ( 19 pin HDMI Type A ) 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) 1 x audio input ( mini-phone 3.5 mm ) 1 x digital audio input (optical) 1 x antenna", 
                                                        "1 x SD Memory Card", "1 Year", "1,000:1", "1,140 x 145 x 705 inches ; 65.6 pounds", 
                                                        "10.40 lb", "100 x 100", "102 cm", "1080p", "1080p (FullHD)", 
                                                        "1080p (HDTV)", "11.20 lb", "117 cm", "14", "14 W", "140 cm ( 138.7 cm viewable )", 
                                                        "16:09", "178°", "18.30\"", "1920 x 1080", "1x analog, 1x digital", 
                                                        "2", "2 port(s)", "2 speakers", "2 x main channel speaker - built-in", 
                                                        "200 x 100", "200 x 200 mm", "21", "21.8 in", "22\"", "22\" FHD LED TV; Remote Control", 
                                                        "25.4", "26.20\"", "28.9 in", "29", "29\"", "29L1350U", "3", 
                                                        "3 x HDMI input ( 19 pin HDMI Type A ) - rear 1 x HDMI input ( 19 pin HDMI Type A ) - side 1 x component video input - rear 2 x USB ( 4 pin USB Type A ) - side 1 x network ( RJ-45 ) - side 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) - side 1 x antenna - rear 1 x digital audio output (optical) - rear 1 x composite video/audio input ( RCA phono x 3 ) - side 1 x audio input ( mini-phone stereo 3.5 mm ) - side 1 x audio input ( mini-phone stereo 3.5 mm ) - rear", 
                                                        "300 mW", "32\"", "32SL400", "33.1 in", "33.80 W", "34 x 7.8 x 7.6 inches", 
                                                        "34.5 x 7.4 x 7.2 inches", "34.6 in", "36.5 x 6.5 x 23.0", "365 Nit", 
                                                        "38 in", "4 port(s)", "4 x HDMI input ( 19 pin HDMI Type A ) 1 x component video input 1 x HD component video / RGB input 1 x composite video/audio input 1 x digital audio output (optical) 1 x Ethernet ( RJ-45 )", 
                                                        "4 x HDMI input ( 19 pin HDMI Type A ) 1 x component video input 1 x HD component video / RGB input 1 x composite video/audio input 1 x digital audio output (optical) 1 x USB ( 4 pin USB Type A )", 
                                                        "40\"", "400 x 200 mm", "400 x 400 mm", "40E200U", "42LN5400", 
                                                        "44.3 in", "45.2 lbs", "46\"", "46G300U", "5.4 in", "50 W", "50.9 in", 
                                                        "52161505", "55\" Class ( 54.6\" viewable )", "55WX800", "57.3 lbs", 
                                                        "6 W", "6.2 pounds", "6.50 ms", "6.7 pounds", "60", "60 Hz", 
                                                        "7.10\"", "7000000:1", "720p", "8.8 in", "82 cm", "9", "9 in", 
                                                        "AC 120/230 V ( 50/60 Hz )", "ATSC", "ATSC, QAM", "Audyssey EQ", 
                                                        "black", "Black", "CEC", "China", "Cinema", "Cinema Mode 24 fps", 
                                                        "ClearFrame 120Hz", "ClearFrame 240Hz", "ClearScan 120Hz", "Compatible with Windows 7 software and devices carry Microsoft’s assurance that these products have passed tests for compatibility and reliability with 32-bit and 64-bit Windows 7.", 
                                                        "Component, composite, HDMI", "Does Not Contain a Battery", "Dolby Volume, Invisible Speaker System, Audyssey EQ", 
                                                        "Dynamic Bass Boost (DBB)", "Edge LED", "Ethernet", "HDTV", "HDTV 1080p", 
                                                        "Imported", "Included", "Internet Apps", "Invisible Speaker System", 
                                                        "JPEG photo playback, sleep timer, CrystalCoat, DynaLight, ColorStream HD Component Video Inputs, Digital Noise Reduction (DNR)", 
                                                        "JPG", "KDL40W600B", "L1350U", "LCD", "LCD , Internet Connected , LED", 
                                                        "LCD TV", "LED", "LED-backlit LCD TV", "LED-LCD TV", "LED backlight", 
                                                        "LG", "LG 47LY340C - 47\" - commercial use LED-backlit LCD flat panel display - 1080p (FullHD) - direct-lit LED - dark titan", 
                                                        "Lg LG 42-Inch LED-Backlit LCD TV - 42LN5400 1080p 120Hz HDTV (42LN5400)", 
                                                        "Main channel speaker", "MP3", "MP3, AAC, LPCM", "MPEG-2, MPEG-4, AVCHD", 
                                                        "Mute button, Invisible Speaker System, Audyssey EQ", "Net TV", 
                                                        "NICAM", "No", "NTSC", "Panel with stand - 44.3 in x 13.7 in x 30.2 in x 46.3 lbs", 
                                                        "Panel with stand - 50.4 in x 14 in x 33.5 in x 71 lbs Panel without stand - 50.4 in x 1.1 in x 30.4 in x 59.3 lbs", 
                                                        "Panel without stand - 30.5 in x 1.4 in x 18.9 in", "Panel without stand - 39.1 in x 3.5 in x 25.3 in x 33.5 lbs", 
                                                        "PLED2243A", "Power adapter", "Power supply - internal", "ProScan", 
                                                        "PROSCAN PLED2243A 22\" 1080p 60Hz LED HDTV - PTR-PLED2243A", 
                                                        "REGZA E Series", "REGZA G Series", "REGZA SL Series", "Samsung", 
                                                        "Samsung UN46D7900 46-Inch 1080p 240HZ 3D LED HDTV Bundle with 3D Starter Kit and 3D Blu-Ray Player (Silver)", 
                                                        "SD Memory Card", "Sleep", "Sleep Timer; Auto Program", "Sony", 
                                                        "Sony KDL40W600B 40-Inch 1080p 60Hz Smart LED TV (2014 Model)", 
                                                        "Stereo", "Tabletop", "TheaterWide", "Toshiba", "Toshiba 29L1350U 29\" 720p LED-LCD TV - 16:9 - HDTV - Audyssey EQ, Audyssey ABX - 3 x HDMI - USB - Media PlayerShow More +", 
                                                        "Toshiba 32SL400U 32-Inch 720p Ultra Thin LED HDTV, Black", "Toshiba 40E200U 40-Inch 1080p LCD HDTV (Black Gloss)", 
                                                        "Toshiba 46G300U 46-Inch 1080p 120 Hz LCD HDTV (Black Gloss)", 
                                                        "Toshiba 55WX800U - 55\" LED TV - 1080p (FullHD)", "TV Wall Mount Kit for Samsung 40\" TV for UN40H6350, UN40H5500, UN40FH6030, UN40H5203, UN40H6400, UN40H4005, UN40H6203, UN40H5003, UN40EH5300, UN40EH5000, UN40HU6950, RM40D, UN40F6300, UN40H5203AF, UN40H5203AFXZA, UN40F5500, UN40EH6000, UN40F6400, UN40ES6100, DB40D, UN40ES6500, UN40B6000, LN40A550, H40B, HG40NA577LF, LN40E550, UN40C6300, LN40C530, UN40H5003AF, UA-40H5100, DM40D, LN40D630, MD40C, LN40D550, UN40EH6030, PE40C, LN40B530, LN40B650, LN40D503, ME40C, LN-S4052D, LN40A530, UN40EH5300FXZAB, LNT4065F, LNT4061F, LN40A750, LNT4071F, LNT4069FX Tvs. Includes 32\"-60\" Flat TV Wall Mount + 2 High Speed Gold Plated HDMI Cables + TV Cleaner Set + Microfiber Cleaning Cloth.", 
                                                        "Ultra Slim Flat Wall Mount for Samsung 55\" for (ME55C, UN55H6350, UN55H6400, UN55H7150, UN55FH6030, UN55F8000, UN55HU8550, UN55H8000, UN55F9000, UN55H6203, UN55HU9000, UN55F7500, UN55FH6200, UN55FH6003, UN55HU7250, UN55FH6003, UN55F7100, UN55HU8700, UN55HU6950, UN55F6300, UN55F6400, UN55D8000, UN55ES6100, UN55EH6000, UN55ES7500, UN55ES8000, UN55D7000, MD55C, 55UB8500, UN55ES6600, UN55ES7100, UN55B8000, UN55C6300, UN55C7000, UN55C8000, UN55ES6500, UN55D6300, UN55D6400, UN55ES6580, UN55EH6070, UN55B7000, UN55B6000, UN55HU6840, 55UB8200, UN55D6500, UN55ES6003, UN55D6000, UN55B8500, LN55C630, UN55D6050, UN55HU7200, UN55HU7200H, UN55D7050, UN55ES6150, UN55HU6950FXZA, UN55JS9000, UE55D, UN55HU8550FXZA, UN55HU7250FXZA) Tvs. Includes Tilt Wall Mount + 2 HDMI Cables + TV Cleaner Set + Microfiber Cleaning Cloth", 
                                                        "UN40H5003", "UN46D7900", "UN55ES6003", "UN55FH6003", "VGA (HD-15)", 
                                                        "VGA (HD-15), HDMI", "Vizio", "Vizio M220NV M221NV HDTV Vesa Mount Adapter 1712-0101-7920", 
                                                        "Wi-Fi, LAN", "With stand", "Without stand", "yes", "Yes", "Yes ( 3D glasses sold separately )", 
                                                        "Yes , 1 port(s)"), class = "factor")), 
              .Names = c("GTIN", "Key", 
                         "Value"), class = "data.frame", row.names = c(NA, -300L))

df = setDF(df)

############################
dfa = aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse=", "))
test1 = spread(dfa, Key, Value, fill = '')

print("My solution")
print(microbenchmark(aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse=", ")),spread(dfa, Key, Value, fill = " ")),times=100)
cat("\n")

############################
#akrun's solution #1
#Dcast and data.table
test2 = dcast(setDT(df), GTIN~Key, value.var='Value', 
              fun.aggregate=function(x) toString(unique(x)))

############################
#akrun's solution #2
#dplyr 
test3 = df %>%
  group_by(GTIN, Key) %>% 
  summarise(Value=toString(unique(Value))) %>% 
  spread(Key, Value, fill='')

#timing
print("dcast and aggregate")
print(microbenchmark(dcast(setDT(df), GTIN~Key, value.var='Value', 
                           fun.aggregate=function(x) toString(unique(x))),times=100))
cat("\n")

print("dplyr and tidyr")
print(microbenchmark(df %>%
                       group_by(GTIN, Key) %>% 
                       summarise(Value=toString(unique(Value))) %>% 
                       spread(Key, Value, fill=''),times=100))

结果:

[1] "My solution"
Unit: milliseconds
                                                                          expr       min        lq      mean    median
 aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse = ", ")) 40.423792 41.549188 44.496005 43.840846
                                           spread(dfa, Key, Value, fill = " ")  1.421713  1.533474  1.717194  1.626987
       uq       max neval cld
 44.84422 75.155029   100   b
  1.71860  4.422517   100  a 

[1] "dcast and aggregate"
Unit: milliseconds
                                                                                               expr      min       lq
 dcast(setDT(df), GTIN ~ Key, value.var = "Value", fun.aggregate = function(x) toString(unique(x))) 45.48038 47.35578
    mean  median       uq      max neval
 50.2578 48.9037 50.40563 67.42152   100

[1] "dplyr and tidyr"
Unit: milliseconds
                                                                                                             expr      min
 df %>% group_by(GTIN, Key) %>% summarise(Value = toString(unique(Value))) %>%      spread(Key, Value, fill = "") 40.70889
       lq     mean   median       uq      max neval
 41.41405 43.97719 43.06974 43.97275 93.33509   100

1000 行的结果显示解决方案 #3 比 #1 快 10%。行数越多 - 越快

于 2015-07-08T17:51:56.773 回答