2

Please bear with me since I'm a first time poster. I am attempting to take time series data from long format to wide format, but reshape2 (and reshape) are not outputting what I want. I am attempting to use cast or dcast to make my data into the following format

id State contract.type Q1.2011 Q2.2011 ... Q2.2014

The source data is titled Med and is in the following format:

    > dput(head(Med))
structure(list(State = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("AK", 
"AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI", 
"IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", 
"MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV", 
"NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", 
"VA", "VT", "WA", "WI", "WV", "WY"), class = "factor"), Rebate.Category = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = c("FFS", "MCO"), class = "factor"), 
    Qtr.Yr = structure(1:6, .Label = c("Q1.2011", "Q2.2011", 
    "Q3.2011", "Q4.2011", "Q1.2012", "Q2.2012", "Q3.2012", "Q4.2012", 
    "Q1.2013", "Q2.2013", "Q3.2013", "Q4.2013", "Q1.2014", "Q2.2014", 
    "Q3.2014", "Q4.2014", "Q1.2015", "Q2.2015"), class = c("ordered", 
    "factor")), NDC = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("30", 
    "64"), class = "factor"), Medicaid.Units = structure(c(290L, 
    306L, 320L, 228L, 162L, 320L), .Label = c("0.00", "4,010.00", 
    "4,076.00", "4,080.00", "4,084.00", "4,081.00", "4,089.00", 
    "4,091.00", "4,446.00", "4,440.00", "4,100.00", "4,104.00", 
    "4,151.00", "4,160.00", "4,161.00", "4,410.00", "4,414.00", 
    "4,418.00", "4,444.00", "4,451.00", "4,480.00", "4,488.00", 
    "4,440.00", "4,488.00", "4,500.00", "4,510.00", "4,558.00", 
    "4,560.00", "4,571.00", "4,600.00", "4,604.00", "4,610.00", 
    "4,678.00", "4,680.00", "4,740.00", "4,770.00", "4,800.00", 
    "4,850.00", "4,860.00", "4,910.00", "4,946.00", "4,960.00", 
    "4,971.00", "40,014.00", "40,440.00", "40,484.00", "40,166.00", 
    "40,180.00", "40,480.00", "40,500.00", "40,618.00", "40,740.00", 
    "40,770.00", "40,817.00", "404,460.00", "409,010.00", "44,406.00", 
    "44,440.00", "44,460.00", "44,510.00", "44,560.00", "44,580.00", 
    "44,700.00", "44,760.00", "44,841.00", "44,880.00", "44,940.00", 
    "44,948.00", "41,080.00", "41,400.00", "41,556.00", "41,600.00", 
    "41,780.00", "41,900.00", "41,960.00", "410.00", "414.00", 
    "44,010.00", "44,440.00", "44,751.00", "44,860.00", "44,880.00", 
    "44,040.00", "44,180.00", "44,880.00", "44,891.00", "45,000.00", 
    "45,484.00", "45,740.00", "45.50", "450.00", "451.00", "46,080.00", 
    "46,101.00", "46,160.00", "46,441.00", "46,560.00", "47,580.00", 
    "48,046.00", "48,060.00", "48,178.00", "48,846.00", "48,949.44", 
    "480.00", "49,440.00", "1,018.00", "1,046.00", "1,040.00", 
    "1,080.00", "1,400.00", "1,441.00", "1,460.00", "1,110.00", 
    "1,180.00", "1,446.00", "1,440.00", "1,491.00", "1,400.00", 
    "1,401.00", "1,460.00", "1,490.00", "1,491.00", "1,541.00", 
    "1,510.00", "1,511.00", "1,571.00", "1,640.00", "1,648.00", 
    "1,700.00", "1,741.00", "1,741.00", "1,760.00", "1,810.00", 
    "1,841.00", "1,846.00", "1,880.00", "1,896.00", "1,941.00", 
    "1,940.00", "1,941.00", "1,960.00", "1.00", "10,760.00", 
    "14,400.00", "14,600.00", "14,660.00", "144.11", "141,440.00", 
    "146.00", "11,680.00", "14,151.00", "14,510.00", "14,700.00", 
    "14,810.00", "14,891.00", "14.00", "140.00", "141.00", "168.68", 
    "17,460.00", "17,468.00", "170.00", "19,440.00", "4,000.00", 
    "4,004.00", "4,060.00", "4,071.00", "4,410.00", "4,450.00", 
    "4,480.00", "4,496.00", "4,188.00", "4,400.00", "4,441.00", 
    "4,410.00", "4,480.00", "4,600.00", "4,710.00", "4,711.00", 
    "4,840.00", "4,900.00", "4,941.00", "4,960.00", "4,964.00", 
    "40,741.00", "40,904.00", "40.00", "400.00", "404.00", "441.00", 
    "41,446.00", "44,000.00", "44,060.00", "44,746.00", "44,860.00", 
    "44,980.00", "45,400.00", "460.00", "47,160.00", "47,740.00", 
    "494.49", "4,010.00", "4,041.00", "4,080.00", "4,441.00", 
    "4,160.00", "4,410.00", "4,440.00", "4,610.00", "4,718.00", 
    "4,740.00", "4,768.00", "4,800.00", "4,910.00", "4,964.00", 
    "410.00", "415.44", "44,480.00", "44,740.00", "446.00", "444.00", 
    "45,418.00", "45,688.00", "46,418.00", "47,141.00", "47,180.00", 
    "48,410.00", "480.00", "484.00", "49,946.00", "5,040.00", 
    "5,400.00", "5,414.00", "5,460.00", "5,110.00", "5,180.00", 
    "5,440.00", "5,444.00", "5,441.00", "5,640.00", "5,760.00", 
    "5,794.00", "5,810.00", "5,946.00", "5,970.00", "50,810.00", 
    "51,480.00", "510.00", "540.00", "55,540.00", "56,110.00", 
    "57,661.00", "580.07", "588.91", "6,000.00", "6,060.00", 
    "6,410.00", "6,480.00", "6,140.00", "6,180.00", "6,400.00", 
    "6,480.00", "6,600.00", "6,646.00", "6,690.00", "6,710.00", 
    "6,900.00", "6.00", "60.00", "600.00", "64,044.00", "614.00", 
    "64,100.00", "660.00", "67,481.00", "690.00", "7,100.00", 
    "7,410.00", "7,451.00", "7,480.00", "7,500.00", "7,680.00", 
    "7,740.00", "7,760.00", "7,800.00", "7,860.00", "7,980.00", 
    "70,086.00", "70,680.00", "710.00", "74,800.00", "74,911.00", 
    "748.48", "751.00", "780.00", "784.00", "8,040.00", "8,460.00", 
    "8,510.00", "8,541.00", "8,584.00", "8,640.00", "8,740.00", 
    "8,880.00", "8,940.00", "840.00", "845.00", "9,000.00", "9,046.00", 
    "9,140.00", "9,400.00", "9,410.00", "9,480.00", "9,600.00", 
    "9,646.00", "9,660.00", "9,710.00", "90,140.00", "90.00", 
    "900.00", "905.00", "91,814.00", "960.00", "984.00", "996.77", 
    "0.50", "4,019.00", "4,044.00", "4,440.00", "4,480.00", "4,144.00", 
    "4,180.00", "4,400.00", "4,414.00", "4,468.00", "4,504.00", 
    "4,546.00", "4,551.00", "4,648.00", "4,710.00", "4,780.00", 
    "4,840.00", "4,980.00", "4.78", "40,080.00", "40,440.00", 
    "40,680.00", "40,851.00", "40,941.00", "40,914.00", "40,976.00", 
    "40,998.00", "40.00", "407,460.00", "44,044.00", "44,490.00", 
    "44,116.00", "44,176.00", "44,640.00", "44,810.00", "41,000.00", 
    "41,480.00", "41,140.00", "41,410.00", "41,480.00", "41,481.00", 
    "41,606.00", "41,841.00", "41,880.00", "41,908.00", "44,440.00", 
    "44,468.00", "44,680.00", "44,110.00", "44,446.00", "44,440.00", 
    "46,410.00", "46,680.00", "460.46", "465.00", "47,640.00", 
    "47,700.00", "47,810.00", "48,600.00", "48,900.00", "49,680.00", 
    "491.00", "495.00", "499.58", "1,048.00", "1,081.00", "1,410.00", 
    "1,456.00", "1,474.00", "1,476.00", "1,100.00", "1,114.00", 
    "1,184.00", "1,186.00", "1,456.00", "1,441.00", "1,484.00", 
    "1,580.00", "1,844.00", "1,941.00", "1.66", "10,150.00", 
    "10,181.00", "14,144.00", "11,440.00", "14,110.00", "14,404.00", 
    "144.09", "15,764.00", "154.45", "155.00", "16,580.00", "18,500.00", 
    "19,460.00", "19,510.00", "4,486.00", "4,108.00", "4,140.00", 
    "4,168.00", "4,408.00", "4,460.00", "4,464.00", "4,471.00", 
    "4,540.00", "4,660.00", "4,781.00", "40,061.00", "40,484.00", 
    "405.00", "44,447.00", "44,900.00", "441.00", "44,544.00", 
    "45,468.00", "454.87", "461.00", "47,080.00", "471.44", "484.00", 
    "496.78", "4,161.00", "4,480.00", "4,541.00", "4,560.00", 
    "4,571.00", "40,160.00", "40.00", "41,144.00", "414.00", 
    "418.00", "44,100.00", "44,400.00", "44,504.00", "46,500.00", 
    "46,860.00", "46,980.00", "47,445.00", "47,641.00", "47,880.00", 
    "470.00", "48,900.00", "484.00", "488.00", "49,740.00", "5,046.00", 
    "5,400.00", "5,484.00", "5,804.00", "5,880.00", "5,948.00", 
    "504.00", "51,065.00", "55,570.00", "55,680.00", "56,510.00", 
    "564.00", "57,468.00", "57,180.00", "584.00", "59,510.00", 
    "6,164.00", "6,460.00", "6,410.00", "6,660.00", "6,756.00", 
    "6,941.00", "6,948.00", "656.00", "680.00", "696.00", "7,080.00", 
    "7,441.00", "7,440.00", "7,146.00", "7,160.00", "7,456.00", 
    "7,560.00", "7,998.00", "744.00", "748.00", "8,400.00", "8,180.00", 
    "8,446.00", "8,446.00", "8,514.00", "8,580.00", "8,581.00", 
    "8,674.00", "8,700.00", "8,760.00", "8,810.00", "84,140.00", 
    "84,900.00", "840.00", "84.08", "87,410.00", "880.00", "884.00", 
    "89,700.00", "9,060.00", "9,064.00", "9,444.00", "9,664.00", 
    "9,780.00", "9,840.00", "9,900.00", "964.00", "0.89", "0.96", 
    "4,090.00", "4,451.00", "4,470.00", "4,106.00", "4,140.00", 
    "4,484.00", "4,418.00", "4,444.00", "4,444.00", "4,588.00", 
    "4,681.00", "4,718.00", "4,856.00", "4,891.00", "4,944.00", 
    "4.50", "40,068.00", "40,070.00", "40,488.00", "40,149.00", 
    "40,160.00", "40,410.00", "40,496.00", "40,596.00", "40,768.00", 
    "40,860.00", "40,916.00", "44,087.00", "44,400.00", "44,460.00", 
    "44,596.00", "44,646.00", "44,858.00", "44,881.00", "441.00", 
    "41,444.00", "41,840.00", "44,440.00", "44,100.00", "44,500.00", 
    "446.00", "44,496.00", "44,960.00", "444.00", "45,448.00", 
    "45,140.00", "45,456.00", "45,780.00", "45.00", "46,010.00", 
    "46,444.00", "46,941.00", "47,040.00", "47,466.00", "47,110.00", 
    "48,000.00", "48,049.00", "48,640.00", "48,645.00", "49,411.00", 
    "1,041.00", "1,041.00", "1,044.00", "1,094.00", "1,404.00", 
    "1,480.00", "1,171.00", "1,181.00", "1,188.00", "1,404.00", 
    "1,441.00", "1,546.00", "1,670.00", "1,818.00", "10.00", 
    "14,960.00", "11,010.00", "11,480.00", "11,488.00", "11,910.00", 
    "14,181.00", "14,490.00", "14,644.00", "15,100.00", "16,180.00", 
    "17,446.00", "18,946.00", "180.00", "19,460.00", "19,688.00", 
    "4,418.00", "4,481.00", "4,780.00", "4,946.00", "4.00", "41,040.00", 
    "41,400.00", "4,440.00", "4,100.00", "4,500.00", "4,580.00", 
    "4,680.00", "4,840.00", "4,946.00", "4,960.00", "4,980.00", 
    "4,981.00", "45,400.00", "476.00", "484.00", "486.00", "496.00", 
    "5,470.00", "5,510.00", "5,580.00", "5,688.00", "5,700.00", 
    "5,764.00", "5,940.00", "5,980.00", "54,491.00", "54,611.00", 
    "548.00", "551.00", "56.00", "6,078.00", "6,090.00", "6,446.00", 
    "6,464.00", "6,461.00", "6,474.60", "6,616.00", "610.00", 
    "611.00", "68,040.00", "7,010.00", "7,044.00", "7,058.00", 
    "7,084.00", "7,441.00", "7,440.00", "7,470.00", "7,646.00", 
    "7,676.00", "7,685.00", "7,696.58", "7,871.00", "714.00", 
    "718.00", "746.00", "75,540.00", "8,154.00", "8,761.00", 
    "8,866.00", "8.00", "9,040.00", "9,480.00", "9,414.47", "9,460.00", 
    "9,541.00", "9,540.00", "9,788.00", "9,810.00", "944.00", 
    "944.00", "4,064.00", "4,140.00", "4,450.00", "4,476.00", 
    "4,496.00", "4,591.00", "4,786.00", "4,941.00", "4,951.00", 
    "4.01", "40,010.00", "40,018.00", "40,196.00", "40,560.00", 
    "40,610.00", "40,764.00", "40,800.00", "40,980.00", "44,040.00", 
    "44,441.00", "44,591.00", "41,541.00", "44,441.00", "44,414.00", 
    "44,700.00", "44,950.00", "44,400.00", "44,704.00", "440.00", 
    "45,491.00", "45,450.00", "45,510.00", "45,788.00", "46,480.00", 
    "46,508.00", "46,571.00", "46,614.00", "46,860.00", "47,884.00", 
    "48,418.00", "49,500.00", "1,101.00", "1,451.00", "1,551.00", 
    "1,584.00", "1,676.00", "1,708.00", "1,984.00", "14,480.00", 
    "14,710.00", "14,400.00", "14,460.00", "15,500.00", "160.00", 
    "17,540.00", "17,847.00", "18,480.00", "4,068.00", "4,490.00", 
    "4,494.00", "4,496.00", "4,648.00", "4,704.00", "4,748.00", 
    "4,760.00", "4,804.00", "4,840.60", "4,867.76", "4,904.19", 
    "4,947.47", "41,880.00", "446.00", "46,691.00", "48,811.00", 
    "4,050.00", "4,409.81", "4,108.00", "4,500.45", "4,860.00", 
    "5,408.00", "5,441.45", "5,586.00", "5,944.00", "5,991.00", 
    "5.10", "6,061.00", "6,541.00", "6,771.00", "6,776.00", "6,780.00", 
    "641.00", "7,086.00", "7,444.00", "7,456.00", "7,541.00", 
    "7,610.00", "7,614.00", "7,644.00", "7,910.00", "7.76", "716.00", 
    "740.00", "8,188.00", "8,696.00", "8,740.00", "8,784.00", 
    "8,850.00", "860.00", "88.00", "896.00", "9,410.00", "9,450.00", 
    "9,141.00", "9,168.00", "9,404.00", "9,471.00", "9,661.00", 
    "9,964.00"), class = "factor"), id = c(1, 1, 1, 1, 1, 1)), .Names = c("State", 
"Rebate.Category", "Qtr.Yr", "NDC", "Medicaid.Units", "id"), row.names = c("2185", 
"2184", "2182", "2180", "1503", "1501"), class = "data.frame")

id is just a rownumber. I subset by NDC into two subsets, Med1 and Med2. I then use the following code to cast. If I cast with id included on the left hand side of the equation, I get some of the right numbers. For instance, AK has 120 in Q4.2014, but instead of putting the data in with one row for each state/rebate category pair, quarter by quarter, I have one good number per line and NAs for the rest of the time points. If I cast without id on the LHS, then I get an integer between 0 and 5 filled out for the entire sheet in every cell.

 TMed1 <- dcast(Med1, id + Rebate.Category + State ~ Qtr.Yr, value.var = "Medicaid.Units", drop = FALSE)

The output is

head(TMed1)
  id Rebate.Category State Q1.2011 Q2.2011 Q3.2011 Q4.2011 Q1.2012 Q2.2012
  1  1      FFS       AK    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
  2  1      FFS       AL    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
  3  1      FFS       AR    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
  4  1      FFS       AZ    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
  5  1      FFS       CA    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
  6  1      FFS       CO    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
Q3.2012 Q4.2012 Q1.2013 Q2.2013 Q3.2013 Q4.2013 Q1.2014 Q2.2014 Q3.2014
1    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
2    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
3    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
4    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
5    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
6    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>    <NA>
  Q4.2014 Q1.2015 Q2.2015
1  120.00    <NA>    <NA>
2    <NA>    <NA>    <NA>
3    <NA>    <NA>    <NA>
4    <NA>    <NA>    <NA>
5    <NA>    <NA>    <NA>
6    <NA>    <NA>    <NA>

I also used the plyr id function to create a unique id for each State/Rebate.Category combination, but I end up with the low integers again. Does anybody have any idea how to get the values of the Medicaid.Units column laid out quarter by quarter for each unique id combination?

EDIT: Replaced the original sample with dput output, as recommended.

4

2 回答 2

0

我找到了答案!答案是 user1362215 和 BondedDust 的建议,再加上我自己的一些摸索。首先,fun.aggregate 需要设置为 sum;其次,我读入的那个从 Excel 导出的 csv 文件会导致 Medicaid.Units 被当作因子(factor)读入,而它本应作为整数读入。我回到 Excel,把该字段重新导出为不带逗号的数字字段,这样它就以整数形式读入,并且能与 user1362215 的代码正常配合。此前,只要数字中带有逗号,他/她的代码就会生成 NA,其余单元格的数值和位置都是正确的。去掉逗号并使用 fun.aggregate=sum 解决了这个问题。

谢谢大家!!!(如果我的帐户足够大可以拥有该特权,我会在适当的情况下投票)

于 2015-04-28T20:59:08.317 回答
0

你需要做两件事:

  1. 转换Medicaid.Units为数字。目前它是一个因子,有 817 个级别。

  2. 在每个州/季度/类别/id有多个条目的情况下设置聚合函数。

    # Medicaid.Units was read in as a factor of strings such as "4,010.00".
    # Strip the thousands separators first; otherwise as.numeric() returns NA
    # for every value that contains a comma.
    Med$Medicaid.Units <- as.numeric(
      gsub(",", "", as.character(Med$Medicaid.Units), fixed = TRUE)
    )
    # NOTE: Med1 must be subset from Med *after* this conversion (or apply the
    # same conversion to Med1 directly) so that the cast sees numeric values.
    # fun.aggregate = sum adds up multiple entries that share the same
    # id/Rebate.Category/State/quarter instead of counting them.
    TMed1 <- dcast(
      Med1,
      id + Rebate.Category + State ~ Qtr.Yr,
      value.var = "Medicaid.Units",
      drop = FALSE,
      fun.aggregate = sum
    )

于 2015-04-28T15:20:08.200 回答