5

我已经从一个 .csv 文件加载了一张表:

 mydata = read.csv("CS2Data.csv")  # read csv file

这给了我:

 mydata

       Date   DCM  TMUS   SKM   RCI  SPOK
1  11/2/2015 -0.88 -2.16 -1.04  1.12  0.67
2  12/1/2015  1.03  3.26 -2.25 -5.51 -0.23
3   1/4/2016  1.94  1.29  0.13 -1.16  0.11
4   2/1/2016 -0.41 -2.94  0.99  3.93 -0.19
5   3/1/2016 -0.68  1.27 -0.79 -2.06 -0.33
6   4/1/2016  1.82  1.22 -0.05 -1.27 -0.46
7   5/2/2016 -0.36  3.40  0.63 -2.77  0.46
8   6/1/2016  1.94  0.77  0.51 -0.26  1.66
9   7/1/2016  0.12  3.18  1.84 -1.34 -0.67
10  8/1/2016 -1.83 -0.20 -1.10 -0.90 -1.91
11  9/1/2016  0.05  0.31  1.11  0.80  1.17
12 10/3/2016 -0.02  3.19 -0.81 -4.00  0.29

我想对每个月(每一行)的 5 个数字,求出其中任意 3 个的所有组合。我根据在这里找到的一个答案,尝试使用 combn 函数:

combin <- combn(mydata, 3, rowSums, simplify = TRUE)

但这给了我错误-

“FUN(x[a], ...) 中的错误:‘x’必须是数字”

接下来我尝试分别命名每一列

DCM=mydata[2]
TMUS=mydata[3]
SKM=mydata[4]
RCI=mydata[5]
SPOK=mydata[6]

然后使用:

stock_ret <- data.table(DCM, TMUS,SKM,RCI,SPOK)
combin <- combn(stock_ret, 3, rowSums, simplify = TRUE)

我怀疑有一种更简单的方法,可以直接使用 .csv 文件中的列标题来完成此操作,但我被卡住了。

4

2 回答 2

2

去掉第一列(日期列),只保留其余各列——日期列不是数值,正是问题中报错的来源:

mydata <- mydata[,-1]

使用 combn,每次选取 3 列并计算其行和:

combn(mydata, m = 3, FUN = rowSums, simplify = TRUE)

例子:

> mydata <- iris[1:10,1:4]
> combn(mydata, m = 3, FUN = rowSums, simplify = TRUE)
      [,1] [,2] [,3] [,4]
 [1,] 10.0  8.8  6.7  5.1
 [2,]  9.3  8.1  6.5  4.6
 [3,]  9.2  8.1  6.2  4.7
 [4,]  9.2  7.9  6.3  4.8
 [5,] 10.0  8.8  6.6  5.2
 [6,] 11.0  9.7  7.5  6.0
 [7,]  9.4  8.3  6.3  5.1
 [8,]  9.9  8.6  6.7  5.1
 [9,]  8.7  7.5  6.0  4.5
[10,]  9.5  8.1  6.5  4.7
于 2016-11-10T21:31:09.560 回答
1

适用于任何数据框的一般逻辑:

set.seed(1) # for reproducibility

# Build a 10 x 5 example data frame of standard-normal draws (columns V1..V5).
df <- as.data.frame(matrix(rnorm(50), nrow = 10))
df # show it
# #              V1          V2          V3          V4         V5
# #    1  -0.6264538  1.51178117  0.91897737  1.35867955 -0.1645236
# #    2   0.1836433  0.38984324  0.78213630 -0.10278773 -0.2533617
# #   ...
# #   10  -0.3053884  0.59390132  0.41794156  0.76317575  0.8811077

# Each column of `combinations` is one set of 3 column indices:
# 1 2 3, 1 2 4, 1 2 5, ..., 3 4 5. The count is derived from ncol(df)
# rather than hard-coded, so the script works for any number of columns.
combinations <- combn(ncol(df), 3)

# seq_len() stays correct even when there are zero combinations,
# unlike 1:n which would yield c(1, 0).
combo_ids <- seq_len(ncol(combinations))

# all combinations of any 3 of the columns (a list of 3-column data frames)
lapply(combo_ids, function(j) df[combinations[, j]])

# row sums of every combination (a list of numeric vectors), computed once
sums_list <- lapply(combo_ids, function(j) rowSums(df[combinations[, j]]))
sums_list

# the same sums as a matrix, one column per combination, for better
# presentation and easier later handling
sums_matrix <- matrix(unlist(sums_list), nrow = nrow(df))
sums_matrix

针对提问者具体数据的解决方法:

# Rebuild the asker's table. The dates must stay text: written unquoted
# inside c(...), an entry such as 11/2/2015 is evaluated as arithmetic
# (11 / 2 / 2015 = 0.0027...), which silently turns the Date column into
# meaningless numbers. Reading the table from text keeps Date as character
# and takes the column names from the header line.
mydata <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "
Date        DCM   TMUS   SKM    RCI    SPOK
11/2/2015  -0.88  -2.16  -1.04   1.12   0.67
12/1/2015   1.03   3.26  -2.25  -5.51  -0.23
1/4/2016    1.94   1.29   0.13  -1.16   0.11
2/1/2016   -0.41  -2.94   0.99   3.93  -0.19
3/1/2016   -0.68   1.27  -0.79  -2.06  -0.33
4/1/2016    1.82   1.22  -0.05  -1.27  -0.46
5/2/2016   -0.36   3.40   0.63  -2.77   0.46
6/1/2016    1.94   0.77   0.51  -0.26   1.66
7/1/2016    0.12   3.18   1.84  -1.34  -0.67
8/1/2016   -1.83  -0.20  -1.10  -0.90  -1.91
9/1/2016    0.05   0.31   1.11   0.80   1.17
10/3/2016  -0.02   3.19  -0.81  -4.00   0.29
"
)
mydata # show the dataframe
#         Date   DCM  TMUS   SKM   RCI  SPOK
# 1  11/2/2015 -0.88 -2.16 -1.04  1.12  0.67
# 2  12/1/2015  1.03  3.26 -2.25 -5.51 -0.23
# ............................................
# 12 10/3/2016 -0.02  3.19 -0.81 -4.00  0.29

# Keep only the numeric columns; the text Date column cannot be summed.
returns <- mydata[-1]

# Each column of `combinations` is one set of 3 column indices:
# 1 2 3, 1 2 4, 1 2 5, ..., 3 4 5.
combinations <- combn(ncol(returns), 3)
combo_ids <- seq_len(ncol(combinations))

# all combinations of any 3 of the 5 columns (a list of 3-column data frames)
lapply(combo_ids, function(j) returns[combinations[, j]])

# row sums of every combination (a list of numeric vectors), computed once
sums_list <- lapply(combo_ids, function(j) rowSums(returns[combinations[, j]]))
sums_list

# the same sums as a matrix, one column per combination, for better
# presentation and easier later handling
sums_matrix <- matrix(unlist(sums_list), nrow = nrow(mydata))
sums_matrix

#       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
# [1,] -4.08 -1.92 -2.37 -0.80 -1.25  0.91 -2.08 -2.53 -0.37  0.75
# [2,]  2.04 -1.22  4.06 -6.73 -1.45 -4.71 -4.50  0.78 -2.48 -7.99
# [3,]  3.36  2.07  3.34  0.91  2.18  0.89  0.26  1.53  0.24 -0.92
# ...............................................................
# [12,]  2.36 -0.83  3.46 -4.83 -0.54 -3.73 -1.62  2.67 -0.52 -4.52

结果正确。
例如,检查第 10 种组合(即第 10 列,对应 SKM、RCI、SPOK 三列):第 1 行 0.75 = sum(-1.04, 1.12, 0.67),第 2 行 -7.99 = sum(-2.25, -5.51, -0.23),依此类推。

于 2016-11-10T21:39:28.223 回答