0

嗨,我有一个由 Python 生成的字符串,我需要读入 R 来分析它。

下面两个字符串之间的唯一区别是长度(即列表中的元素个数),而 R 无法成功读取较长的那个字符串。

textWork <- "[('08/10/2013 01:50:16 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 02:04:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/11/2013 02:29:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 12:58:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 01:12:18 AM INFO', 'product1', '', '61.12000', '1'), ('08/13/2013 01:14:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/14/2013 02:01:42 AM INFO', 'product1', '', '61.12000', '1'), ('08/14/2013 02:04:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/15/2013 01:09:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/15/2013 01:22:50 AM INFO', 'product1', '', '61.12000', '1'), ('08/16/2013 12:56:52 AM INFO', 'product1', '', '61.12000', '1'), ('08/16/2013 01:09:38 AM INFO', 'product1', '', '61.12000', '1'), ('08/17/2013 12:54:20 AM INFO', 'product1', '', '61.12000', '1'), ('08/17/2013 01:07:51 AM INFO', 'product1', '', '61.12000', '1'), ('08/18/2013 12:54:14 AM INFO', 'product1', '', '61.12000', '1'), ('08/18/2013 01:09:37 AM INFO', 'product1', '', '61.12000', '1'), ('08/19/2013 12:54:13 AM INFO', 'product1', '', '61.12000', '1'), ('08/19/2013 01:10:06 AM INFO', 'product1', '', '61.12000', '1'), ('08/20/2013 02:09:17 AM INFO', 'product1', '', '61.12000', '1'), ('08/20/2013 02:25:56 AM INFO', 'product1', '', '61.12000', '1'), ('08/21/2013 01:21:03 AM INFO', 'product1', '', '61.12000', '1'), ('08/21/2013 01:34:59 AM INFO', 'product1', '', '61.12000', '1'), ('08/22/2013 01:32:54 AM INFO', 'product1', '', '61.12000', '1'), ('08/22/2013 01:55:25 AM INFO', 'product1', '', '61.12000', '1'), ('08/23/2013 01:23:44 AM INFO', 'product1', '', '61.12000', '1'), ('08/23/2013 01:41:08 AM INFO', 'product1', '', '61.12000', '1'), ('08/24/2013 01:17:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/24/2013 01:31:12 AM INFO', 'product1', '', '61.12000', '1'), ('08/25/2013 12:57:21 AM INFO', 'product1', '', '61.12000', '1'), ('08/25/2013 01:10:55 AM INFO', 'product1', '', '61.12000', '1'), 
('08/26/2013 12:56:37 AM INFO', 'product1', '', '61.12000', '1'), ('08/26/2013 01:11:03 AM INFO', 'product1', '', '61.12000', '1'), ('08/27/2013 01:00:15 AM INFO', 'product1', '', '61.12000', '1'), ('08/27/2013 01:13:09 AM INFO', 'product1', '', '61.12000', '1'), ('08/28/2013 01:07:21 AM INFO', 'product1', '', '61.12000', '1'), ('08/28/2013 01:24:13 AM INFO', 'product1', '', '61.12000', '1'), ('08/29/2013 12:57:08 AM INFO', 'product1', '', '61.12000', '1'), ('08/29/2013 01:10:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/30/2013 12:56:22 AM INFO', 'product1', '', '61.12000', '1'), ('08/30/2013 01:10:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/31/2013 12:53:37 AM INFO', 'product1', '', '61.12000', '1'), ('08/31/2013 01:08:01 AM INFO', 'product1', '', '61.12000', '1'), ('09/01/2013 12:52:11 AM INFO', 'product1', '', '61.12000', '1'), ('09/01/2013 01:06:40 AM INFO', 'product1', '', '61.12000', '1'), ('09/02/2013 12:50:31 AM INFO', 'product1', '', '61.12000', '1'), ('09/02/2013 01:05:16 AM INFO', 'product1', '', '61.12000', '1'), ('09/03/2013 12:54:07 AM INFO', 'product1', '', '61.12000', '1'), ('09/03/2013 01:09:32 AM INFO', 'product1', '', '61.12000', '1'), ('09/04/2013 01:16:11 AM INFO', 'product1', '', '61.12000', '1'), ('09/05/2013 12:59:34 AM INFO', 'product1', '', '61.12000', '1'), ('09/06/2013 12:55:00 AM INFO', 'product1', '', '61.12000', '1'), ('09/07/2013 01:13:40 AM INFO', 'product1', '', '61.12000', '1'), ('09/09/2013 01:07:43 AM INFO', 'product1', '', '61.12000', '1')]"

textNotWork <- "[('08/10/2013 01:50:16 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 02:04:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/11/2013 02:29:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 12:58:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 01:12:18 AM INFO', 'product1', '', '61.12000', '1'), ('08/13/2013 01:14:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 01:50:16 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 02:04:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/11/2013 02:29:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 12:58:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 01:12:18 AM INFO', 'product1', '', '61.12000', '1'), ('08/13/2013 01:14:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 01:50:16 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 02:04:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/11/2013 02:29:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 12:58:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 01:12:18 AM INFO', 'product1', '', '61.12000', '1'), ('08/13/2013 01:14:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 01:50:16 AM INFO', 'product1', '', '61.12000', '1'), ('08/10/2013 02:04:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/11/2013 02:29:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 12:58:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/12/2013 01:12:18 AM INFO', 'product1', '', '61.12000', '1'), ('08/13/2013 01:14:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/14/2013 02:01:42 AM INFO', 'product1', '', '61.12000', '1'), ('08/14/2013 02:04:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/15/2013 01:09:23 AM INFO', 'product1', '', '61.12000', '1'), ('08/15/2013 01:22:50 AM INFO', 'product1', '', '61.12000', '1'), ('08/16/2013 12:56:52 AM INFO', 'product1', '', '61.12000', '1'), ('08/16/2013 01:09:38 AM INFO', 'product1', '', '61.12000', '1'), 
('08/17/2013 12:54:20 AM INFO', 'product1', '', '61.12000', '1'), ('08/17/2013 01:07:51 AM INFO', 'product1', '', '61.12000', '1'), ('08/18/2013 12:54:14 AM INFO', 'product1', '', '61.12000', '1'), ('08/18/2013 01:09:37 AM INFO', 'product1', '', '61.12000', '1'), ('08/19/2013 12:54:13 AM INFO', 'product1', '', '61.12000', '1'), ('08/19/2013 01:10:06 AM INFO', 'product1', '', '61.12000', '1'), ('08/20/2013 02:09:17 AM INFO', 'product1', '', '61.12000', '1'), ('08/20/2013 02:25:56 AM INFO', 'product1', '', '61.12000', '1'), ('08/21/2013 01:21:03 AM INFO', 'product1', '', '61.12000', '1'), ('08/21/2013 01:34:59 AM INFO', 'product1', '', '61.12000', '1'), ('08/22/2013 01:32:54 AM INFO', 'product1', '', '61.12000', '1'), ('08/22/2013 01:55:25 AM INFO', 'product1', '', '61.12000', '1'), ('08/23/2013 01:23:44 AM INFO', 'product1', '', '61.12000', '1'), ('08/23/2013 01:41:08 AM INFO', 'product1', '', '61.12000', '1'), ('08/24/2013 01:17:46 AM INFO', 'product1', '', '61.12000', '1'), ('08/24/2013 01:31:12 AM INFO', 'product1', '', '61.12000', '1'), ('08/25/2013 12:57:21 AM INFO', 'product1', '', '61.12000', '1'), ('08/25/2013 01:10:55 AM INFO', 'product1', '', '61.12000', '1'), ('08/26/2013 12:56:37 AM INFO', 'product1', '', '61.12000', '1'), ('08/26/2013 01:11:03 AM INFO', 'product1', '', '61.12000', '1'), ('08/27/2013 01:00:15 AM INFO', 'product1', '', '61.12000', '1'), ('08/27/2013 01:13:09 AM INFO', 'product1', '', '61.12000', '1'), ('08/28/2013 01:07:21 AM INFO', 'product1', '', '61.12000', '1'), ('08/28/2013 01:24:13 AM INFO', 'product1', '', '61.12000', '1'), ('08/29/2013 12:57:08 AM INFO', 'product1', '', '61.12000', '1'), ('08/29/2013 01:10:57 AM INFO', 'product1', '', '61.12000', '1'), ('08/30/2013 12:56:22 AM INFO', 'product1', '', '61.12000', '1'), ('08/30/2013 01:10:43 AM INFO', 'product1', '', '61.12000', '1'), ('08/31/2013 12:53:37 AM INFO', 'product1', '', '61.12000', '1'), ('08/31/2013 01:08:01 AM INFO', 'product1', '', '61.12000', '1'), ('09/01/2013 
12:52:11 AM INFO', 'product1', '', '61.12000', '1'), ('09/01/2013 01:06:40 AM INFO', 'product1', '', '61.12000', '1'), ('09/02/2013 12:50:31 AM INFO', 'product1', '', '61.12000', '1'), ('09/02/2013 01:05:16 AM INFO', 'product1', '', '61.12000', '1'), ('09/03/2013 12:54:07 AM INFO', 'product1', '', '61.12000', '1'), ('09/03/2013 01:09:32 AM INFO', 'product1', '', '61.12000', '1'), ('09/04/2013 01:16:11 AM INFO', 'product1', '', '61.12000', '1'), ('09/05/2013 12:59:34 AM INFO', 'product1', '', '61.12000', '1'), ('09/06/2013 12:55:00 AM INFO', 'product1', '', '61.12000', '1'), ('09/07/2013 01:13:40 AM INFO', 'product1', '', '61.12000', '1'), ('09/09/2013 01:07:43 AM INFO', 'product1', '', '61.12000', '1')]"

在此处输入图像描述

问题(1)可以看到,这是一个 Python 中的元组列表,而原始数据(textNotWork)实际上包含更多的元组元素(字符串更长),我无法成功读取该文本。有谁知道到底发生了什么?我怎样才能读取一个很长的字符串?

问题(2)如何将其转换为 R 中具有五个变量(其中一个变量似乎是空字符串)的数据框,以便我可以将其转换为时间序列并对其进行分析?

谢谢

4

1 回答 1

1

转换 Python 结构的一个思路(我认为这里给出的方案对任何 Python 结构都通用)是:先用 Python 将其保存为 JSON 格式,然后再用 R 读取。所以你可以这样做:

Python

import json

# Abbreviated sample of the list of 5-element tuples from the question;
# the real list simply contains more tuples of the same shape.
textNotWork = [
    ('08/10/2013 01:50:16 AM INFO', 'product1', '', '61.12000', '1'),
    ('08/10/2013 02:04:23 AM INFO', 'product1', '', '61.12000', '1'),
    # ... remaining tuples elided ...
]

# Serialise the list to JSON; each tuple is written as a JSON array, so the
# file holds an array of 5-element arrays that any JSON reader can parse.
# The handle is named `fh` to avoid shadowing the Python 2 builtin `file`.
with open("testing.json", "w") as fh:
    json.dump(textNotWork, fh)

R

library(rjson)

# Read the JSON file written by the Python step, flatten the parsed result
# into a single vector, and refill it row by row into a 5-column matrix.
matrix(
  data = unlist(fromJSON(file = "testing.json")),
  ncol = 5,
  byrow = TRUE
)

 [1,] "08/10/2013 01:50:16 AM INFO" "product1" ""   "61.12000" "1" 
 [2,] "08/10/2013 02:04:23 AM INFO" "product1" ""   "61.12000" "1" 
 [3,] "08/11/2013 02:29:46 AM INFO" "product1" ""   "61.12000" "1" 
 [4,] "08/12/2013 12:58:43 AM INFO" "product1" ""   "61.12000" "1" 
于 2013-10-21T17:34:24.033 回答