我有多个 dask Series(类型为 dask.dataframe.core.Series),想把它们合并成一个 DataFrame,然后写入 CSV 文件,应该怎么做?我在尝试用 dd.concat(axis=1) 合并时遇到了下面的错误,请给些建议。
数据
1,2014-04-07T10:51:09.277Z,214536502,0
1,2014-04-07T10:54:09.868Z,214536500,0
1,2014-04-07T10:54:46.998Z,214536506,0
1,2014-04-07T10:57:00.306Z,214577561,0
2,2014-04-07T13:56:37.614Z,214662742,0
2,2014-04-07T13:57:19.373Z,214662742,0
2,2014-04-07T13:58:37.446Z,214825110,0
2,2014-04-07T13:59:50.710Z,214757390,0
2,2014-04-07T14:00:38.247Z,214757407,0
2,2014-04-07T14:02:36.889Z,214551617,0
代码
import dask
import datetime as dt
# Build per-session ('Sid') features from a click log with dask, then try to
# combine the resulting Series into one dask DataFrame and write it to CSV.
#
# NOTE(review): `dd`, `np` and `pd` are used below, but the visible snippet
# only imports `dask` and `datetime` -- presumably they are imported elsewhere
# (dask.dataframe, numpy, pandas); confirm.
# NOTE(review): the dtype keys 'sid' and 'timestamp' are lower-case while the
# column names passed via names= are 'Sid' and 'Timestamp'; the keys do not
# match those column names as written.
# NOTE(review): the Windows paths here and in to_csv() are non-raw string
# literals containing backslashes; under Python 3 the `\U` sequence is
# rejected as a malformed unicode escape, so a raw string (r'...') or forward
# slashes would be safer.
clicksdat = dd.read_csv('C:\Users\TG\Downloads\yoochoose-dataFull\yoochoose-clicks100.dat', names=['Sid','Timestamp','itemid','itemcategory'], dtype={'sid':np.int64,'timestamp':np.object,'itemid':np.object,'itemcategory':np.object})
# Convert the Timestamp column by applying pd.to_datetime to each element.
clicksdat['Timestamp']=clicksdat.Timestamp.apply(pd.to_datetime)
# Lookup table indexed by hour of day (0-23) giving a segment label:
# default 'EM', hours 7-9 'M', 10-12 'A', 13-17 'E', 18-22 'N', hour 23 'MN'.
segment = ['EM']*24
segment[7:10] = ['M']*3
segment[10:13] = ['A']*3
segment[13:18] = ['E']*5
segment[18:23] = ['N']*5
segment[23] = 'MN'
# Latest and earliest Timestamp per Sid.
maxtemp=clicksdat.groupby('Sid')['Timestamp'].max()
mintemp=clicksdat.groupby('Sid')['Timestamp'].min()
# Span between first and last Timestamp of each Sid, in seconds.
duration=(maxtemp.sub(mintemp).apply(lambda x: x.total_seconds() ))
# Day-of-month and month taken from the latest Timestamp of each Sid.
day=maxtemp.apply(lambda x: x.day )
month=maxtemp.apply(lambda x: x.month)
# Row count per Sid (taken from the Timestamp column); wrapping the result in
# a one-element list and indexing [0] just yields the same Series back.
noofnavigations=[clicksdat.groupby('Sid').count().Timestamp][0]
# Number of distinct itemid / itemcategory values per Sid.
totalitems=clicksdat.groupby('Sid')['itemid'].nunique()
totalcats=clicksdat.groupby('Sid')['itemcategory'].nunique()
# Segment label for the hour of the latest Timestamp of each Sid.
timesegment= maxtemp.apply(lambda x: segment[x.hour])
# True where the segment of the latest Timestamp differs from that of the
# earliest Timestamp.
segmentchange=((maxtemp.apply(lambda x: segment[x.hour])!=mintemp.apply(lambda x: segment[x.hour])))
# Membership of each unique Sid in `buyersession`.
# NOTE(review): `buyersession` is not defined in the visible snippet.
# NOTE(review): this Series is derived from unique() rather than from
# groupby('Sid') like the others, so it presumably is not indexed by Sid --
# confirm before aligning it with the other Series.
purchased=(clicksdat['Sid'].unique()).apply(lambda x: x in buyersession)
# Debug output: the error section of the post shows every one of these is a
# dask.dataframe.core.Series.
print(type(maxtemp),type(mintemp),type(duration),type(day),type(month),type(noofnavigations),type(totalitems),type(totalcats),type(timesegment),type(segmentchange),type(purchased))
#percentile_list = pd.DataFrame({'purchased' : purchased,'duration':duration,'day':day,'month':month,'noofnavigations':noofnavigations,'totalitems':totalitems,'totalcats':totalcats,'timesegment':timesegment,'segmentchange':segmentchange },index=noofnavigations.index)
# This is the statement that fails in the traceback below with
# "ValueError: Unable to concatenate DataFrame with unknown division
# specifying axis=1".
percentile_list = dd.concat([purchased,duration,day,month,noofnavigations,totalitems,totalcats,timesegment,segmentchange],axis=1)
# Write the combined frame to CSV (not reached while the concat above raises).
percentile_list.to_csv('C:\Users\TG\Downloads\yoochoose-dataFull\yoochoose-clicks1001-727.csv')
错误
(<class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>, <class 'dask.dataframe.core.Series'>)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-121-ad7fc3cf8839> in <module>()
25 print(type(maxtemp),type(mintemp),type(duration),type(day),type(month),type(noofnavigations),type(totalitems),type(totalcats),type(timesegment),type(segmentchange),type(purchased))
26 #percentile_list = pd.DataFrame({'purchased' : purchased,'duration':duration,'day':day,'month':month,'noofnavigations':noofnavigations,'totalitems':totalitems,'totalcats':totalcats,'timesegment':timesegment,'segmentchange':segmentchange },index=noofnavigations.index)
---> 27 percentile_list = dd.concat([purchased,duration,day,month,noofnavigations,totalitems,totalcats,timesegment,segmentchange],axis=1)
28
29 percentile_list.to_csv('C:\Users\TG\Downloads\yoochoose-dataFull\yoochoose-clicks1001-727.csv')
C:\Users\TG\Anaconda3\envs\dato-env\lib\site-packages\dask\dataframe\multi.pyc in concat(dfs, axis, join, interleave_partitions)
576 else:
577 if axis == 1:
--> 578 raise ValueError('Unable to concatenate DataFrame with unknown '
579 'division specifying axis=1')
580 else:
ValueError: Unable to concatenate DataFrame with unknown division specifying axis=1