以下是我目前想出的方案。我认为各列的总和已经相当接近了——只是少数几个特别大的值造成了一些偏差,除此之外结果很均衡。
欢迎提出任何能让这段代码更加 Pythonic 的建议。
文件:columnsplit.py
#!/usr/bin/python
import sys, operator
# usage
# columnsplit.py <filename> <#cols>
# columnsplit.py test.csv 12
#
#determine number of devices per column
def devicelisting(fulllist, percolumn):
    """Load the device list from a CSV file and compute devices per column.

    Each non-blank line of the file must have the form ``name,value`` where
    ``value`` parses as an integer.

    Args:
        fulllist: Path of the CSV file to read.
        percolumn: Number of columns wanted (an int or a numeric string,
            e.g. taken straight from ``sys.argv``).

    Returns:
        A ``(devicelist, devicespercol)`` tuple: ``devicelist`` is a list of
        ``(name, value)`` tuples in file order, and ``devicespercol`` is the
        whole number of devices that fit evenly into each column.

    Raises:
        ValueError: If a line does not split into exactly two fields, or the
            value is not an integer.
        ZeroDivisionError: If ``percolumn`` is zero.
    """
    devicelist = []
    # The context manager guarantees the file is closed, even on a parse error.
    with open(fulllist, 'r') as fobj:
        for line in fobj:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            key, val = line.split(',')
            devicelist.append((key, int(val)))
    # Floor division keeps the result an int on Python 3 as well, so it can
    # be passed to range() by the caller.
    devicespercol = len(devicelist) // int(percolumn)
    return (devicelist, devicespercol)
def devicesplit(fulllist, numcolumns, roundnum):
    """Sort the devices by value and peel off the first ``numcolumns`` of them.

    Round 0 sorts in descending order, so the slice holds the largest values;
    every other round sorts in ascending order, so the slice holds the
    smallest values.

    Args:
        fulllist: List of ``(name, value)`` tuples. It is not modified.
        numcolumns: Number of devices to take for this round.
        roundnum: Round index; only ``0`` versus non-zero matters.

    Returns:
        A ``(deviceslice, remaining)`` tuple of lists: the devices taken this
        round and the devices left over, both in sorted order.
    """
    devices = sorted(fulllist, key=operator.itemgetter(1),
                     reverse=(roundnum == 0))
    # Slice rather than remove() items while iterating over the same list:
    # that pattern only worked because Python 2's zip() is eager, and it
    # skips every other element with Python 3's lazy zip().
    count = max(numcolumns, 0)
    return (devices[:count], devices[count:])
def makecolumns(roundnumber, percol):
    """Assemble one output column from the module-level ``deviceslice<N>`` lists.

    The caller is expected to have stored one list per round under the global
    names ``deviceslice0`` .. ``deviceslice<percol - 1>``. The column is built
    by taking element ``roundnumber`` from each of those lists in turn.

    Args:
        roundnumber: Index of the element to pick from every slice (the
            driver passes the column index here).
        percol: Number of ``deviceslice<N>`` lists to read.

    Returns:
        A list with one entry per slice.

    Raises:
        KeyError: If an expected ``deviceslice<N>`` global does not exist.
        IndexError: If a slice has no element at ``roundnumber``.
    """
    # Look the dynamically named lists up directly instead of exec()-ing an
    # assignment: exec cannot bind function locals on Python 3 and is
    # needlessly risky for a plain name lookup.
    namespace = globals()
    return [namespace['deviceslice%s' % i][roundnumber] for i in range(percol)]
# Work out how many devices fill each of the requested columns evenly.
# Any devices left over are then appended one per column, starting with the last column.
def main(argv):
    """Split the devices in a CSV file into columns and print each column.

    The device list is cut into ``percol`` slices of ``numcolumns`` devices
    each (e.g. 108 devices in 12 columns gives 9 slices of 12). Column ``i``
    receives element ``i`` of every slice. The devices that do not fill a
    whole slice are then handed out one per column, largest first, starting
    from the last column.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout:
            ``[program, filename, number_of_columns]``.
    """
    if len(argv) < 3:
        sys.exit('usage: %s <filename> <#cols>' % argv[0])

    # devices: list of (name, value) tuples; percol: whole devices per column.
    devices, percol = devicelisting(argv[1], argv[2])
    numcolumns = int(argv[2])

    devicesleft = devices
    for i in range(percol):
        sendcol, devicesleft = devicesplit(devicesleft, numcolumns, i)
        # makecolumns() reads the slices back as module-level names
        # deviceslice0, deviceslice1, ... so publish them there.
        globals()['deviceslice%s' % i] = sendcol

    # Finally create the columns.
    columns = [makecolumns(i, percol) for i in range(numcolumns)]

    # Add the leftover devices, largest first, walking back from the last column.
    leftovers = sorted(devicesleft, key=operator.itemgetter(1), reverse=True)
    for offset, device in enumerate(leftovers, 1):
        columns[numcolumns - offset].append(device)

    # Print each resulting column followed by the sum of its values.
    for column in columns:
        print(column)
        print(sum(pair[1] for pair in column))


if __name__ == '__main__':
    main(sys.argv)
我跑过的测试文件。
文件:test44a.csv
SQCIEOEO,1272
HIKTXYZH,281
JZHRZXKX,5793
UBGTOLUX,147
WBVYFNBN,9
VMHTKHBU,32
GILGFWDA,1334
YKUMWOKT,2066
PFSVTUIP,51
GPJRWKMD,673
TYJZUNZS,27
XTFUHPNX,2102
VFSPABFG,65
ROYOZKRS,189
IARDNRVL,587
LBFSQTQL,973
ZJBZKGFB,21301
UEPUOHMW,20
HEAVWVGH,0
XMANFQZE,719
ZADKGIMB,82
NCVBJIYR,27
NYMJUSQR,20646
EQFKHEOH,2050
ERRLAENN,19
HIPRQNIE,12557
MVNHODYT,20
UEDBIRIN,14
JAZJEMXL,28
UMDLALPN,36
GCUUGTNA,0
XRCGIKTR,12
KSBPEYBZ,20657
LELLPAYW,43792
DTRKMFLK,73
WNQEXJWI,41
CYXHXYHI,10
CSUSTTOX,120
NFHZLSJH,23
FAMDKJLM,25
HIUEHBNJ,261
UIBNCQKP,40
WSPHKYOQ,30025
ZBUJKFWR,0
OQWVSKFM,49
SHZUXKKU,21
CZBMYQDX,45
RXGBCCTR,17
SPMLASXS,15
ZWNXGXRI,59
WTVUJZSB,22
WYDZBWQU,19100
MDFMVCFV,6133
ZSSGQJPM,25
CKHMJZOG,85
YRFZOWTB,28
AYNWBSRA,14
LJGBTVOW,13110
GWJPWXWU,16
PCUDYNEY,179
MSVNLMOX,62
WUYPPNMW,2285
KVLGTIBI,11
KWMIKQHW,11
JDKUPYRM,1851
DARXQYDY,68
UUPXIDEP,139
SKQZMTFY,4377
ZEPOWAEA,189
BWXRVAPP,167
VFMDIRTA,561
BKANEGMD,2122
LBRICWID,1775
TGVOGLDC,3650
QQGZHAAJ,81
KAXPHJSS,122
LKAOHISA,32
ONOVZSYQ,41
IEPQEPZP,62
QWEXGXQS,0
IQGPZYQO,15
MEJLXIBG,10
MRWRHWHX,10
TMVAJLSS,57
BYIAXYOJ,173
DYUAGWGT,248
ODLVZSST,21
EOTOZLHA,6476
KPBHOQQR,30
OLSVIYOW,539
CZSCSLVX,17
ZPMYBTZL,11
IATWRKOF,12507
WGBEFQBH,41
PUJIFEFE,382
TSDULCGU,9070
DARUKFAG,209
MBLRRNYH,250
IIQNNWSG,25
OWBZYIUC,1808
ILXTRXZD,2012
ZLVRZUYH,269
CPVPLOWZ,108
KYZJGTMO,635
EJHWGHZG,25
TUXTOWBR,11
LXGXLCWW,2313
AVFHPRWT,915
AEPHMPNF,32
KLZZHAQT,56
XWQJZNFA,611
JKHYCDSC,1455
运行命令:python columnsplit.py test44a.csv 12(其中 12 是所需的列数)。
示例输出(每行依次为:列序号、该列数值的总和、该列包含的设备列表):
1) 45577 [('LELLPAYW', 43792), ('HEAVWVGH', 0), ('XRCGIKTR', 12), ('ODLVZSST', 21), ('VMHTKHBU', 32), ('TMVAJLSS', 57), ('KAXPHJSS', 122), ('ZLVRZUYH', 269), ('SQCIEOEO', 1272)]
2) 31906 [('WSPHKYOQ', 30025), ('GCUUGTNA', 0), ('UEDBIRIN', 14), ('WTVUJZSB', 22), ('LKAOHISA', 32), ('ZWNXGXRI', 59), ('UUPXIDEP', 139), ('HIKTXYZH', 281), ('GILGFWDA', 1334)]
3) 23416 [('ZJBZKGFB', 21301), ('ZBUJKFWR', 0), ('AYNWBSRA', 14), ('NFHZLSJH', 23), ('AEPHMPNF', 32), ('MSVNLMOX', 62), ('UBGTOLUX', 147), ('PUJIFEFE', 382), ('JKHYCDSC', 1455)]
4) 23276 [('KSBPEYBZ', 20657), ('QWEXGXQS', 0), ('SPMLASXS', 15), ('FAMDKJLM', 25), ('UMDLALPN', 36), ('IEPQEPZP', 62), ('BWXRVAPP', 167), ('OLSVIYOW', 539), ('LBRICWID', 1775)]
5) 23342 [('NYMJUSQR', 20646), ('WBVYFNBN', 9), ('IQGPZYQO', 15), ('ZSSGQJPM', 25), ('UIBNCQKP', 40), ('VFSPABFG', 65), ('BYIAXYOJ', 173), ('VFMDIRTA', 561), ('OWBZYIUC', 1808)]
6) 21877 [('WYDZBWQU', 19100), ('CYXHXYHI', 10), ('GWJPWXWU', 16), ('IIQNNWSG', 25), ('WNQEXJWI', 41), ('DARXQYDY', 68), ('PCUDYNEY', 179), ('IARDNRVL', 587), ('JDKUPYRM', 1851)]
7) 16088 [('LJGBTVOW', 13110), ('MEJLXIBG', 10), ('RXGBCCTR', 17), ('EJHWGHZG', 25), ('ONOVZSYQ', 41), ('DTRKMFLK', 73), ('ROYOZKRS', 189), ('XWQJZNFA', 611), ('ILXTRXZD', 2012)]
8) 15607 [('HIPRQNIE', 12557), ('MRWRHWHX', 10), ('CZSCSLVX', 17), ('TYJZUNZS', 27), ('WGBEFQBH', 41), ('QQGZHAAJ', 81), ('ZEPOWAEA', 189), ('KYZJGTMO', 635), ('EQFKHEOH', 2050)]
9) 17952 [('IATWRKOF', 12507), ('KVLGTIBI', 11), ('ERRLAENN', 19), ('NCVBJIYR', 27), ('CZBMYQDX', 45), ('ZADKGIMB', 82), ('DARUKFAG', 209), ('GPJRWKMD', 673), ('YKUMWOKT', 2066), ('LXGXLCWW', 2313)]
10) 15982 [('TSDULCGU', 9070), ('KWMIKQHW', 11), ('UEPUOHMW', 20), ('JAZJEMXL', 28), ('OQWVSKFM', 49), ('CKHMJZOG', 85), ('DYUAGWGT', 248), ('XMANFQZE', 719), ('XTFUHPNX', 2102), ('TGVOGLDC', 3650)]
11) 14358 [('EOTOZLHA', 6476), ('ZPMYBTZL', 11), ('MVNHODYT', 20), ('YRFZOWTB', 28), ('PFSVTUIP', 51), ('CPVPLOWZ', 108), ('MBLRRNYH', 250), ('AVFHPRWT', 915), ('BKANEGMD', 2122), ('SKQZMTFY', 4377)]
12) 15683 [('MDFMVCFV', 6133), ('TUXTOWBR', 11), ('SHZUXKKU', 21), ('KPBHOQQR', 30), ('KLZZHAQT', 56), ('CSUSTTOX', 120), ('HIUEHBNJ', 261), ('LBFSQTQL', 973), ('WUYPPNMW', 2285), ('JZHRZXKX', 5793)]