先按大小对文件名进行排序,然后使用 itertools.groupby
将大小相同的文件分组在一起。
import os
import os.path
import itertools
#creates dummy files with a given number of bytes.
def create_file(name, size):
    """Create a dummy file filled with ``size`` copies of the character "X".

    Does nothing when ``name`` already exists as a regular file, so an
    existing file is never overwritten or truncated.

    Args:
        name: Path of the file to create.
        size: Number of "X" characters to write.
    """
    if os.path.isfile(name):
        return
    #the context manager closes the handle even if the write fails
    with open(name, "w") as handle:
        handle.write("X" * size)
#create some sample files: three of 4 bytes, two of 8 bytes, one of 16 bytes
for sample_name, sample_size in (
    ("foo.txt", 4),
    ("bar.txt", 4),
    ("baz.txt", 4),
    ("qux.txt", 8),
    ("lorem.txt", 8),
    ("ipsum.txt", 16),
):
    create_file(sample_name, sample_size)
#get the filenames in this directory
filenames = [filename for filename in os.listdir(".") if os.path.isfile(filename)]
#stat every file exactly once so that sorting and grouping see the same sizes
size_by_name = {name: os.path.getsize(name) for name in filenames}
#sort by size; groupby only merges adjacent items, so equal sizes must be contiguous
filenames.sort(key=size_by_name.get)
#group by size and iterate
for size, items_iterator in itertools.groupby(filenames, key=size_by_name.get):
    items = list(items_iterator)
    #single-argument print(...) gives the same output on Python 2 and Python 3
    print("{} item(s) of size {}:".format(len(items), size))
    #insert hashlib code here, or whatever else you want to do
    for item in items:
        print(item)
结果:
3 item(s) of size 4:
bar.txt
baz.txt
foo.txt
2 item(s) of size 8:
lorem.txt
qux.txt
1 item(s) of size 16:
ipsum.txt
1 item(s) of size 968:
test.py