下面是不使用 pandas 的一种做法:
from collections import defaultdict
import datetime as dt
from itertools import groupby
def bin_ts(dtime, delta):
    """Round ``dtime`` down to the start of the ``delta``-wide bin containing it.

    Bins are aligned to 1970-01-01 00:00:00. For a naive ``dtime`` the
    alignment uses the wall-clock value as-is, so the result does not depend
    on the machine's local timezone and works for dates before 1970. For an
    aware ``dtime`` the alignment is relative to the UTC epoch.

    Args:
        dtime: ``datetime.datetime`` to bin (naive or timezone-aware).
        delta: ``datetime.timedelta`` giving the bin width; must be non-zero.

    Returns:
        A ``datetime.datetime`` of the same awareness as ``dtime``, at or
        before ``dtime``, lying on a bin boundary.

    Raises:
        ZeroDivisionError: if ``delta`` is a zero-length timedelta.
    """
    # ``utcoffset() is None`` is how the datetime module itself decides that
    # a value is naive (a tzinfo object may still return no offset).
    if dtime.utcoffset() is None:
        epoch = dt.datetime(1970, 1, 1)
    else:
        epoch = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)
    # timedelta % timedelta is exact integer-microsecond arithmetic and, like
    # int %, takes the sign of the divisor, so pre-epoch dates round down too.
    return dtime - (dtime - epoch) % delta
# Width of each time bin.
bin_size = dt.timedelta(minutes=15)

# Raw sample rows: (day-month-year timestamp string, count).
src_data = [
    ('1-1-1900 10:41:00', 1),
    ('3-1-1900 09:54:00', 1),
    ('4-1-1900 15:45:00', 1),
    ('5-1-1900 18:41:00', 1),
    ('4-1-1900 15:45:00', 1),
]

# Parse the timestamp strings into datetime objects, keeping the counts.
ts_data = [
    (dt.datetime.strptime(stamp, '%d-%m-%Y %H:%M:%S'), n)
    for stamp, n in src_data
]

# Snap every timestamp to the start of its bin.
binned = [(bin_ts(moment, bin_size), n) for moment, n in ts_data]
def time_fmt(ts):
    """Render the bin starting at ``ts`` as a ``"HH:MM - HH:MM"`` label.

    The end of the window is ``ts`` plus the module-level ``bin_size``.
    Only hours and minutes are shown; the date part of ``ts`` is dropped.
    """
    clock = '%H:%M'
    window_start = ts.strftime(clock)
    window_end = (ts + bin_size).strftime(clock)
    return "%s - %s" % (window_start, window_end)
from pprint import pprint

# Replace each bin start with its "HH:MM - HH:MM" label. The date is dropped
# here, so rows from different days that share a time-of-day window merge.
binned_time = [(time_fmt(ts), count) for ts, count in binned]

# Sum the counts per label. Accumulating straight into the dict merges equal
# labels wherever they occur in the list; itertools.groupby would only merge
# *adjacent* equal keys and binned_time is not sorted, so it is not used.
cnts = defaultdict(int)
for label, count in binned_time:
    cnts[label] += count

# Labels are unique dict keys, so tuple ordering sorts purely by label.
output = sorted(cnts.items())
pprint(output)
输出结果:
[('09:45 - 10:00', 1),
('10:30 - 10:45', 1),
('15:45 - 16:00', 2),
('18:30 - 18:45', 1)]