好的,这是一个树实现:
import itertools
class treeNode:
def __init__(self, segments):
self.value = None
self.overlapping = None
self.below = None
self.above = None
if len(segments) > 0:
self.value = sum(itertools.chain.from_iterable(segments))/(2*len(segments))
self.overlapping = set()
belowSegs = set()
aboveSegs = set()
for segment in segments:
if segment[0] <= self.value:
if segment[1] < self.value:
belowSegs.add(segment)
else:
self.overlapping.add(segment)
else:
aboveSegs.add(segment)
self.below = treeNode(belowSegs)
self.above = treeNode(aboveSegs)
else:
self.overlapping = None
def getOverlaps(self, item):
if self.overlapping is None:
return set()
if item < self.value:
return {x for x in self.overlapping if x[0]<=item and item<=x[1]} | self.below.getOverlaps(item)
elif item > self.value:
return {x for x in self.overlapping if x[0]<=item and item<=x[1]} | self.above.getOverlaps(item)
else:
return self.overlapping
演示:
import random as r
maxInt = 100
numSegments = 200000
rng = range(maxInt-1)
lefts = [r.choice(rng) for x in range(0, numSegments)]
segments = [(x, r.choice(range(x+1, maxInt))) for x in lefts] # Construct a list of 200,000 random segments from integers between 0 and 100
tree = treeNode(segments) # Builds the tree structure based on a list of segments/ranges
def testReturnSegments():
for item in range(0,100000):
item = item % maxInt
overlaps = tree.getOverlaps(item)
import cProfile
cProfile.run('testReturnSegments()')
这使用 200k 段,并找到 100k 数字的重叠段,并在我的 2.3 GHz Intel i5 上运行 94 秒。这是 cProfile 输出:
输出:
1060004 function calls (580004 primitive calls) in 95.700 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 95.700 95.700 <string>:1(<module>)
580000/100000 11.716 0.000 93.908 0.001 stackoverflow.py:27(getOverlaps)
239000 43.461 0.000 43.461 0.000 stackoverflow.py:31(<setcomp>)
241000 38.731 0.000 38.731 0.000 stackoverflow.py:33(<setcomp>)
1 1.788 1.788 95.700 95.700 stackoverflow.py:46(testReturnSegments)
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
1 0.004 0.004 0.004 0.004 {range}