Here is a seemingly simple problem: given a list of iterators that yield sequences of integers in ascending order, write a concise generator that yields only the integers that appear in every sequence.
def postings(posts):
    """Return the sorted list of values that occur in every posting list.

    ``posts`` is an iterable of iterables of hashable values.  Each posting
    list is consumed exactly once.  An empty ``posts`` has nothing in
    common, so the result is ``[]`` instead of an error.
    """
    remaining = iter(posts)
    try:
        common = set(next(remaining))
    except StopIteration:
        # No posting lists at all.
        return []
    # Fold the remaining lists into the running intersection one at a time
    # so only a single set is ever held in memory.
    for post in remaining:
        common.intersection_update(post)
    return sorted(common)
... you could try and take advantage of the fact that the lists are ordered, but since reduce, generator expressions and set are all implemented in C, you'll probably have a hard time doing better than the above with logic implemented in python.
import heapq, itertools
def intersect(*its):
    """Yield, in ascending order, the values present in every input.

    Each positional argument is an iterable of values in ascending order.
    The inputs are merged lazily with ``heapq.merge`` and a value is
    yielded when it was contributed by all of them.
    """
    # Collapse runs of equal values inside each input first, so that a value
    # repeated within one input is counted only once for that input.
    deduped = [(key for key, _ in itertools.groupby(it)) for it in its]
    for key, values in itertools.groupby(heapq.merge(*deduped)):
        # Count the run without materialising it as a list.
        if sum(1 for _ in values) == len(its):
            yield key
>>> list(intersect(*postings))
[100, 322]
What about this:
import heapq
def inalliters(iterators):
    """Yield the values that occur in every one of ``iterators``.

    ``iterators`` is an iterable of iterators (or iterables) that each
    produce values in ascending order.  A heap holds the current value of
    every input; a value is common to all inputs exactly when the smallest
    current value equals the largest value seen so far.  Generation stops
    as soon as any input is exhausted.
    """
    heap = []
    for index, source in enumerate(iterators):
        iterator = iter(source)
        try:
            first = next(iterator)
        except StopIteration:
            # An empty input cannot share a value with anything.
            return
        # ``index`` breaks ties between equal values so the iterator
        # objects themselves are never compared.
        heap.append((first, index, iterator))
    if not heap:
        return
    heapq.heapify(heap)
    maximal = max(entry[0] for entry in heap)
    while True:
        value, index, iterator = heap[0]
        if maximal == value:
            yield value
        try:
            nextvalue = next(iterator)
        except StopIteration:
            return
        # Replace the smallest entry with the next value of the same input.
        heapq.heapreplace(heap, (nextvalue, index, iterator))
        maximal = max(maximal, nextvalue)
# Sample posting lists from the question; each iterator is ascending.
postings = [
    iter([1, 100, 142, 322, 12312]),
    iter([2, 100, 101, 322, 1221]),
    iter([100, 142, 322, 956, 1222]),
]
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(list(inalliters(postings)))
I haven't tested it very thoroughly (just ran your example), but I believe the basic idea is sound.
I want to show that there's an elegant solution which only iterates forward once. Sorry, I don't know Python well enough, so I use fictional classes. This one reads input, an array of iterators, and writes to output on the fly without ever going back or using any array function!
def intersect(input, output):
    """Push onto ``output`` every value shared by all cursors in ``input``.

    ``input`` is a sequence of cursor-like objects using the fictional
    interface of the original sketch: ``cur`` is the cursor's current value
    and ``step()`` advances it.  ``step()`` is assumed to return a truthy
    value while another element is available, since its result is what
    ends the loop -- confirm against the real cursor type.  ``output`` only
    needs a ``push(value)`` method.

    Each pass finds the cursor with the smallest current value, reports
    that value if every cursor is positioned on it, and then advances only
    that smallest cursor, so no cursor ever moves backwards.
    """
    if not input:
        return
    while True:
        lowest = input[0]
        bingo = True
        for cursor in input:
            # Any difference from the running minimum means the cursors do
            # not all agree, whether this cursor is smaller or larger.
            if cursor.cur != lowest.cur:
                bingo = False
            if cursor.cur < lowest.cur:
                lowest = cursor
        if bingo:
            output.push(lowest.cur)
        # Do-while: stop once the smallest cursor cannot advance.
        if not lowest.step():
            break
This solution will compute the intersection of your iterators. It works by advancing the iterators one step at a time and looking for the same value in all of them. When found, such values are yielded -- this makes the intersect function a generator itself.
import operator
def intersect(sequences):
    """Compute intersection of sequences of increasing integers.

    ``sequences`` is an iterable of iterables, each in increasing order.
    Values present in every sequence are yielded in increasing order, and
    generation ends as soon as any sequence is exhausted.  With no
    sequences at all, nothing is yielded.

    >>> list(intersect([[1, 100, 142, 322, 12312],
    ...                 [2, 100, 101, 322, 1221],
    ...                 [100, 142, 322, 956, 1222]]))
    [100, 322]
    """
    iterators = [iter(seq) for seq in sequences]
    if not iterators:
        return
    try:
        last = [next(iterator) for iterator in iterators]
        while True:
            head = last[0]
            if all(value == head for value in last):
                # Every iterator is positioned on the same value.
                yield head
                last = [next(iterator) for iterator in iterators]
            # Walk the neighbouring pairs once, advancing whichever side is
            # behind.  Each iterator moves at most one step per pair, so we
            # stop promptly when the shortest input runs out.
            for i in range(len(iterators) - 1):
                if last[i] < last[i + 1]:
                    last[i] = next(iterators[i])
                if last[i] > last[i + 1]:
                    last[i + 1] = next(iterators[i + 1])
    except StopIteration:
        # An exhausted input ends the intersection.  Returning explicitly
        # keeps StopIteration from escaping the generator body.
        return
This one runs in O(n*m), where n is the sum of all iterator lengths and m is the number of lists. It can be made O(n*log m) by replacing the linear `min(...)` scan with a heap.
def intersection(its):
    """Yield the values common to every input in ``its``.

    ``its`` is an iterable of iterators (or iterables), each producing
    values in ascending order.  ``vs`` holds the current value of each
    input and ``m`` the largest value seen so far; when the smallest
    current value equals ``m``, all inputs sit on that value and it is
    yielded.  Generation stops when any input is exhausted.
    """
    its = [iter(it) for it in its]
    if not its:
        return
    try:
        vs = [next(it) for it in its]
        m = max(vs)
        while True:
            # Linear scan for the smallest current value; a heap here would
            # reduce this step from O(m) to O(log m).
            v, i = min((v, i) for i, v in enumerate(vs))
            if v == m:
                yield m
            vs[i] = next(its[i])
            m = max(m, vs[i])
    except StopIteration:
        # Return explicitly so StopIteration does not escape the generator
        # body (it would surface as RuntimeError under PEP 479).
        return