问题
xrange
function doesn't work for large integers:
>>> N = 10**100
>>> xrange(N)
Traceback (most recent call last):
...
OverflowError: long int too large to convert to int
>>> xrange(N, N+10)
Traceback (most recent call last):
...
OverflowError: long int too large to convert to int
Python 3.x:
>>> N = 10**100
>>> r = range(N)
>>> r = range(N, N+10)
>>> len(r)
10
Is there a backport of py3k builtin range()
function for Python 2.x?
Edit
I'm looking for a complete implementation of "lazy" range()
, not just a partial implementation of some of its functionality.
回答1:
Okay, here's a go at a fuller reimplementation.
class MyXRange(object):
def __init__(self, a1, a2=None, step=1):
if step == 0:
raise ValueError("arg 3 must not be 0")
if a2 is None:
a1, a2 = 0, a1
if (a2 - a1) % step != 0:
a2 += step - (a2 - a1) % step
if cmp(a1, a2) != cmp(0, step):
a2 = a1
self.start, self.stop, self.step = a1, a2, step
def __iter__(self):
n = self.start
while cmp(n, self.stop) == cmp(0, self.step):
yield n
n += self.step
def __repr__(self):
return "MyXRange(%d,%d,%d)" % (self.start, self.stop, self.step)
# NB: len(self) will convert this to an int, and may fail
def __len__(self):
return (self.stop - self.start)//(self.step)
def __getitem__(self, key):
if key < 0:
key = self.__len__() + key
if key < 0:
raise IndexError("list index out of range")
return self[key]
n = self.start + self.step*key
if cmp(n, self.stop) != cmp(0, self.step):
raise IndexError("list index out of range")
return n
def __reversed__(self):
return MyXRange(self.stop-self.step, self.start-self.step, -self.step)
def __contains__(self, val):
if val == self.start: return cmp(0, self.step) == cmp(self.start, self.stop)
if cmp(self.start, val) != cmp(0, self.step): return False
if cmp(val, self.stop) != cmp(0, self.step): return False
return (val - self.start) % self.step == 0
And some testing:
def testMyXRange(testsize=10):
def normexcept(f,args):
try:
r = [f(args)]
except Exception, e:
r = type(e)
return r
for i in range(-testsize,testsize+1):
for j in range(-testsize,testsize+1):
print i, j
for k in range(-9, 10, 2):
r, mr = range(i,j,k), MyXRange(i,j,k)
if r != list(mr):
print "iter fail: %d, %d, %d" % (i,j,k)
if list(reversed(r)) != list(reversed(mr)):
print "reversed fail: %d, %d, %d" % (i,j,k)
if len(r) != len(mr):
print "len fail: %d, %d, %d" % (i,j,k)
z = [m for m in range(-testsize*2,testsize*2+1)
if (m in r) != (m in mr)]
if z != []:
print "contains fail: %d, %d, %d, %s" % (i,j,k,(z+["..."])[:10])
z = [m for m in range(-testsize*2, testsize*2+1)
if normexcept(r.__getitem__, m) != normexcept(mr.__getitem__, m)]
if z != []:
print "getitem fail: %d, %d, %d, %s" % (i,j,k,(z+["..."])[:10])
回答2:
I believe there is no backport (Py 3's completely removed the int/long distinction, after all, but in 2.* it's here to stay;-) but it's not hard to hack your own, e.g....:
import operator
def wowrange(start, stop, step=1):
if step == 0:
raise ValueError('step must be != 0')
elif step < 0:
proceed = operator.gt
else:
proceed = operator.lt
while proceed(start, stop):
yield start
start += step
Edit it appears the OP doesn't just want looping (the normal purpose of xrange, and
range in Py3), but also len
and the in
operator (the latter does work on the above generator, but slowly -- optimizations are possible). For such richness a class
is better...:
import operator
class wowrange(object):
def __init__(self, start, stop=None, step=1):
if step == 0: raise ValueError('step must be != 0')
if stop is None: start, stop = 0, start
if step < 0:
self.proceed = operator.gt
self.l = (stop-start+step+1)//step
else:
self.proceed = operator.lt
self.l = (stop-start+step-1)//step
self.lo = min(start, stop)
self.start, self.stop, self.step = start, stop, step
def __iter__(self):
start = self.start
while self.proceed(start, self.stop):
yield start
start += self.step
def __len__(self):
return self.l
def __contains__(self, x):
if x == self.stop:
return False
if self.proceed(x, self.start):
return False
if self.proceed(self.stop, x):
return False
return (x-self.lo) % self.step == 0
I wouldn't be surprised if there's an off-by-one or similar glitch lurking here, but, I hope this helps!
Edit again: I see indexing is ALSO required. Is it just too hard to write your own __getitem__
? I guess it is, so here it, too, is, served on a silver plate...:
def __getitem__(self, i):
if i < 0:
i += self.l
if i < 0: raise IndexError
elif if i >= self.l:
raise IndexError
return self.start + i * self.step
I don't know if 3.0 range
supports slicing (xrange
in recent 2.*
releases doesn't -- it used to, but that was removed because the complication was ridiculous and prone to bugs), but I guess I do have to draw a line in the sand somewhere, so I'm not going to add it;-).
回答3:
From the docs:
Note
xrange() is intended to be simple and fast. Implementations may impose restrictions to achieve this. The C implementation of Python restricts all arguments to native C longs (“short” Python integers), and also requires that the number of elements fit in a native C long. If a larger range is needed, an alternate version can be crafted using the itertools module: islice(count(start, step), (stop-start+step-1)//step).
Alternatively reimplement xrange using generators:
def myxrange(a1, a2=None, step=1):
if a2 is None:
start, last = 0, a1
else:
start, last = a1, a2
while cmp(start, last) == cmp(0, step):
yield start
start += step
and
N = 10**100
len(list(myxrange(N, N+10)))
回答4:
Edit
Issue 1546078: "xrange that supports longs, etc" on the Python issue tracker contains C patch and pure Python implementation of unlimited xrange written by Neal Norwitz (nnorwitz). See xrange.py
Edit
The latest version of irange
(renamed as lrange
) is at github.
Implementation based on py3k's rangeobject.c
irange.py
"""Define `irange.irange` class
`xrange`, py3k's `range` analog for large integers
See help(irange.irange)
>>> r = irange(2**100, 2**101, 2**100)
>>> len(r)
1
>>> for i in r:
... print i,
1267650600228229401496703205376
>>> for i in r:
... print i,
1267650600228229401496703205376
>>> 2**100 in r
True
>>> r[0], r[-1]
(1267650600228229401496703205376L, 1267650600228229401496703205376L)
>>> L = list(r)
>>> L2 = [1, 2, 3]
>>> L2[:] = r
>>> L == L2 == [2**100]
True
"""
def toindex(arg):
"""Convert `arg` to integer type that could be used as an index.
"""
if not any(isinstance(arg, cls) for cls in (long, int, bool)):
raise TypeError("'%s' object cannot be interpreted as an integer" % (
type(arg).__name__,))
return int(arg)
class irange(object):
"""irange([start,] stop[, step]) -> irange object
Return an iterator that generates the numbers in the range on demand.
Return `xrange` for small integers
Pure Python implementation of py3k's `range()`.
(I.e. it supports large integers)
If `xrange` and py3k `range()` differ then prefer `xrange`'s behaviour
Based on `[1]`_
.. [1] http://svn.python.org/view/python/branches/py3k/Objects/rangeobject.c?view=markup
>>> # on Python 2.6
>>> N = 10**80
>>> len(range(N, N+3))
3
>>> len(xrange(N, N+3))
Traceback (most recent call last):
...
OverflowError: long int too large to convert to int
>>> len(irange(N, N+3))
3
>>> xrange(N)
Traceback (most recent call last):
...
OverflowError: long int too large to convert to int
>>> irange(N).length() == N
True
"""
def __new__(cls, *args):
try: return xrange(*args) # use `xrange` for small integers
except OverflowError: pass
nargs = len(args)
if nargs == 1:
stop = toindex(args[0])
start = 0
step = 1
elif nargs in (2, 3):
start = toindex(args[0])
stop = toindex(args[1])
if nargs == 3:
step = args[2]
if step is None:
step = 1
step = toindex(step)
if step == 0:
raise ValueError("irange() arg 3 must not be zero")
else:
step = 1
else:
raise ValueError("irange(): wrong number of arguments," +
" got %s" % args)
r = super(irange, cls).__new__(cls)
r._start, r._stop, r._step = start, stop, step
return r
def length(self):
"""len(self) might throw OverflowError, this method shouldn't."""
if self._step > 0:
lo, hi = self._start, self._stop
step = self._step
else:
hi, lo = self._start, self._stop
step = -self._step
assert step
if lo >= hi:
return 0
else:
return (hi - lo - 1) // step + 1
__len__ = length
def __getitem__(self, i): # for L[:] = irange(..)
if i < 0:
i = i + self.length()
if i < 0 or i >= self.length():
raise IndexError("irange object index out of range")
return self._start + i * self._step
def __repr__(self):
if self._step == 1:
return "irange(%r, %r)" % (self._start, self._stop)
else:
return "irange(%r, %r, %r)" % (
self._start, self._stop, self._step)
def __contains__(self, ob):
if type(ob) not in (int, long, bool): # mimic py3k
# perform iterative search
return any(i == ob for i in self)
# if long or bool
if self._step > 0:
inrange = self._start <= ob < self._stop
else:
assert self._step
inrange = self._stop < ob <= self._start
if not inrange:
return False
else:
return ((ob - self._start) % self._step) == 0
def __iter__(self):
len_ = self.length()
i = 0
while i < len_:
yield self._start + i * self._step
i += 1
def __reversed__(self):
len_ = self.length()
new_start = self._start + (len_ - 1) * self._step
new_stop = self._start
if self._step > 0:
new_stop -= 1
else:
new_stop += 1
return irange(new_start, new_stop, -self._step)
test_irange.py
"""Unit-tests for irange.irange class.
Usage:
$ python -W error test_irange.py --with-doctest --doctest-tests
"""
import sys
from nose.tools import raises
from irange import irange
def eq_irange(a, b):
"""Assert that `a` equals `b`.
Where `a`, `b` are `irange` objects
"""
try:
assert a.length() == b.length()
assert a._start == b._start
assert a._stop == b._stop
assert a._step == b._step
if a.length() < 100:
assert list(a) == list(b)
try:
assert list(a) == range(a._start, a._stop, a._step)
except OverflowError:
pass
except AttributeError:
if type(a) == xrange:
assert len(a) == len(b)
if len(a) == 0: # empty xrange
return
if len(a) > 0:
assert a[0] == b[0]
if len(a) > 1:
a = irange(a[0], a[-1], a[1] - a[0])
b = irange(b[0], b[-1], b[1] - b[0])
eq_irange(a, b)
else:
raise
def _get_short_iranges_args():
# perl -E'local $,= q/ /; $n=100; for (1..20)
# > { say map {int(-$n + 2*$n*rand)} 0..int(3*rand) }'
input_args = """\
67
-11
51
-36
-15 38 19
43 -58 79
-91 -71
-56
3 51
-23 -63
-80 13 -30
24
-14 49
10 73
31
38 66
-22 20 -81
79 5 84
44
40 49
"""
return [[int(arg) for arg in line.split()]
for line in input_args.splitlines() if line.strip()]
def _get_iranges_args():
N = 2**100
return [(start, stop, step)
for start in range(-2*N, 2*N, N//2+1)
for stop in range(-4*N, 10*N, N+1)
for step in range(-N//2, N, N//8+1)]
def _get_short_iranges():
return [irange(*args) for args in _get_short_iranges_args()]
def _get_iranges():
return (_get_short_iranges() +
[irange(*args) for args in _get_iranges_args()])
@raises(TypeError)
def test_kwarg():
irange(stop=10)
@raises(TypeError, DeprecationWarning)
def test_float_stop():
irange(1.0)
@raises(TypeError, DeprecationWarning)
def test_float_step2():
irange(-1, 2, 1.0)
@raises(TypeError, DeprecationWarning)
def test_float_start():
irange(1.0, 2)
@raises(TypeError, DeprecationWarning)
def test_float_step():
irange(1, 2, 1.0)
@raises(TypeError)
def test_empty_args():
irange()
def test_empty_range():
for args in (
"-3",
"1 3 -1",
"1 1",
"1 1 1",
"-3 -4",
"-3 -2 -1",
"-3 -3 -1",
"-3 -3",
):
r = irange(*[int(a) for a in args.split()])
assert len(r) == 0
L = list(r)
assert len(L) == 0
def test_small_ints():
for args in _get_short_iranges_args():
ir, r = irange(*args), xrange(*args)
assert len(ir) == len(r)
assert list(ir) == list(r)
def test_big_ints():
N = 10**100
for args, len_ in [
[(N,), N],
[(N, N+10), 10],
[(N, N-10, -2), 5],
]:
try:
xrange(*args)
assert 0
except OverflowError:
pass
ir = irange(*args)
assert ir.length() == len_
try:
assert ir.length() == len(ir)
except OverflowError:
pass
#
ir[ir.length()-1]
#
if len(args) >= 2:
r = range(*args)
assert list(ir) == r
assert ir[ir.length()-1] == r[-1]
assert list(reversed(ir)) == list(reversed(r))
#
def test_negative_index():
assert irange(10)[-1] == 9
assert irange(2**100+1)[-1] == 2**100
def test_reversed():
for r in _get_iranges():
if type(r) == xrange: continue # known not to work for xrange
if r.length() > 1000: continue # skip long
assert list(reversed(reversed(r))) == list(r)
assert list(r) == range(r._start, r._stop, r._step)
def test_pickle():
import pickle
for r in _get_iranges():
rp = pickle.loads(pickle.dumps(r))
eq_irange(rp, r)
def test_equility():
for args in _get_iranges_args():
a, b = irange(*args), irange(*args)
assert a is not b
assert a != b
eq_irange(a, b)
def test_contains():
class IntSubclass(int):
pass
r10 = irange(10)
for i in range(10):
assert i in r10
assert IntSubclass(i) in r10
assert 10 not in r10
assert -1 not in r10
assert IntSubclass(10) not in r10
assert IntSubclass(-1) not in r10
def test_repr():
for r in _get_iranges():
eq_irange(eval(repr(r)), r)
def test_new():
assert repr(irange(True)) == repr(irange(1))
def test_overflow():
lo, hi = sys.maxint-2, sys.maxint+3
assert list(irange(lo, hi)) == list(range(lo, hi))
def test_getitem():
r = irange(sys.maxint-2, sys.maxint+3)
L = []
L[:] = r
assert len(L) == len(r)
assert L == list(r)
if __name__ == "__main__":
import nose
nose.main()
回答5:
Even if there was a backport, it would probably have to be modified. The underlying problem here is that in Python 2.x int
and long
are separate data types, even though int
s get automatically upcast to long
s as necessary. However, this doesn't necessarily happen in functions written in C, depending on how they're written.
来源:https://stackoverflow.com/questions/1482480/xrange2100-overflowerror-long-int-too-large-to-convert-to-int