Is there a well-hidden way to read tokens from a file or file-like object without reading entire lines? The application I immediately have (someone else's problem
You can read a file in chunks with file.read(size). I would not recommend reading 1 byte at a time, however, as this will drastically hurt performance. The following snippet (lightly tested, use at your own risk) reads the file in chunks and yields numbers. You'll have to read through the file first to determine each row's starting position, though.
def values_chunks(file_object, pos_from=0, chunk_size=32*1024):
    """Yield the integers of one row of a file without reading whole lines.

    The file is read in blocks of ``chunk_size`` starting at ``pos_from``.
    Whitespace-separated tokens are converted with ``int`` and yielded until
    the first newline or the end of the file is reached, whichever comes
    first.  Works with both text and binary file objects.

    Args:
        file_object: Seekable file-like object; its ``read(size)`` must
            return an empty string/bytes at end of file.
        pos_from: Offset handed to ``file_object.seek`` before reading.
        chunk_size: Size passed to each ``file_object.read`` call.

    Yields:
        int: Each value of the row, in file order.

    Raises:
        ValueError: If a token cannot be parsed by ``int``.
    """
    file_object.seek(pos_from)
    tail = None  # partial token cut off by the previous chunk boundary
    while True:
        chunk = file_object.read(chunk_size)
        # An empty read is the only reliable end-of-file signal.
        at_eof = not chunk
        data = tail + chunk if tail else chunk
        # Use a separator of the same type the file hands back (bytes/str).
        newline = b'\n' if isinstance(data, bytes) else '\n'
        line, found, _ = data.partition(newline)
        at_eol = bool(found)
        tokens = line.split()
        # Mid-row and mid-file, a chunk that does not end in whitespace may
        # have split the last token in two: hold it back until more arrives.
        if not (at_eol or at_eof) and line and not line[-1:].isspace():
            tail = tokens.pop()
        else:
            tail = None
        for token in tokens:
            yield int(token)
        if at_eol or at_eof:
            return
>>> with open('test', 'wb') as test:
... test.write(' '.join(map(str, range(10**5))))
... test.write('\n')
... test.write(' '.join(map(str, range(10**4))))
...
>>> values = list(values_chunks(open('test', 'rb')))
>>> len(values)
100000
>>> sum(values)
4999950000L