How do you read all examples from a TFRecords file at once?
I've been using tf.parse_single_example to read out individual examples using code similar to th
To read all the data just once, you need to pass num_epochs to the string_input_producer. When all the records have been read, running the reader's .read op will raise an error (tf.errors.OutOfRangeError), which you can catch. Simplified example:
import tensorflow as tf
def read_and_decode(filename_queue):
    """Build the ops that read one example from the queue and decode its image.

    Args:
        filename_queue: Queue of TFRecords filenames, passed straight to
            ``TFRecordReader.read``.

    Returns:
        The tensor produced by decoding the example's ``'image_raw'`` bytes
        feature as ``tf.uint8``.
    """
    # Each example is expected to carry a single scalar string feature.
    feature_spec = {'image_raw': tf.FixedLenFeature([], tf.string)}

    record_reader = tf.TFRecordReader()
    # read() yields a (key, value) pair; only the serialized value is used.
    _key, serialized = record_reader.read(filename_queue)

    parsed = tf.parse_single_example(serialized, features=feature_spec)
    return tf.decode_raw(parsed['image_raw'], tf.uint8)
def get_all_records(FILE):
    """Read every record of a TFRecords file exactly once.

    Args:
        FILE: Path of the TFRecords file to read.

    Returns:
        A list holding the decoded image of each record (the value returned
        by ``sess.run`` for the tensor built by ``read_and_decode``), in the
        order the records were read. All records are kept in memory.
    """
    records = []
    with tf.Session() as sess:
        # num_epochs=1 closes the filename queue after a single pass, which
        # is what eventually makes the read op raise OutOfRangeError.
        filename_queue = tf.train.string_input_producer([FILE], num_epochs=1)
        image = read_and_decode(filename_queue)

        # The epoch counter created by num_epochs is a *local* variable, so
        # local variables have to be initialized alongside the global ones.
        init_op = tf.group(tf.initialize_all_variables(),
                           tf.initialize_local_variables())
        sess.run(init_op)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            while True:
                records.append(sess.run(image))
        except tf.errors.OutOfRangeError:
            # Expected: the queue is exhausted, i.e. every record was read.
            pass
        finally:
            # Always stop and join the queue-runner threads, even on error.
            coord.request_stop()
            coord.join(threads)
    return records
# Example invocation; replace the placeholder path with a real TFRecords file.
get_all_records('/path/to/train-0.tfrecords')
And to use tf.parse_example (which is faster than tf.parse_single_example) you need to first batch the examples like this:
# Group num_examples serialized examples into a single batch tensor.
# serialized_example, num_examples and feature_spec are not defined in this
# snippet and must be supplied by the surrounding code.
batch = tf.train.batch([serialized_example], num_examples, capacity=num_examples)
# Parse the whole batch of serialized examples in one call.
parsed_examples = tf.parse_example(batch, feature_spec)
Unfortunately, this way you need to know the number of examples beforehand.