I am building a next-character prediction LSTM for sentences. I was following the tutorial here https://indico.io/blog/tensorflow-data-inputs-part1-placeholders-protobufs-queues/ on how to make the data input process part of the TensorFlow graph, and now I have a stateful LSTM that is fed symbolic (!) batches generated by tf.contrib.training.batch_sequences_with_states, which are in turn read from TF.SequenceExamples of varying lengths (a char-RNN working on the characters of a sentence), as shown in the code below.
The whole input and batching process is therefore part of the compute graph. Training works, but since the input is symbolic (not a TF.placeholder), I cannot figure out how to feed my own sentence, given as a Python string, to the LSTM in order to perform inference (i.e. sample from the model). Any ideas?
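For completeness, SequenceHandler.loadSequence used in the code below boils down to something like this (a simplified sketch reconstructed from how the rest of the code uses its outputs; the exact feature specs are approximate):

import tensorflow as tf

def loadSequence(filename):
    # Read one serialized TF.SequenceExample at a time from the TFRecord file
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    key, serialized_example = reader.read(filename_queue)

    context_features = {
        "length": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "inputs": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        "outputs": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }
    context, sequences = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)
    return key, context, sequences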
import tensorflow as tf
import numpy as np
from tensorflow.python.util import nest

import SequenceHandler
import DataLoader

# SETTINGS
learning_rate = 0.001
batch_size = 128
num_unroll = 200
num_enqueue_threads = 10
lstm_size = 256
vocab_size = 39

# DATA
# Loads a TF.SequenceExample sequence using a TF.RecordReader
key, context, sequences = SequenceHandler.loadSequence("input.tf")

# MODEL
cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=lstm_size)
initial_states = {"lstm_state_c": tf.zeros(cell.state_size[0], dtype=tf.float32),
                  "lstm_state_h": tf.zeros(cell.state_size[0], dtype=tf.float32)}
batch = tf.contrib.training.batch_sequences_with_states(
    input_key=key,
    input_sequences=sequences,
    input_context=context,
    input_length=tf.cast(context["length"], tf.int32),
    initial_states=initial_states,
    num_unroll=num_unroll,
    batch_size=batch_size,
    num_threads=num_enqueue_threads,
    capacity=batch_size * num_enqueue_threads * 2)

# BATCH INPUT
inputs = batch.sequences["inputs"]
targets = batch.sequences["outputs"]

# Convert input into float one-hot representation
embedding = tf.constant(np.eye(vocab_size), dtype=tf.float32)
inputs = tf.nn.embedding_lookup(embedding, inputs)

# Reshape inputs (and targets respectively) into a list of length T (unrolling length),
# with each element being a Tensor of shape (batch_size, input_dimensionality)
inputs_by_time = tf.split(1, num_unroll, inputs)
inputs_by_time = [tf.squeeze(elem, squeeze_dims=1) for elem in inputs_by_time]
targets_by_time = tf.split(1, num_unroll, targets)
targets_by_time = [tf.squeeze(elem, squeeze_dims=1) for elem in targets_by_time]
targets_by_time_packed = tf.pack(targets_by_time)

# Build RNN
state_name = ("lstm_state_c", "lstm_state_h")
state_size = cell.state_size
state_is_tuple = nest.is_sequence(state_size)
state_name_tuple = nest.is_sequence(state_name)
state_name_flat = nest.flatten(state_name)
state_size_flat = nest.flatten(state_size)
initial_state = nest.pack_sequence_as(
    structure=state_size,
    flat_sequence=[batch.state(s) for s in state_name_flat])
seq_lengths = batch.context["length"]
(outputs, state) = tf.nn.state_saving_rnn(cell, inputs_by_time, state_saver=batch,
                                          sequence_length=seq_lengths, state_name=state_name)

# Create softmax parameters, weights and bias, and apply to RNN outputs at each timestep
with tf.variable_scope('softmax') as sm_vs:
    softmax_w = tf.get_variable("softmax_w", [lstm_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = [tf.matmul(outputStep, softmax_w) + softmax_b for outputStep in outputs]
    logit = tf.pack(logits)
    probs = tf.nn.softmax(logit)

with tf.name_scope('loss'):
    # Compute mean cross entropy loss for each output
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logit, targets_by_time_packed)
    mean_loss = tf.reduce_mean(loss)

global_step = tf.get_variable('global_step', [],
                              initializer=tf.constant_initializer(0.0))
learning_rate = tf.constant(learning_rate)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(mean_loss, tvars), 5.0)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

# TRAINING LOOP
# Start a prefetcher in the background
sess = tf.Session()
tf.train.start_queue_runners(sess=sess)
init_op = tf.initialize_all_variables()
sess.run(init_op)

# LOGGING
summary_writer = tf.train.SummaryWriter("log", sess.graph)
vocab_index_dict, index_vocab_dict, vocab_size = DataLoader.load_vocab("characters.json", "UTF-8")

while True:
    # Step through batches, perform training
    trainOps = [mean_loss, state, train_op, global_step]
    res = sess.run(trainOps)  # THIS WORKS - LOSS DECLINES

    testString = "Hello"
    # HOW TO SAMPLE FROM MODEL, GIVEN INPUT testString HERE?
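If inputs were a TF.placeholder, I would know what to feed: the test string converted to a batch of vocabulary indices, roughly like the sketch below (string_to_index_batch is a made-up helper, and I assume vocab_index_dict maps characters to indices). What I cannot see is where such an array could go, given that inputs comes out of batch_sequences_with_states.

import numpy as np

def string_to_index_batch(text, vocab_index_dict):
    # Map each character to its vocabulary index and add a batch dimension,
    # giving shape (1, len(text)): a "batch" of one sequence.
    indices = [vocab_index_dict[ch] for ch in text]
    return np.array(indices, dtype=np.int32).reshape(1, -1)

# With a placeholder-based graph this would be something like:
#   feed = {input_placeholder: string_to_index_batch(testString, vocab_index_dict)}
#   p = sess.run(probs, feed_dict=feed)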
More generally, I have trouble understanding how to work with data input that is part of the compute graph, for example how to split it for cross-validation, and there seem to be no examples in that direction that use TFRecords.
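For instance, the only approach I can think of for a train/validation split is to write the records into separate files and build one queue per split, something like the sketch below (train.tf and valid.tf are made-up file names), but then I don't see how to point the model at one pipeline or the other once the graph is built.

import tensorflow as tf

# One filename queue per split; each would feed its own TFRecordReader /
# batch_sequences_with_states pipeline like the one above.
train_queue = tf.train.string_input_producer(["train.tf"])
valid_queue = tf.train.string_input_producer(["valid.tf"], num_epochs=1)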