I am building a next-character prediction LSTM for sentences. I was following the tutorial here https://indico.io/blog/tensorflow-data-inputs-part1-placeholders-protobufs-queues/ on how to make the data input process part of the TensorFlow graph, and now I have a stateful LSTM that is fed symbolic (!) batches generated by tf.contrib.training.batch_sequences_with_states, which are in turn read from TF.SequenceExamples of varying lengths (a char-RNN working on the characters of a sentence), as shown in the code below.
The whole input and batching process is therefore part of the compute graph. Training works, but since the input is symbolic (not a TF.placeholder), I cannot figure out how to feed my own sentence, given as a Python string, to the LSTM in order to perform inference (i.e. sample from the model). Any ideas?
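For completeness, SequenceHandler.loadSequence used in the code below boils down to something like this (a simplified sketch reconstructed from how the rest of the code uses its outputs; the exact feature specs are approximate):

import tensorflow as tf

def loadSequence(filename):
    # Read one serialized TF.SequenceExample at a time from the TFRecord file
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    key, serialized_example = reader.read(filename_queue)

    context_features = {
        "length": tf.FixedLenFeature([], dtype=tf.int64)
    }
    sequence_features = {
        "inputs": tf.FixedLenSequenceFeature([], dtype=tf.int64),
        "outputs": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }
    context, sequences = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features)
    return key, context, sequences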
import tensorflow as tf
import numpy as np
from tensorflow.python.util import nest

import SequenceHandler
import DataLoader

# SETTINGS
learning_rate = 0.001
batch_size = 128
num_unroll = 200
num_enqueue_threads = 10
lstm_size = 256
vocab_size = 39

# DATA
# Loads a TF.SequenceExample sequence using a TF.RecordReader
key, context, sequences = SequenceHandler.loadSequence("input.tf")

# MODEL
cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=lstm_size)
initial_states = {"lstm_state_c": tf.zeros(cell.state_size[0], dtype=tf.float32),
                  "lstm_state_h": tf.zeros(cell.state_size[0], dtype=tf.float32)}
batch = tf.contrib.training.batch_sequences_with_states(
    input_key=key,
    input_sequences=sequences,
    input_context=context,
    input_length=tf.cast(context["length"], tf.int32),
    initial_states=initial_states,
    num_unroll=num_unroll,
    batch_size=batch_size,
    num_threads=num_enqueue_threads,
    capacity=batch_size * num_enqueue_threads * 2)

# BATCH INPUT
inputs = batch.sequences["inputs"]
targets = batch.sequences["outputs"]

# Convert input into float one-hot representation
embedding = tf.constant(np.eye(vocab_size), dtype=tf.float32)
inputs = tf.nn.embedding_lookup(embedding, inputs)

# Reshape inputs (and targets respectively) into a list of length T (unrolling length),
# with each element being a Tensor of shape (batch_size, input_dimensionality)
inputs_by_time = tf.split(1, num_unroll, inputs)
inputs_by_time = [tf.squeeze(elem, squeeze_dims=1) for elem in inputs_by_time]
targets_by_time = tf.split(1, num_unroll, targets)
targets_by_time = [tf.squeeze(elem, squeeze_dims=1) for elem in targets_by_time]
targets_by_time_packed = tf.pack(targets_by_time)

# Build RNN
state_name = ("lstm_state_c", "lstm_state_h")
state_size = cell.state_size
state_is_tuple = nest.is_sequence(state_size)
state_name_tuple = nest.is_sequence(state_name)
state_name_flat = nest.flatten(state_name)
state_size_flat = nest.flatten(state_size)
initial_state = nest.pack_sequence_as(
    structure=state_size,
    flat_sequence=[batch.state(s) for s in state_name_flat])
seq_lengths = batch.context["length"]
(outputs, state) = tf.nn.state_saving_rnn(cell, inputs_by_time, state_saver=batch,
                                          sequence_length=seq_lengths, state_name=state_name)

# Create softmax parameters, weights and bias, and apply to RNN outputs at each timestep
with tf.variable_scope('softmax') as sm_vs:
    softmax_w = tf.get_variable("softmax_w", [lstm_size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = [tf.matmul(outputStep, softmax_w) + softmax_b for outputStep in outputs]
    logit = tf.pack(logits)
    probs = tf.nn.softmax(logit)

with tf.name_scope('loss'):
    # Compute mean cross entropy loss for each output
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logit, targets_by_time_packed)
    mean_loss = tf.reduce_mean(loss)

global_step = tf.get_variable('global_step', [],
                              initializer=tf.constant_initializer(0.0))
learning_rate = tf.constant(learning_rate)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(mean_loss, tvars), 5.0)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

# TRAINING LOOP
# Start a prefetcher in the background
sess = tf.Session()
tf.train.start_queue_runners(sess=sess)
init_op = tf.initialize_all_variables()
sess.run(init_op)

# LOGGING
summary_writer = tf.train.SummaryWriter("log", sess.graph)
vocab_index_dict, index_vocab_dict, vocab_size = DataLoader.load_vocab("characters.json", "UTF-8")

while True:
    # Step through batches, perform training
    trainOps = [mean_loss, state, train_op, global_step]
    res = sess.run(trainOps)  # THIS WORKS - LOSS DECLINES

    testString = "Hello"
    # HOW TO SAMPLE FROM MODEL, GIVEN INPUT testString HERE?
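If inputs were a TF.placeholder, I would know what to feed: the test string converted to a batch of vocabulary indices, roughly like the sketch below (string_to_index_batch is a made-up helper, and I assume vocab_index_dict maps characters to indices). What I cannot see is where such an array could go, given that inputs comes out of batch_sequences_with_states.

import numpy as np

def string_to_index_batch(text, vocab_index_dict):
    # Map each character to its vocabulary index and add a batch dimension,
    # giving shape (1, len(text)): a "batch" of one sequence.
    indices = [vocab_index_dict[ch] for ch in text]
    return np.array(indices, dtype=np.int32).reshape(1, -1)

# With a placeholder-based graph this would be something like:
#   feed = {input_placeholder: string_to_index_batch(testString, vocab_index_dict)}
#   p = sess.run(probs, feed_dict=feed)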
More generally, I have trouble understanding how to work with data input that is part of the compute graph, for example how to split it for cross-validation, and there seem to be no examples in that direction that use TFRecords.
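For instance, the only approach I can think of for a train/validation split is to write the records into separate files and build one queue per split, something like the sketch below (train.tf and valid.tf are made-up file names), but then I don't see how to point the model at one pipeline or the other once the graph is built.

import tensorflow as tf

# One filename queue per split; each would feed its own TFRecordReader /
# batch_sequences_with_states pipeline like the one above.
train_queue = tf.train.string_input_producer(["train.tf"])
valid_queue = tf.train.string_input_producer(["valid.tf"], num_epochs=1)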