I am trying to do some vanilla pattern recognition with an LSTM using Keras to predict the next element in a sequence.
My data look like this:
where
Not sure how applicable recurrent networks are for your sequences, ie how strongly dependent each element is on its preceding sequence as opposed to other factors. That being said (which doesn't help you one bit of course), if you don't want to pad your input with some bad value, a stateful model that processes a single timestep at once is the only alternative for variable length sequences IMHO. If you don't mind taking an alternative approach to encoding:
import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.callbacks as kec
import sklearn.preprocessing as skprep
X_train, max_features = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}, 16
num_mem_units = 64
size_batch = 1
num_timesteps = 1
num_features = 1
num_targets = 1
num_epochs = 1500
model = kem.Sequential()
model.add(kel.LSTM(num_mem_units, stateful=True, batch_input_shape=(size_batch, num_timesteps, num_features),
return_sequences=True))
model.add(kel.Dense(num_targets, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')
range_act = (0, 1) # sigmoid
range_features = np.array([0, max_features]).reshape(-1, 1)
normalizer = skprep.MinMaxScaler(feature_range=range_act)
normalizer.fit(range_features)
reset_state = kec.LambdaCallback(on_epoch_end=lambda *_ : model.reset_states())
# training
for seq in X_train['Sequence']:
X = seq[:-1]
y = seq[1:] # predict next element
X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features)
y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets)
model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False,
callbacks=[reset_state])
# prediction
for seq in X_train['Sequence']:
model.reset_states()
for istep in range(len(seq)-1): # input up to not incl last
val = seq[istep]
X = np.array([val]).reshape(-1, 1)
X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features)
y_norm = model.predict(X_norm)
yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
y = seq[-1] # last
put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y)
print(put)
which produces sth like:
1, 2, 4, 5, 8, 10 predicts 11, expecting 16
1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7
with ridiculous loss, however.
The trick to training and classifying sequences is training with masking and classifying using a stateful network. Here's an example that I made that classifies whether a sequence of variable length starts with zero or not.
import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt
def stateful_model():
hidden_units = 256
model = models.Sequential()
model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
return model
def train_rnn(x_train, y_train, max_len, mask):
epochs = 10
batch_size = 200
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_train_sig_cls_pair(t_stops, num_examples, mask):
x = []
y = []
max_t = int(np.max(t_stops))
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, max_t), dtype=np.int8)
sig[one_indices, 0] = 1
sig[:, t_stop:] = mask
x.append(sig)
cls = np.zeros(num_examples, dtype=np.bool)
cls[one_indices] = 1
y.append(cls)
return np.concatenate(x, axis=0), np.concatenate(y, axis=0)
def gen_test_sig_cls_pair(t_stops, num_examples):
x = []
y = []
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, t_stop), dtype=np.bool)
sig[one_indices, 0] = 1
x.extend(list(sig))
cls = np.zeros((num_examples, t_stop), dtype=np.bool)
cls[one_indices] = 1
y.extend(list(cls))
return x, y
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
signal_lengths = (10, 15, 20)
x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)
testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)
state_mod = stateful_model()
state_mod.set_weights(mod.get_weights())
res = []
for s_i in range(len(testing_dat)):
seq_in = list(testing_dat[s_i])
seq_len = len(seq_in)
for t_i in range(seq_len):
res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
state_mod.reset_states()
fig, axes = plt.subplots(2)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(res, "ro", label="result", alpha=0.2)
axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
plt.show()
I am not clear about the embedding procedure. But still here is a way to implement a variable-length input LSTM. Just do not specify the timespan dimension when building LSTM.
import keras.backend as K
from keras.layers import LSTM, Input
I = Input(shape=(None, 200)) # unknown timespan, fixed feature size
lstm = LSTM(20)
f = K.function(inputs=[I], outputs=[lstm(I)])
import numpy as np
data1 = np.random.random(size=(1, 100, 200)) # batch_size = 1, timespan = 100
print f([data1])[0].shape
# (1, 20)
data2 = np.random.random(size=(1, 314, 200)) # batch_size = 1, timespan = 314
print f([data2])[0].shape
# (1, 20)