I am trying to do some vanilla pattern recognition with an LSTM using Keras to predict the next element in a sequence.
My data are variable-length integer sequences like [1, 2, 4, 5, 8, 10, 16], and I want to predict the element that follows.
I'm not sure how applicable recurrent networks are to your sequences, i.e. how strongly each element depends on the subsequence preceding it as opposed to other factors. That being said (which doesn't help you one bit, of course): if you don't want to pad your input with some dummy value (a sketch of that route is at the end of this answer), a stateful model that processes a single timestep at a time is, IMHO, the only alternative for variable-length sequences. If you don't mind taking an alternative approach to encoding:
import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.callbacks as kec
import sklearn.preprocessing as skprep
X_train = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}
max_features = 16
num_mem_units = 64
size_batch = 1  # stateful model: one sequence at a time
num_timesteps = 1  # one timestep per sample, so sequence length can vary
num_features = 1
num_targets = 1
num_epochs = 1500
model = kem.Sequential()
model.add(kel.LSTM(num_mem_units, stateful=True,
                   batch_input_shape=(size_batch, num_timesteps, num_features),
                   return_sequences=True))
model.add(kel.Dense(num_targets, activation='sigmoid'))
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')
range_act = (0, 1) # sigmoid
range_features = np.array([0, max_features]).reshape(-1, 1)
normalizer = skprep.MinMaxScaler(feature_range=range_act)
normalizer.fit(range_features)
# stateful layers carry their state across batches, so reset it manually after every epoch
reset_state = kec.LambdaCallback(on_epoch_end=lambda *_: model.reset_states())
# training: each sequence is fitted in turn for the full number of epochs
for seq in X_train['Sequence']:
    X = seq[:-1]
    y = seq[1:]  # target is the next element
    X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features)
    y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets)
    model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False,
              callbacks=[reset_state])
# prediction: replay each sequence one step at a time; the output after the
# final input step is the model's guess for the last element
for seq in X_train['Sequence']:
    model.reset_states()
    for istep in range(len(seq) - 1):  # input up to, not including, the last element
        val = seq[istep]
        X = np.array([val]).reshape(-1, 1)
        X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features)
        y_norm = model.predict(X_norm)
    yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
    y = seq[-1]  # the expected value
    put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y)
    print(put)
which produces something like:
1, 2, 4, 5, 8, 10 predicts 11, expecting 16
1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7
with a ridiculously high loss, however.
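For reference, the padding route dismissed at the top would look roughly like this: pre-pad the inputs to a common length and let a Masking layer skip the padded timesteps. This is only a minimal, standalone sketch, assuming 0 never occurs in your data (so it can serve as the mask value) and, for simplicity, training each sequence against its final element only; the epoch and batch numbers are placeholders:
import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.preprocessing.sequence as kes
seqs = [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]
maxlen = max(len(seq) - 1 for seq in seqs)  # inputs exclude the last element
# pre-pad with 0, a value assumed never to occur in the data
X = kes.pad_sequences([seq[:-1] for seq in seqs], maxlen=maxlen, value=0)
X = X.reshape(-1, maxlen, 1).astype(float)
y = np.array([seq[-1] for seq in seqs], dtype=float).reshape(-1, 1)
model = kem.Sequential()
model.add(kel.Masking(mask_value=0., input_shape=(maxlen, 1)))  # padded steps are skipped downstream
model.add(kel.LSTM(64))
model.add(kel.Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=500, batch_size=2)
You lose the one-timestep-at-a-time statefulness this way, but all sequences fit in a single vectorized call, and the loss (plain MSE on the raw values here) is easier to interpret.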