How do I create a variable-length input LSTM in Keras?

前端未结

关注

 3  440

悲&欢浪女

I am trying to do some vanilla pattern recognition with an LSTM using Keras to predict the next element in a sequence.

My data look like this:

where

相关标签:

3条回答

醉话见心

2020-12-04 09:54

I'm not sure how applicable recurrent networks are to your sequences, i.e. how strongly each element depends on the sequence preceding it as opposed to other factors. That being said (which doesn't help you one bit, of course), if you don't want to pad your input with some bad value, a stateful model that processes a single timestep at a time is, IMHO, the only alternative for variable-length sequences. If you don't mind taking an alternative approach to encoding:

import numpy as np
import keras.models as kem
import keras.layers as kel
import keras.callbacks as kec
import sklearn.preprocessing as skprep

# Toy data: two integer sequences of different lengths. max_features is the
# upper bound the scaler below is fitted on.
X_train = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}
max_features = 16

# Hyper-parameters. Batch size and timestep count are both 1, so the network
# consumes exactly one element per call.
num_mem_units = 64
size_batch = 1
num_timesteps = 1
num_features = 1
num_targets = 1
num_epochs = 1500

# Stateful LSTM followed by a sigmoid read-out; the layers are handed to
# Sequential as a list instead of being added one by one.
model = kem.Sequential([
    kel.LSTM(
        num_mem_units,
        stateful=True,
        batch_input_shape=(size_batch, num_timesteps, num_features),
        return_sequences=True,
    ),
    kel.Dense(num_targets, activation='sigmoid'),
])
model.summary()
model.compile(loss='binary_crossentropy', optimizer='adam')

# Scale values from [0, max_features] into the sigmoid's output range.
range_act = (0, 1)
range_features = np.array([[0], [max_features]])
normalizer = skprep.MinMaxScaler(feature_range=range_act)
normalizer.fit(range_features)

# Clear the LSTM state at the end of every epoch.
reset_state = kec.LambdaCallback(
    on_epoch_end=lambda *_: model.reset_states(),
)

# training: fit the model on each sequence in turn, one element per sample
for seq in X_train['Sequence']:
    # Column vector of the whole sequence; inputs drop the last element and
    # targets drop the first, so each input is paired with its successor.
    steps = np.array(seq).reshape(-1, 1)
    X_norm = normalizer.transform(steps[:-1]).reshape(-1, num_timesteps, num_features)
    y_norm = normalizer.transform(steps[1:]).reshape(-1, num_timesteps, num_targets)
    # shuffle=False keeps the samples in sequence order for the stateful LSTM.
    model.fit(
        X_norm,
        y_norm,
        epochs=num_epochs,
        batch_size=size_batch,
        shuffle=False,
        callbacks=[reset_state],
    )

# prediction: replay each sequence step by step and report the final output
for seq in X_train['Sequence']:
    model.reset_states()
    # Feed everything except the last element; the last one is the target.
    history, y = seq[:-1], seq[-1]
    for val in history:
        X_norm = normalizer.transform(np.array([[val]])).reshape(-1, num_timesteps, num_features)
        y_norm = model.predict(X_norm)
    # Only the output of the final step is kept; int() truncates it.
    yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
    put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(map(str, history)), yhat, y)
    print(put)

which produces something like:

1, 2, 4, 5, 8, 10 predicts 11, expecting 16
1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7

with ridiculous loss, however.

0 讨论(0)

北恋

2020-12-04 10:02

The trick to training and classifying sequences is training with masking and classifying using a stateful network. Here's an example that I made that classifies whether a sequence of variable length starts with zero or not.

import numpy as np
np.random.seed(1)

import tensorflow as tf
tf.set_random_seed(1)

from keras import models
from keras.layers import Dense, Masking, LSTM

import matplotlib.pyplot as plt


def stateful_model():
    """Build and compile a stateful LSTM that takes one timestep per call.

    The input batch shape is fixed to (1, 1, 1). The LSTM is followed by a
    single-unit dense layer named 'output'.

    Returns:
        The compiled Sequential model.
    """
    n_hidden = 256

    recurrent = LSTM(n_hidden, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True)
    # NOTE(review): relu is unbounded while binary_crossentropy is normally
    # paired with a [0, 1] output -- 'sigmoid' may have been intended; confirm.
    readout = Dense(1, activation='relu', name='output')

    net = models.Sequential([recurrent, readout])
    net.compile(loss='binary_crossentropy', optimizer='rmsprop')
    return net


def train_rnn(x_train, y_train, max_len, mask):
    """Train a masked LSTM classifier on padded sequences.

    Args:
        x_train: training inputs passed straight to ``fit``; the input layer
            is declared with shape (max_len, 1).
        y_train: training targets passed straight to ``fit``.
        max_len: number of timesteps declared for the input layer.
        mask: value handed to the Masking layer to mark padded timesteps.

    Returns:
        The fitted Sequential model.
    """
    n_epochs, n_batch = 10, 200
    n_features, n_hidden = 1, 256

    net = models.Sequential([
        Masking(mask, name="in_layer", input_shape=(max_len, n_features)),
        LSTM(n_hidden, return_sequences=False),
        # NOTE(review): relu with binary_crossentropy looks unintended --
        # 'sigmoid' may have been meant; confirm before changing.
        Dense(1, activation='relu', name='output'),
    ])
    net.compile(loss='binary_crossentropy', optimizer='rmsprop')

    # 5% of the training data is held out for validation.
    net.fit(
        x_train,
        y_train,
        batch_size=n_batch,
        epochs=n_epochs,
        validation_split=0.05,
    )
    return net


def gen_train_sig_cls_pair(t_stops, num_examples, mask):
    """Generate padded training signals and their binary class labels.

    For every stop time in *t_stops*, ``num_examples`` signals are created.
    A random ``num_examples // 2`` of them get a 1 at the first timestep and
    the class True; the rest are all zeros with the class False. Every signal
    is as long as the largest stop time, and positions from ``t_stop`` onward
    are overwritten with *mask*.

    Args:
        t_stops: non-empty sequence of integer signal lengths.
        num_examples: number of signals generated per stop time.
        mask: padding value written after each signal's end; it is stored in
            an int8 array.

    Returns:
        A tuple ``(x, y)``: ``x`` is an int8 array of shape
        ``(len(t_stops) * num_examples, max(t_stops))`` and ``y`` is a bool
        array of shape ``(len(t_stops) * num_examples,)``.
    """
    signals = []
    labels = []
    max_t = int(np.max(t_stops))

    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)

        sig = np.zeros((num_examples, max_t), dtype=np.int8)
        sig[one_indices, 0] = 1
        # Padding is written last, so it wins over the marker when t_stop == 0.
        sig[:, t_stop:] = mask
        signals.append(sig)

        # Built-in bool: the np.bool alias is unavailable in NumPy 1.24-1.26,
        # while bool works on every NumPy version.
        cls = np.zeros(num_examples, dtype=bool)
        cls[one_indices] = True
        labels.append(cls)

    return np.concatenate(signals, axis=0), np.concatenate(labels, axis=0)


def gen_test_sig_cls_pair(t_stops, num_examples):
    """Generate unpadded test signals with per-timestep expected classes.

    For every stop time in *t_stops*, ``num_examples`` boolean signals of
    length ``t_stop`` are created. A random ``num_examples // 2`` of them
    start with True; for those the expected class is True at every timestep,
    for the others it is False at every timestep.

    Args:
        t_stops: sequence of integer signal lengths.
        num_examples: number of signals generated per stop time.

    Returns:
        A tuple ``(x, y)`` of two lists of 1-D bool arrays. ``x[i]`` is a
        signal and ``y[i]`` is its expected class repeated for each timestep;
        both have the same length.
    """
    signals = []
    expected = []

    for t_stop in t_stops:
        one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)

        # Built-in bool: the np.bool alias is unavailable in NumPy 1.24-1.26,
        # while bool works on every NumPy version.
        sig = np.zeros((num_examples, t_stop), dtype=bool)
        sig[one_indices, 0] = True
        signals.extend(sig)

        # Whole rows are set, so the class is repeated for every timestep.
        cls = np.zeros((num_examples, t_stop), dtype=bool)
        cls[one_indices] = True
        expected.extend(cls)

    return signals, expected


if __name__ == '__main__':
    # Script flow: train a masked model on padded signals, copy its weights
    # into a stateful single-step model, then run that model one timestep at
    # a time over unpadded test signals and plot the results.
    noise_mag = 0.01  # not used anywhere in this script
    mask_val = -10
    signal_lengths = (10, 15, 20)

    x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)

    # x_in[:, :, None] appends a trailing axis to the 2-D training array.
    longest = int(np.max(signal_lengths))
    mod = train_rnn(x_in[:, :, None], y_in, longest, mask_val)

    testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)

    state_mod = stateful_model()
    state_mod.set_weights(mod.get_weights())

    res = []
    for seq_in in testing_dat:
        # One predict call per timestep, each on a (1, 1, 1) input.
        for sample in seq_in:
            step_out = state_mod.predict(np.array([[[sample]]]))
            res.extend(step_out)

        # Reset the state so the next signal starts fresh.
        state_mod.reset_states()

    fig, (ax_input, ax_output) = plt.subplots(2)
    ax_input.plot(np.concatenate(testing_dat), label="input")

    ax_output.plot(res, "ro", label="result", alpha=0.2)
    ax_output.plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
    ax_output.legend(bbox_to_anchor=(1.1, 1))

    plt.show()

0 讨论(0)

猫巷女王i

2020-12-04 10:06

I am not clear about the embedding procedure, but here is a way to implement a variable-length input LSTM: just do not specify the timespan dimension when building the LSTM.

import keras.backend as K
from keras.layers import LSTM, Input

# Leaving the timespan dimension as None lets the same LSTM accept sequences
# of any length; only the feature size (200) is fixed.
seq_input = Input(shape=(None, 200))  # unknown timespan, fixed feature size
lstm = LSTM(20)
f = K.function(inputs=[seq_input], outputs=[lstm(seq_input)])

import numpy as np
data1 = np.random.random(size=(1, 100, 200))  # batch_size = 1, timespan = 100
# print() call form: the bare print statement is a SyntaxError on Python 3.
print(f([data1])[0].shape)
# (1, 20)

data2 = np.random.random(size=(1, 314, 200))  # batch_size = 1, timespan = 314
print(f([data2])[0].shape)
# (1, 20)

0 讨论(0)