How do I create a variable-length input LSTM in Keras?

悲&欢浪女 2020-12-04 09:08

I am trying to do some vanilla pattern recognition with an LSTM using Keras to predict the next element in a sequence.

My data is a set of variable-length integer sequences (a 'Sequence' column, one sequence per row), where the target for each sequence is its next element.
3 Answers
  • 2020-12-04 09:54

    I'm not sure how applicable recurrent networks are for your sequences, i.e. how strongly each element depends on the sequence preceding it as opposed to other factors. That said (which doesn't help you one bit, of course), if you don't want to pad your input with some bad value, a stateful model that processes a single timestep at a time is, IMHO, the only alternative for variable-length sequences. If you don't mind taking an alternative approach to encoding:

    import numpy as np
    import keras.models as kem
    import keras.layers as kel
    import keras.callbacks as kec
    import sklearn.preprocessing as skprep
    
    X_train, max_features = {'Sequence': [[1, 2, 4, 5, 8, 10, 16], [1, 2, 1, 5, 5, 1, 11, 16, 7]]}, 16
    
    num_mem_units = 64
    size_batch = 1
    num_timesteps = 1
    num_features = 1
    num_targets = 1
    num_epochs = 1500
    
    model = kem.Sequential()
    model.add(kel.LSTM(num_mem_units, stateful=True,  batch_input_shape=(size_batch, num_timesteps, num_features),
      return_sequences=True))
    model.add(kel.Dense(num_targets, activation='sigmoid'))
    model.summary()
    model.compile(loss='binary_crossentropy', optimizer='adam')
    
    range_act = (0, 1) # sigmoid
    range_features = np.array([0, max_features]).reshape(-1, 1)
    normalizer = skprep.MinMaxScaler(feature_range=range_act)
    normalizer.fit(range_features)
    
    reset_state = kec.LambdaCallback(on_epoch_end=lambda *_ : model.reset_states())
    
    # training
    for seq in X_train['Sequence']:
        X = seq[:-1]
        y = seq[1:] # predict next element
        X_norm = normalizer.transform(np.array(X).reshape(-1, 1)).reshape(-1, num_timesteps, num_features)
        y_norm = normalizer.transform(np.array(y).reshape(-1, 1)).reshape(-1, num_timesteps, num_targets)
        model.fit(X_norm, y_norm, epochs=num_epochs, batch_size=size_batch, shuffle=False,
          callbacks=[reset_state])
    
    # prediction
    for seq in X_train['Sequence']:
        model.reset_states() 
        for istep in range(len(seq)-1): # input up to not incl last
            val = seq[istep]
            X = np.array([val]).reshape(-1, 1)
            X_norm = normalizer.transform(X).reshape(-1, num_timesteps, num_features)
            y_norm = model.predict(X_norm)
        yhat = int(normalizer.inverse_transform(y_norm[0])[0, 0])
        y = seq[-1] # last
        put = '{0} predicts {1:d}, expecting {2:d}'.format(', '.join(str(val) for val in seq[:-1]), yhat, y)
        print(put)
    

    which produces something like:

    1, 2, 4, 5, 8, 10 predicts 11, expecting 16
    1, 2, 1, 5, 5, 1, 11, 16 predicts 7, expecting 7
    

    with a ridiculously high loss, however.
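    If you then want to continue a sequence beyond its known values, one option is to feed the model's own rounded prediction back in as the next input. A minimal sketch reusing the model and normalizer objects from above (continue_sequence is just an illustrative helper name, not part of the answer's code):

    def continue_sequence(model, normalizer, seed, num_steps,
                          num_timesteps=1, num_features=1):
        # prime the stateful LSTM with the known (non-empty) seed values
        model.reset_states()
        for val in seed:
            X_norm = normalizer.transform(np.array([[val]])).reshape(-1, num_timesteps, num_features)
            y_norm = model.predict(X_norm)
        preds = []
        for _ in range(num_steps):
            # denormalize and round the prediction, then feed it back in
            yhat = int(round(normalizer.inverse_transform(y_norm[0])[0, 0]))
            preds.append(yhat)
            X_norm = normalizer.transform(np.array([[yhat]])).reshape(-1, num_timesteps, num_features)
            y_norm = model.predict(X_norm)
        return preds

    print(continue_sequence(model, normalizer, [1, 2, 4, 5, 8, 10], num_steps=3))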

  • 2020-12-04 10:02

    The trick to training and classifying variable-length sequences is to train with masking and classify with a stateful network. Here's an example I made that classifies whether a variable-length sequence starts with zero or not.

    import numpy as np
    np.random.seed(1)
    
    import tensorflow as tf
    tf.set_random_seed(1)
    
    from keras import models
    from keras.layers import Dense, Masking, LSTM
    
    import matplotlib.pyplot as plt
    
    
    def stateful_model():
        hidden_units = 256
    
        model = models.Sequential()
        model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
        model.add(Dense(1, activation='relu', name='output'))
    
        model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    
        return model
    
    
    def train_rnn(x_train, y_train, max_len, mask):
        epochs = 10
        batch_size = 200
    
        vec_dims = 1
        hidden_units = 256
        in_shape = (max_len, vec_dims)
    
        model = models.Sequential()
    
        model.add(Masking(mask_value=mask, name="in_layer", input_shape=in_shape))
        model.add(LSTM(hidden_units, return_sequences=False))
        model.add(Dense(1, activation='relu', name='output'))
    
        model.compile(loss='binary_crossentropy', optimizer='rmsprop')
    
        model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                  validation_split=0.05)
    
        return model
    
    
    def gen_train_sig_cls_pair(t_stops, num_examples, mask):
        x = []
        y = []
        max_t = int(np.max(t_stops))
    
        for t_stop in t_stops:
            one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
    
            sig = np.zeros((num_examples, max_t), dtype=np.int8)
            sig[one_indices, 0] = 1
            sig[:, t_stop:] = mask
            x.append(sig)
    
            cls = np.zeros(num_examples, dtype=bool)
            cls[one_indices] = 1
            y.append(cls)
    
        return np.concatenate(x, axis=0), np.concatenate(y, axis=0)
    
    
    def gen_test_sig_cls_pair(t_stops, num_examples):
        x = []
        y = []
    
        for t_stop in t_stops:
            one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
    
            sig = np.zeros((num_examples, t_stop), dtype=bool)
            sig[one_indices, 0] = 1
            x.extend(list(sig))
    
            cls = np.zeros((num_examples, t_stop), dtype=bool)
            cls[one_indices] = 1
            y.extend(list(cls))
    
        return x, y
    
    
    if __name__ == '__main__':
        noise_mag = 0.01
        mask_val = -10
        signal_lengths = (10, 15, 20)
    
        x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
    
        mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)
    
        testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)
    
        state_mod = stateful_model()
        state_mod.set_weights(mod.get_weights())
    
        res = []
        for s_i in range(len(testing_dat)):
            seq_in = list(testing_dat[s_i])
            seq_len = len(seq_in)
    
            for t_i in range(seq_len):
                res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
    
            state_mod.reset_states()
    
        fig, axes = plt.subplots(2)
        axes[0].plot(np.concatenate(testing_dat), label="input")
    
        axes[1].plot(res, "ro", label="result", alpha=0.2)
        axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
        axes[1].legend(bbox_to_anchor=(1.1, 1))
    
        plt.show()
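
    If your raw data is already a list of variable-length sequences rather than generated as above, the padded training tensor for the masked model can be built with Keras' pad_sequences. A minimal sketch, where mask_val and the toy sequences are just placeholders:

    import numpy as np
    from keras.preprocessing.sequence import pad_sequences

    mask_val = -10
    seqs = [[1, 0, 0, 1], [0, 1, 1], [1, 1, 0, 0, 1]]  # toy variable-length sequences
    # pad at the end with the mask value so the Masking layer skips those steps
    x_padded = pad_sequences(seqs, maxlen=5, dtype='float32',
                             padding='post', value=mask_val)
    x_padded = x_padded[:, :, None]  # add the feature axis -> shape (3, 5, 1)
    print(x_padded.shape)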
    
  • 2020-12-04 10:06

    I am not clear about your embedding procedure, but here is a way to implement a variable-length input LSTM: just do not specify the timespan dimension when building the LSTM.

    import keras.backend as K
    from keras.layers import LSTM, Input
    
    I = Input(shape=(None, 200)) # unknown timespan, fixed feature size
    lstm = LSTM(20)
    f = K.function(inputs=[I], outputs=[lstm(I)])
    
    import numpy as np
    data1 = np.random.random(size=(1, 100, 200)) # batch_size = 1, timespan = 100
    print(f([data1])[0].shape)
    # (1, 20)
    
    data2 = np.random.random(size=(1, 314, 200)) # batch_size = 1, timespan = 314
    print(f([data2])[0].shape)
    # (1, 20)
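
    One caveat if you want to train such a model rather than just run it through K.function: with shape=(None, 200), all sequences inside a single batch still need the same length, so in practice you train with batch_size=1 or bucket sequences by length. A minimal sketch of a trainable variant (layer sizes and labels here are placeholders):

    import numpy as np
    from keras.models import Model
    from keras.layers import Input, LSTM, Dense

    inp = Input(shape=(None, 200))  # unknown timespan, 200 features per step
    out = Dense(1, activation='sigmoid')(LSTM(20)(inp))
    model = Model(inp, out)
    model.compile(loss='binary_crossentropy', optimizer='adam')

    # each call is one batch; the sequence length may differ between calls
    model.train_on_batch(np.random.random((1, 100, 200)), np.array([[1.]]))
    model.train_on_batch(np.random.random((1, 314, 200)), np.array([[0.]]))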
    