Tying Autoencoder Weights in a Dense Keras Layer

徘徊边缘 提交于 2019-12-04 18:09:34

So after showing my approach, my question is, is this a valid way to tie weights in a Dense Keras layer?

Yes, it's valid.

My fear is that this will only set them equal when the model is build, but not actually tie them. My hope is that the backend transpose function is tying them through references under the hood, but I am sure that I am missing something.

It actually ties them in a computation graph, you can check in printing model.summary() that there's just one copy of these trainable weights. Also, after training your model you can check weights of corresponding layers with model.get_weights(). When the model is build there're no weights yet actually, just placeholders for them.

random.seed(1)

class DenseTied(Layer):
    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 tied_to=None,
                 **kwargs):
        self.tied_to = tied_to
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super().__init__(**kwargs)
        self.units = units
        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)
        self.input_spec = InputSpec(min_ndim=2)
        self.supports_masking = True

    def build(self, input_shape):
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]

        if self.tied_to is not None:
            self.kernel = K.transpose(self.tied_to.kernel)
            self._non_trainable_weights.append(self.kernel)
        else:
            self.kernel = self.add_weight(shape=(input_dim, self.units),
                                          initializer=self.kernel_initializer,
                                          name='kernel',
                                          regularizer=self.kernel_regularizer,
                                          constraint=self.kernel_constraint)
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
        else:
            self.bias = None

        self.built = True

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1] == self.units
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

    def call(self, inputs):
        output = K.dot(inputs, self.kernel)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output


# input_ = Input(shape=(16,), dtype=np.float32)
# encoder
#
encoded1 = Dense(4, activation="sigmoid", input_shape=(4,), use_bias=True)
decoded1 = DenseTied(4, activation="sigmoid", tied_to=encoded1, use_bias=False)

# autoencoder
#
autoencoder = Sequential()
# autoencoder.add(input_)
autoencoder.add(encoded1)
autoencoder.add(decoded1)

autoencoder.compile(optimizer="adam", loss="binary_crossentropy")

print(autoencoder.summary())

autoencoder.fit(x=np.random.rand(100, 4), y=np.random.randint(0, 1, size=(100, 4)))

print(autoencoder.layers[0].get_weights()[0])
print(autoencoder.layers[1].get_weights()[0])
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!