Custom loss function involving gradients in Keras/Tensorflow

Question

I've seen that this question has been asked a few times before, but without any resolution. My problem is simple: I would like to implement a loss function that computes the MSE between the gradient of the prediction with respect to the input and the true values (eventually moving on to much more complicated loss functions).
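
In symbols, with x the network input and ŷ = y_pred, the intended loss is mean((dŷ/dx − y_true)²).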

I define the following two functions:

def my_loss(y_true, y_pred, x):
    dydx = K.gradients(y_pred, x)
    return K.mean(K.square(dydx - y_true), axis=-1)

def my_loss_function(x):
    def gradLoss(y_true, y_pred):
        return my_loss(y_true, y_pred, x)
    return gradLoss
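
(As an aside: K.gradients wraps tf.gradients and returns a Python list of gradient tensors, one per tensor in x, so dydx above is a list rather than a single tensor. For a single input tensor, indexing the list would look like the sketch below, which may or may not be related to the error that follows.)

def my_loss(y_true, y_pred, x):
    # K.gradients returns a list; take the lone gradient tensor
    dydx = K.gradients(y_pred, x)[0]
    return K.mean(K.square(dydx - y_true), axis=-1)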

Then, in my model I call (x here is the model's Input tensor, as in the full listing below)

model_loss = my_loss_function(x)
model.compile(optimizer=Adam(lr=0.01),
              loss=model_loss)

but I receive the following error:

ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.

For reference, the entire code is included below. What is the proper way to implement such a loss function?

import tensorflow as tf
from tensorflow import keras

import numpy as np
import math
import matplotlib.pyplot as plt

from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam

##################################################################################
# set neural network parameters
#     \param [in] NUM_HIDDEN_NODES: neurons per hidden layer
#     \param [in] NUM_EXAMPLES:     total number of examples
#     \param [in] TRAIN_SPLIT:      proportion of examples that are for training
#     \param [in] MINI_BATCH_SIZE:  batch size for optimization step
#     \param [in] NUM_EPOCHS:       iterations for training
##################################################################################
NUM_HIDDEN_NODES = 10
NUM_EXAMPLES = 500
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 100
NUM_EPOCHS = 400


##################################################################################
# define the function to approximate
#     \param [in] x: list of inputs to evaluate function at
##################################################################################
def my_function(x):
    return np.sin(x)
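
# Note: since the loss drives d(y_pred)/dx toward y_true = sin(x), a perfectly
# trained network would learn an antiderivative of sin(x), i.e. -cos(x) + C.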


##################################################################################
# generate training and test data according to TRAIN_SPLIT
#     \param [in] start:    starting point for the data
#     \param [in] end:      ending point for the data
#     \param [out] x_train: data on which the neural network will be trained
#     \param [out] y_train: data on which the neural network will be trained
#     \param [out] x_test:  data on which the trained neural network will be 
#                           validated
#     \param [out] y_test:  data on which the trained neural network will be 
#                           validated
##################################################################################
def create_data(start, end):
    x = np.float32(np.random.uniform(start, end, (1, NUM_EXAMPLES))).T

    train_size = int(NUM_EXAMPLES*TRAIN_SPLIT)
    x_train = x[:train_size]
    x_test = x[train_size:]
    y_train = my_function(x_train)
    y_test = my_function(x_test)

    return (x_train, y_train, x_test, y_test)


from keras import backend as K

def my_loss(y_true, y_pred, x):
    dydx = K.gradients(y_pred, x)
    return K.mean(K.square(dydx - y_true), axis=-1)

def my_loss_function(x):
    def gradLoss(y_true, y_pred):
        return my_loss(y_true, y_pred, x)
    return gradLoss

##################################################################################
# generate the neural network model
##################################################################################
def create_model():
    x = Input(shape=(1, ))

    # hidden layers with tanh activation function
    h1 = Dense(NUM_HIDDEN_NODES, activation="tanh")(x)
    h2 = Dense(NUM_HIDDEN_NODES, activation="tanh")(h1)
    h3 = Dense(NUM_HIDDEN_NODES, activation="tanh")(h2)

    # linear activation layer
    y = Dense(1, activation="linear")(h3)

    model = Model(x, y)

    model_loss = my_loss_function(x)

    model.compile(optimizer=Adam(lr=0.01),
                  loss=model_loss)

    return model


##################################################################################
# routine for training the neural network
#     \param [in] x_train:  data on which the neural network will be trained
#     \param [in] y_train:  data on which the neural network will be trained
#     \param [in] x_test:   data on which the trained neural network will be
#                           validated
#     \param [in] y_test:   data on which the trained neural network will be
#                           validated
#     \param [out] model:   the trained neural network model
##################################################################################
def train(x_train, y_train, x_test, y_test):
    model = create_model()

    model.fit(x_train, y_train, 
              epochs=NUM_EPOCHS, 
              batch_size=MINI_BATCH_SIZE, 
              validation_data=(x_test, y_test)
              )

    return model

# generate training and test data and train the neural network
x_train, y_train, x_test, y_test = create_data(-2.0*math.pi, 2.0*math.pi)
model = train(x_train, y_train, x_test, y_test)


##################################################################################
# use the neural network model to compute the function at test data
#     \param [in] model: the trained neural network model
#     \param [in] x:     x values at which to test the trained model
##################################################################################
def predict_targets(model, x):
    return model.predict(x)


##################################################################################
# plot the exact data against the predicted data
#     \param [in] x: x data
#     \param [in] y_true: exact y data
#     \param [in] y_pred: neural network prediction
##################################################################################
def plot_predictions(x, y_true, y_pred):
    plt.figure(1)
    plt.plot(x, y_true)
    plt.plot(x, y_pred)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()


# plot neural network prediction on the original training data
predictions = predict_targets(model, x_train)
indexes = list(range(len(x_train)))
indexes.sort(key=x_train.__getitem__)
x_train = list(map(x_train.__getitem__, indexes))
y_train = list(map(y_train.__getitem__, indexes))
predictions = list(map(predictions.__getitem__, indexes))
plot_predictions(x_train, y_train, predictions)


# plot neural network prediction on the validation/test data
x = np.linspace(-2*math.pi, 2*math.pi, 1000).reshape(-1, 1)  # column vector to match Input(shape=(1,))
y = my_function(x)
predictions = predict_targets(model, x)
plot_predictions(x, y, predictions)
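
For completeness, here is a hedged sketch (not from the original post, and untested against it) of how the same gradient-based loss could be written in TF 2.x, where tf.GradientTape makes the input gradient explicit and an outer tape differentiates the resulting loss with respect to the weights. The function name gradient_mse_step is illustrative.

import tensorflow as tf

def gradient_mse_step(model, optimizer, x_batch, y_batch):
    # Fit d(model(x))/dx to y_batch with an MSE penalty.
    x_batch = tf.convert_to_tensor(x_batch, dtype=tf.float32)
    y_batch = tf.convert_to_tensor(y_batch, dtype=tf.float32)
    with tf.GradientTape() as param_tape:
        with tf.GradientTape() as input_tape:
            input_tape.watch(x_batch)  # inputs are not Variables, so watch them
            y_pred = model(x_batch, training=True)
        dydx = input_tape.gradient(y_pred, x_batch)  # d(prediction)/d(input)
        loss = tf.reduce_mean(tf.square(dydx - y_batch))
    # Second order: gradient of the gradient-based loss w.r.t. the weights.
    grads = param_tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

A training loop would then call this step once per mini-batch in place of model.fit.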

Source: https://stackoverflow.com/questions/56138334/custom-loss-function-involving-gradients-in-keras-tensorflow
