Computing the gradient of the loss using Tensorflow.js

Submitted by 不羁岁月 on 2019-12-08 01:04:34

Question


I am trying to compute the gradient of a loss with respect to a network's trainable weights using TensorFlow.js, in order to apply these gradients to the network's weights. In Python this is easily done with tf.gradients(), which takes a minimum of two inputs, ys and xs. However, I am not able to reproduce that behaviour in TensorFlow.js. I am not sure whether my understanding of the gradient of the loss w.r.t. the weights is wrong, or whether my code contains mistakes.

I have spent some time analysing the core code of the tfjs-node package to understand how this is done when tf.model.fit() is called, but with little success so far.

let model = build_model(); //Two stacked dense layers followed by two parallel dense layers for the output
let loss = compute_loss(...); //This function returns a tf.Tensor of shape [1] containing the mean loss for the batch.
const f = () => loss;
const grad = tf.variableGrads(f);
grad(model.getWeights());

The model.getWeights() function returns an array of tf.variable(), so I assumed tf.variableGrads() would compute dL/dW for each layer, which I could then apply to my network's weights. However, that is not what happens, as I get this error:

Error: Cannot compute gradient of y=f(x) with respect to x. Make sure that the f you passed encloses all operations that lead from x to y.

I don't quite understand what this error means. How am I supposed to compute the gradient of the loss (analogous to tf.gradients() in Python) using TensorFlow.js?

Edit: this is the function that computes the loss:

function compute_loss(done, new_state, memory, agent, gamma=0.99) {
    let reward_sum = 0.;
    if(done) {
        reward_sum = 0.;
    } else {
        reward_sum = agent.call(tf.oneHot(new_state, 12).reshape([1, 9, 12]))
                    .values.flatten().get(0);
    }

    let discounted_rewards = [];
    let memory_reward_rev = memory.rewards;
    for(let reward of memory_reward_rev.reverse()) {
        reward_sum = reward + gamma * reward_sum;
        discounted_rewards.push(reward_sum);
    }
    discounted_rewards.reverse();

    let onehot_states = [];
    for(let state of memory.states) {
        onehot_states.push(tf.oneHot(state, 12));
    }
    let init_onehot = onehot_states[0];

    for(let i=1; i<onehot_states.length;i++) {
        init_onehot = init_onehot.concat(onehot_states[i]);
    }

    let log_val = agent.call(
        init_onehot.reshape([memory.states.length, 9, 12])
    );

    let disc_reward_tensor = tf.tensor(discounted_rewards);
    let advantage = disc_reward_tensor.reshapeAs(log_val.values).sub(log_val.values);
    let value_loss = advantage.square();
    log_val.values.print();

    let policy = tf.softmax(log_val.logits);
    let logits_cpy = log_val.logits.clone();

    let entropy = policy.mul(logits_cpy.mul(tf.scalar(-1))); 
    entropy = entropy.sum();

    let memory_actions = [];
    for(let i=0; i< memory.actions.length; i++) {
        memory_actions.push(new Array(2000).fill(0));
        memory_actions[i][memory.actions[i]] = 1;
    }
    memory_actions = tf.tensor(memory_actions);
    let policy_loss = tf.losses.softmaxCrossEntropy(memory_actions.reshape([memory.actions.length, 2000]), log_val.logits);

    let value_loss_copy = value_loss.clone();
    let entropy_mul = (entropy.mul(tf.scalar(0.01))).mul(tf.scalar(-1));
    let total_loss_1 = value_loss_copy.mul(tf.scalar(0.5, 'float32'));

    let total_loss_2 = total_loss_1.add(policy_loss);
    let total_loss = total_loss_2.add(entropy_mul);
    total_loss.print();
    return total_loss.mean();

}

EDIT 2:

I managed to use compute_loss as the loss function passed to model.compile(). But a loss function there is required to take exactly two inputs (predictions and labels), so this does not work for me, as I need to feed in several additional parameters.
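For reference, a compile-time loss can only ever look something like the sketch below (the optimizer and the cross-entropy call are placeholders of mine, not my actual code), which is why parameters such as done, new_state or memory cannot be threaded through it:

model.compile({
    optimizer: tf.train.adam(1e-3), // placeholder optimizer and learning rate
    // model.compile() only ever passes (yTrue, yPred) to the loss function,
    // so no extra arguments can reach it from here
    loss: (yTrue, yPred) => tf.losses.softmaxCrossEntropy(yTrue, yPred)
});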

I am truly lost on the matter.


Answer 1:


The error says it all. Your issue has to do with tf.variableGrads. loss should be a scalar computed using the available tf tensor operators; loss should not simply return a precomputed tensor, as in your question.

Here is an example of what loss should be:

const a = tf.variable(tf.tensor1d([3, 4]));
const b = tf.variable(tf.tensor1d([5, 6]));
const x = tf.tensor1d([1, 2]);

const f = () => a.mul(x.square()).add(b.mul(x)).sum(); // f is a function
// df/da = x ^ 2, df/db = x 
const {value, grads} = tf.variableGrads(f); // gradient of f with respect to each variable

Object.keys(grads).forEach(varName => grads[varName].print());

/!\ Notice that the gradient is calculated with respect to variables created using tf.variable
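Once you have grads, you would typically hand it to an optimizer to update the variables. A minimal sketch continuing the example above (the SGD optimizer and learning rate are arbitrary choices of mine):

const optimizer = tf.train.sgd(0.1);
// grads maps each variable name to its gradient tensor, which is exactly
// what applyGradients expects; a and b are updated in place
optimizer.applyGradients(grads);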

Update:

You're not computing the gradients the way they should be computed. Here is the fix.

function compute_loss(done, new_state, memory, agent, gamma=0.99) {
    const f = () => {
    let reward_sum = 0.;
    if(done) {
        reward_sum = 0.;
    } else {
        reward_sum = agent.call(tf.oneHot(new_state, 12).reshape([1, 9, 12]))
                    .values.flatten().get(0);
    }

    let discounted_rewards = [];
    let memory_reward_rev = memory.rewards;
    for(let reward of memory_reward_rev.reverse()) {
        reward_sum = reward + gamma * reward_sum;
        discounted_rewards.push(reward_sum);
    }
    discounted_rewards.reverse();

    let onehot_states = [];
    for(let state of memory.states) {
        onehot_states.push(tf.oneHot(state, 12));
    }
    let init_onehot = onehot_states[0];

    for(let i=1; i<onehot_states.length;i++) {
        init_onehot = init_onehot.concat(onehot_states[i]);
    }

    let log_val = agent.call(
        init_onehot.reshape([memory.states.length, 9, 12])
    );

    let disc_reward_tensor = tf.tensor(discounted_rewards);
    let advantage = disc_reward_tensor.reshapeAs(log_val.values).sub(log_val.values);
    let value_loss = advantage.square();
    log_val.values.print();

    let policy = tf.softmax(log_val.logits);
    let logits_cpy = log_val.logits.clone();

    let entropy = policy.mul(logits_cpy.mul(tf.scalar(-1))); 
    entropy = entropy.sum();

    let memory_actions = [];
    for(let i=0; i< memory.actions.length; i++) {
        memory_actions.push(new Array(2000).fill(0));
        memory_actions[i][memory.actions[i]] = 1;
    }
    memory_actions = tf.tensor(memory_actions);
    let policy_loss = tf.losses.softmaxCrossEntropy(memory_actions.reshape([memory.actions.length, 2000]), log_val.logits);

    let value_loss_copy = value_loss.clone();
    let entropy_mul = (entropy.mul(tf.scalar(0.01))).mul(tf.scalar(-1));
    let total_loss_1 = value_loss_copy.mul(tf.scalar(0.5, 'float32'));

    let total_loss_2 = total_loss_1.add(policy_loss);
    let total_loss = total_loss_2.add(entropy_mul);
    total_loss.print();
    return total_loss.mean().asScalar();
}

return tf.variableGrads(f);
}

Notice that you can quickly run into memory consumption issues. It is advisable to wrap the differentiated function in tf.tidy to dispose of the intermediate tensors.
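For instance, something along these lines (a sketch of mine, assuming an optimizer has been created beforehand, e.g. with tf.train.adam()):

const {value, grads} = tf.tidy(
    // intermediate tensors created inside compute_loss are disposed here;
    // only the returned loss value and gradients are kept alive
    () => compute_loss(done, new_state, memory, agent)
);
optimizer.applyGradients(grads);
value.dispose();
tf.dispose(grads);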



Source: https://stackoverflow.com/questions/54728772/computing-the-gradient-of-the-loss-using-tensorflow-js
