Efficiently grab gradients from TensorFlow?

后端未结

关注

 2  414

I\'m trying to implement an asynchronous parameter server, DistBelief style using TensorFlow. I found that minimize() is split into two functions, compute_gradients and appl

相关标签:

2条回答

爱一瞬间的悲伤

2020-12-24 04:10

Just give you a simple example. Understand it and try your specific task out.

Initialize required symbols.

x = tf.Variable(0.5)
y = x*x
opt = tf.train.AdagradOptimizer(0.1)
grads = opt.compute_gradients(y)
grad_placeholder = [(tf.placeholder("float", shape=grad[1].get_shape()), grad[1] for grad in grads]
apply_placeholder_op = opt.apply_gradients(grad_placeholder)
transform_grads = [(function1(grad[0]), grad[1]) for grad in grads]
apply_transform_op = opt.apply_gradients(transform_grads)

Initialize

sess = tf.Session()
sess.run(tf.initialize_all_variables())

Get all gradients

grad_vals = sess.run([grad[0] for grad in grads])

Apply gradients

feed_dict = {}
for i in xrange(len(grad_placeholder)):
    feed_dict[grad_placeholder[i][0]] = function2(grad_vals[i])
sess.run(apply_placeholder_op, feed_dict=feed_dict)
sess.run(apply_transform_op)

Note: the code hasn't been tested by myself, but I confirm the code is legal except minor code errors. Note: function1 and function2 is kind of computation, such as 2*x, x^e or e^x and so on.

Refer: TensorFlow apply_gradients remotely

0 讨论(0)

一向

2020-12-24 04:19

I coded up a very simple example with comments (inspired from the above answer) that is runnable to see gradient descent in action:

import tensorflow as tf

#funciton to transform gradients
def T(g, decay=1.0):
    #return decayed gradient
    return decay*g

# x variable
x = tf.Variable(10.0,name='x')
# b placeholder (simualtes the "data" part of the training)
b = tf.placeholder(tf.float32)
# make model (1/2)(x-b)^2
xx_b = 0.5*tf.pow(x-b,2)
y=xx_b

learning_rate = 1.0
opt = tf.train.GradientDescentOptimizer(learning_rate)
# gradient variable list = [ (gradient,variable) ]
gv = opt.compute_gradients(y,[x])
# transformed gradient variable list = [ (T(gradient),variable) ]
decay = 0.1 # decay the gradient for the sake of the example
tgv = [(T(g,decay=decay),v) for (g,v) in gv] #list [(grad,var)]
# apply transformed gradients (this case no transform)
apply_transform_op = opt.apply_gradients(tgv)
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    epochs = 10
    for i in range(epochs):
        b_val = 1.0 #fake data (in SGD it would be different on every epoch)
        print '----'
        x_before_update = x.eval()
        print 'before update',x_before_update

        # compute gradients
        grad_vals = sess.run([g for (g,v) in gv], feed_dict={b: b_val})
        print 'grad_vals: ',grad_vals
        # applies the gradients
        result = sess.run(apply_transform_op, feed_dict={b: b_val})

        print 'value of x should be: ', x_before_update - T(grad_vals[0], decay=decay)
        x_after_update = x.eval()
        print 'after update', x_after_update

you can observe the change in the variable as its trained and also the value of the gradient. Note that the only reason T decays the gradient because otherwise it reaches the global minimum in 1 step.

As an extra bonus, if you want to see it work with tensorboard, here you go! :)

## run cmd to collect model: python quadratic_minimizer.py --logdir=/tmp/quaratic_temp
## show board on browser run cmd: tensorboard --logdir=/tmp/quaratic_temp
## browser: http://localhost:6006/

import tensorflow as tf

#funciton to transform gradients
def T(g, decay=1.0):
    #return decayed gradient
    return decay*g

# x variable
x = tf.Variable(10.0,name='x')
# b placeholder (simualtes the "data" part of the training)
b = tf.placeholder(tf.float32)
# make model (1/2)(x-b)^2
xx_b = 0.5*tf.pow(x-b,2)
y=xx_b

learning_rate = 1.0
opt = tf.train.GradientDescentOptimizer(learning_rate)
# gradient variable list = [ (gradient,variable) ]
gv = opt.compute_gradients(y,[x])
# transformed gradient variable list = [ (T(gradient),variable) ]
decay = 0.9 # decay the gradient for the sake of the example
tgv = [ (T(g,decay=decay), v) for (g,v) in gv] #list [(grad,var)]
# apply transformed gradients (this case no transform)
apply_transform_op = opt.apply_gradients(tgv)

(dydx,_) = tgv[0]
x_scalar_summary = tf.scalar_summary("x", x)
grad_scalar_summary = tf.scalar_summary("dydx", dydx)

with tf.Session() as sess:
    merged = tf.merge_all_summaries()
    tensorboard_data_dump = '/tmp/quaratic_temp'
    writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph)

    sess.run(tf.initialize_all_variables())
    epochs = 14
    for i in range(epochs):
        b_val = 1.0 #fake data (in SGD it would be different on every epoch)
        print '----'
        x_before_update = x.eval()
        print 'before update',x_before_update

        # get gradients
        #grad_list = [g for (g,v) in gv]
        (summary_str_grad,grad_val) = sess.run([merged] + [dydx], feed_dict={b: b_val})
        grad_vals = sess.run([g for (g,v) in gv], feed_dict={b: b_val})
        print 'grad_vals: ',grad_vals
        writer.add_summary(summary_str_grad, i)

        # applies the gradients
        [summary_str_apply_transform,_] = sess.run([merged,apply_transform_op], feed_dict={b: b_val})
        writer.add_summary(summary_str_apply_transform, i)

        print 'value of x after update should be: ', x_before_update - T(grad_vals[0], decay=decay)
        x_after_update = x.eval()
        print 'after update', x_after_update

0 讨论(0)