Efficiently grab gradients from TensorFlow?

Asked by 小鲜肉 on 2020-12-24 03:20

I'm trying to implement an asynchronous parameter server, DistBelief style, using TensorFlow. I found that minimize() is split into two functions, compute_gradients and apply_gradients.
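For reference, a minimal sketch of that split, using the same 0.x-era API as the answer below (here `loss` is assumed to be defined elsewhere):

    opt = tf.train.GradientDescentOptimizer(0.1)
    # the two calls below are together equivalent to opt.minimize(loss)
    gv = opt.compute_gradients(loss)    # list of (gradient, variable) pairs
    train_op = opt.apply_gradients(gv)  # op that applies the updates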

2 Answers
  •  一向 (OP) · 2020-12-24 04:19

    I coded up a very simple example with comments (inspired by the answer above) that you can run to see gradient descent in action:

    import tensorflow as tf
    
    # function to transform gradients
    def T(g, decay=1.0):
        # return the decayed gradient
        return decay * g
    
    # x variable
    x = tf.Variable(10.0, name='x')
    # b placeholder (simulates the "data" part of the training)
    b = tf.placeholder(tf.float32)
    # make model (1/2)(x-b)^2
    y = 0.5 * tf.pow(x - b, 2)
    
    learning_rate = 1.0
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    # gradient variable list = [ (gradient, variable) ]
    gv = opt.compute_gradients(y, [x])
    # transformed gradient variable list = [ (T(gradient), variable) ]
    decay = 0.1  # decay the gradient for the sake of the example
    tgv = [(T(g, decay=decay), v) for (g, v) in gv]
    # op that applies the transformed gradients
    apply_transform_op = opt.apply_gradients(tgv)
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        epochs = 10
        for i in range(epochs):
            b_val = 1.0  # fake data (in SGD it would be different on every epoch)
            print('----')
            x_before_update = x.eval()
            print('before update', x_before_update)
    
            # compute the gradients
            grad_vals = sess.run([g for (g, v) in gv], feed_dict={b: b_val})
            print('grad_vals: ', grad_vals)
            # apply the transformed gradients
            sess.run(apply_transform_op, feed_dict={b: b_val})
    
            print('value of x should be: ', x_before_update - T(grad_vals[0], decay=decay))
            x_after_update = x.eval()
            print('after update', x_after_update)
    

    You can observe the change in the variable as it is trained, as well as the value of the gradient. Note that the only reason T decays the gradient is that otherwise plain gradient descent would reach the global minimum in a single step: with learning_rate = 1.0 and a quadratic objective, the update x - 1.0*(x - b) lands exactly on b.
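    For concreteness, this is the arithmetic for the first iteration (with the initial x = 10.0 and b = 1.0 from the script above):

    # dy/dx  = x - b                    = 10.0 - 1.0      = 9.0
    # T(g)   = decay * g                = 0.1 * 9.0       = 0.9
    # update : x - learning_rate * T(g) = 10.0 - 1.0*0.9  = 9.1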


    As an extra bonus, if you want to see it work with TensorBoard, here you go! :)

    ## run cmd to collect model: python quadratic_minimizer.py --logdir=/tmp/quadratic_temp
    ## to show the board, run cmd: tensorboard --logdir=/tmp/quadratic_temp
    ## browser: http://localhost:6006/
    
    import tensorflow as tf
    
    # function to transform gradients
    def T(g, decay=1.0):
        # return the decayed gradient
        return decay * g
    
    # x variable
    x = tf.Variable(10.0, name='x')
    # b placeholder (simulates the "data" part of the training)
    b = tf.placeholder(tf.float32)
    # make model (1/2)(x-b)^2
    y = 0.5 * tf.pow(x - b, 2)
    
    learning_rate = 1.0
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    # gradient variable list = [ (gradient, variable) ]
    gv = opt.compute_gradients(y, [x])
    # transformed gradient variable list = [ (T(gradient), variable) ]
    decay = 0.9  # decay the gradient for the sake of the example
    tgv = [(T(g, decay=decay), v) for (g, v) in gv]
    # op that applies the transformed gradients
    apply_transform_op = opt.apply_gradients(tgv)
    
    (dydx, _) = tgv[0]
    x_scalar_summary = tf.scalar_summary("x", x)
    grad_scalar_summary = tf.scalar_summary("dydx", dydx)
    
    with tf.Session() as sess:
        merged = tf.merge_all_summaries()
        tensorboard_data_dump = '/tmp/quadratic_temp'
        writer = tf.train.SummaryWriter(tensorboard_data_dump, sess.graph)
    
        sess.run(tf.initialize_all_variables())
        epochs = 14
        for i in range(epochs):
            b_val = 1.0  # fake data (in SGD it would be different on every epoch)
            print('----')
            x_before_update = x.eval()
            print('before update', x_before_update)
    
            # get the gradients (and the merged summaries)
            (summary_str_grad, grad_val) = sess.run([merged, dydx], feed_dict={b: b_val})
            grad_vals = sess.run([g for (g, v) in gv], feed_dict={b: b_val})
            print('grad_vals: ', grad_vals)
            writer.add_summary(summary_str_grad, i)
    
            # apply the transformed gradients
            [summary_str_apply_transform, _] = sess.run([merged, apply_transform_op], feed_dict={b: b_val})
            writer.add_summary(summary_str_apply_transform, i)
    
            print('value of x after update should be: ', x_before_update - T(grad_vals[0], decay=decay))
            x_after_update = x.eval()
            print('after update', x_after_update)
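
    For readers on TensorFlow 2.x, where tf.train.GradientDescentOptimizer, placeholders, and sessions no longer exist, the same compute/transform/apply loop might look like this (an untested sketch using eager execution and tf.GradientTape):

    import tensorflow as tf  # TF 2.x
    
    decay = 0.1
    x = tf.Variable(10.0, name='x')
    opt = tf.keras.optimizers.SGD(learning_rate=1.0)
    
    for i in range(10):
        b = 1.0  # fake data
        with tf.GradientTape() as tape:
            y = 0.5 * tf.pow(x - b, 2)         # same quadratic model
        [g] = tape.gradient(y, [x])            # grab the gradient explicitly
        opt.apply_gradients([(decay * g, x)])  # transform it, then apply it
        print('after update', x.numpy())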
    
