Tensorflow. Nonlinear regression

南楼画角 提交于 2019-12-10 13:45:56

问题


I have these feature and label, that are not linear enough to be satisfied with linear solution. I trained SVR(kernel='rbf') model from sklearn, but now its time to do it with tensorflow, and its hard to say what one should write to achieve same or better effect.

Do you see that lazy orange line down there? It doesn't fill you with determination

code itself:

import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model, clean_df
import os
from sklearn import preprocessing

graph = tf.get_default_graph()

# tf variables
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')
lin_model = tf.add(tf.multiply(x_, w), b)

#loss
loss = tf.reduce_mean(tf.pow(lin_model - y_, 2), name='loss')
train_step = tf.train.GradientDescentOptimizer(0.000000025).minimize(loss)

#nonlinear part
nonlin_model = tf.tanh(tf.add(tf.multiply(x_, w), b))
nonlin_loss = tf.reduce_mean(tf.pow(nonlin_model - y_, 2), name='cost')
train_step_nonlin = tf.train.GradientDescentOptimizer(0.000000025).minimize(nonlin_loss)       


# pandas data
df_train = pd.read_csv('me_rate.csv', header=None)

liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)

#model for prediction
mms = preprocessing.MinMaxScaler()
rbf = get_model(path_to_model)


n_epochs = 200
train_errors = []
non_train_errors = []
test_errors = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in tqdm.tqdm(range(n_epochs)):

        _, train_err, summ = sess.run([train_step, loss, summaries],
                                feed_dict={x_: parrots, y_: liters})
        summary_writer.add_summary(summ, i)
        train_errors.append(train_err)

        _, non_train_err, = sess.run([train_step_nonlin, nonlin_loss],
                                      feed_dict={x_: parrots, y_: liters})
        non_train_errors.append(non_train_err)


    plt.plot(list(range(n_epochs)), train_errors, label='train_lin')
    plt.plot(list(range(n_epochs)), non_train_errors, label='train_nonlin')
    plt.legend()
    print(train_errors[:10])
    print(non_train_errors[:10])
    plt.show()

    plt.scatter(parrots, liters, label='actual data')
    plt.plot(parrots, sess.run(lin_model, feed_dict={x_: parrots}), label='linear (tf)')
    plt.plot(parrots, sess.run(nonlin_model, feed_dict={x_: parrots}), label='nonlinear (tf)')
    plt.plot(parrots, rbf.predict(mms.fit_transform(parrots)), label='rbf (sklearn)')
    plt.legend()
    plt.show()

How to motivate that orange line?

Part After.

Code looks like:

import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model
import os
from sklearn import preprocessing

graph = tf.get_default_graph()

# tf variables
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')

# nonlinear
nonlin_model = tf.add(tf.multiply(tf.tanh(x_), w), b)
nonlin_loss = tf.reduce_mean(tf.pow(nonlin_model - y_, 2), name='cost')
train_step_nonlin = tf.train.GradientDescentOptimizer(0.01).minimize(nonlin_loss)


# pandas data
df_train = pd.read_csv('me_rate.csv', header=None)


liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)


#model for prediction
mms = preprocessing.MinMaxScaler()
rbf = get_model(path_to_model)


nz = preprocessing.MaxAbsScaler()  # normalization coz tanh
norm_parrots = nz.fit_transform(parrots)
print(norm_parrots)

n_epochs = 20000
train_errors = []
non_train_errors = []
test_errors = []
weights = []
biases = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in tqdm.tqdm(range(n_epochs)):

        _, non_train_err, weight, bias = sess.run([train_step_nonlin, nonlin_loss, w, b],
                                      feed_dict={x_: norm_parrots, y_: liters})
        non_train_errors.append(non_train_err)
        weights.append(weight)
        biases.append(bias)


    plt.scatter(norm_parrots, liters, label='actual data')

    plt.plot(norm_parrots, sess.run(nonlin_model, feed_dict={x_: norm_parrots}), c='orange', label='nonlinear (tf)')
    plt.plot(norm_parrots, rbf.predict(mms.fit_transform(parrots)), label='rbf (sklearn)')
    plt.legend()
    plt.show()

Asyoucanclearlysee we got some improvements for orange line (not quite good as rbf, but its just need more work).


回答1:


You are using tf.tanh as activation, it means that your output is limited in the range [-1,1]. Therefore it will never fit your data.

Edit: I have removed a part noticing a typo that has already been fixed.



来源:https://stackoverflow.com/questions/44721841/tensorflow-nonlinear-regression

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!