问题

I have these feature and label, that are not linear enough to be satisfied with linear solution. I trained SVR(kernel='rbf') model from sklearn, but now its time to do it with tensorflow, and its hard to say what one should write to achieve same or better effect.

Do you see that lazy orange line down there? It doesn't fill you with determination

code itself:

import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model, clean_df
import os
from sklearn import preprocessing

graph = tf.get_default_graph()

# tf variables
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')
lin_model = tf.add(tf.multiply(x_, w), b)

#loss
loss = tf.reduce_mean(tf.pow(lin_model - y_, 2), name='loss')
train_step = tf.train.GradientDescentOptimizer(0.000000025).minimize(loss)

#nonlinear part
nonlin_model = tf.tanh(tf.add(tf.multiply(x_, w), b))
nonlin_loss = tf.reduce_mean(tf.pow(nonlin_model - y_, 2), name='cost')
train_step_nonlin = tf.train.GradientDescentOptimizer(0.000000025).minimize(nonlin_loss)       


# pandas data
df_train = pd.read_csv('me_rate.csv', header=None)

liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)

#model for prediction
mms = preprocessing.MinMaxScaler()
rbf = get_model(path_to_model)


n_epochs = 200
train_errors = []
non_train_errors = []
test_errors = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in tqdm.tqdm(range(n_epochs)):

        _, train_err, summ = sess.run([train_step, loss, summaries],
                                feed_dict={x_: parrots, y_: liters})
        summary_writer.add_summary(summ, i)
        train_errors.append(train_err)

        _, non_train_err, = sess.run([train_step_nonlin, nonlin_loss],
                                      feed_dict={x_: parrots, y_: liters})
        non_train_errors.append(non_train_err)


    plt.plot(list(range(n_epochs)), train_errors, label='train_lin')
    plt.plot(list(range(n_epochs)), non_train_errors, label='train_nonlin')
    plt.legend()
    print(train_errors[:10])
    print(non_train_errors[:10])
    plt.show()

    plt.scatter(parrots, liters, label='actual data')
    plt.plot(parrots, sess.run(lin_model, feed_dict={x_: parrots}), label='linear (tf)')
    plt.plot(parrots, sess.run(nonlin_model, feed_dict={x_: parrots}), label='nonlinear (tf)')
    plt.plot(parrots, rbf.predict(mms.fit_transform(parrots)), label='rbf (sklearn)')
    plt.legend()
    plt.show()

How to motivate that orange line?

Part After.

Code looks like:

import pandas as pd
import numpy as np
import tensorflow as tf
import tqdm
import matplotlib.pyplot as plt
from omnicomm_data.test_data import get_model
import os
from sklearn import preprocessing

graph = tf.get_default_graph()

# tf variables
x_ = tf.placeholder(name="input", shape=[None, 1], dtype=np.float32)
y_ = tf.placeholder(name="output", shape=[None, 1], dtype=np.float32)
w = tf.Variable(tf.random_normal([]), name='weight')
b = tf.Variable(tf.random_normal([]), name='bias')

# nonlinear
nonlin_model = tf.add(tf.multiply(tf.tanh(x_), w), b)
nonlin_loss = tf.reduce_mean(tf.pow(nonlin_model - y_, 2), name='cost')
train_step_nonlin = tf.train.GradientDescentOptimizer(0.01).minimize(nonlin_loss)


# pandas data
df_train = pd.read_csv('me_rate.csv', header=None)


liters = df_train.iloc[:, 0].values.reshape(-1, 1)
parrots = df_train.iloc[:, 1].values.reshape(-1, 1)


#model for prediction
mms = preprocessing.MinMaxScaler()
rbf = get_model(path_to_model)


nz = preprocessing.MaxAbsScaler()  # normalization coz tanh
norm_parrots = nz.fit_transform(parrots)
print(norm_parrots)

n_epochs = 20000
train_errors = []
non_train_errors = []
test_errors = []
weights = []
biases = []

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in tqdm.tqdm(range(n_epochs)):

        _, non_train_err, weight, bias = sess.run([train_step_nonlin, nonlin_loss, w, b],
                                      feed_dict={x_: norm_parrots, y_: liters})
        non_train_errors.append(non_train_err)
        weights.append(weight)
        biases.append(bias)


    plt.scatter(norm_parrots, liters, label='actual data')

    plt.plot(norm_parrots, sess.run(nonlin_model, feed_dict={x_: norm_parrots}), c='orange', label='nonlinear (tf)')
    plt.plot(norm_parrots, rbf.predict(mms.fit_transform(parrots)), label='rbf (sklearn)')
    plt.legend()
    plt.show()

Asyoucanclearlysee we got some improvements for orange line (not quite good as rbf, but its just need more work).

回答1:

You are using tf.tanh as activation, it means that your output is limited in the range [-1,1]. Therefore it will never fit your data.

Edit: I have removed a part noticing a typo that has already been fixed.

来源：https://stackoverflow.com/questions/44721841/tensorflow-nonlinear-regression

标签

python

tensorflow

non-linear-regression

Tensorflow. Nonlinear regression

问题

How to motivate that orange line?

回答1: