I am new to Keras and have been working on an ML problem. About the data:
It has 5 input features, 4 output classes and about 26000 records.
I had first attempted it using MLPClassifier() as follows:
# Baseline scikit-learn multi-layer perceptron, fitted on the training data.
clf = MLPClassifier(
    # Architecture: four hidden layers.
    hidden_layer_sizes=(200, 100, 100, 100),
    # Optimiser: mini-batch SGD with an adaptive learning-rate schedule.
    solver='sgd',
    learning_rate='adaptive',
    learning_rate_init=0.095,
    batch_size=300,
    # Regularisation term (L2 penalty per the scikit-learn documentation).
    alpha=0.002,
    # Stopping criteria and logging.
    max_iter=500,
    tol=1e-6,
    verbose=True,
)
clf.fit(train, y_train)
After testing, I usually got an LB score of around 99.90. To gain more flexibility over the model, I decided to implement the same model in Keras as a starting point and then make changes to it in an attempt to increase the LB score. I came up with the following:
# Keras version of the network: 5 inputs -> 200 -> 100 -> dropout ->
# 100 -> 100 -> 4-way softmax, all layers using 'uniform' initialisation.
model = Sequential([
    Dense(200, input_dim=5, init='uniform', activation='relu'),
    Dense(100, init='uniform', activation='relu'),
    Dropout(0.2),
    Dense(100, init='uniform', activation='relu'),
    Dense(100, init='uniform', activation='relu'),
    Dense(4, init='uniform', activation='softmax'),
])

# SGD with Nesterov momentum; the decay is the initial learning rate divided
# by the number of epochs passed to fit() below.
lrate = 0.095
decay = lrate / 125
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=True)

model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train on the full training set; the fit result is kept in `hist`.
hist = model.fit(
    train,
    categorical_labels,
    batch_size=256,
    nb_epoch=125,
    shuffle=True,
    verbose=2,
)
The model seems pretty similar to the MLPClassifier() model, but the LB scores were pretty disappointing at around 97. Can somebody please tell me what exactly is wrong with this model, or how the MLPClassifier model can be replicated in Keras? I think regularisation might be one of the factors that went wrong here.
Edit 2: Here is the code:
# Import libraries.
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import MinMaxScaler, scale, StandardScaler, Normalizer
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import regularizers
from keras.optimizers import SGD
from keras.utils import to_categorical

# Load data.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Generic preprocessing: encode the 'DetectedCamera' feature as integers in
# both the training and the test frame.
mapping = {'Front': 0, 'Right': 1, 'Left': 2, 'Rear': 3}
train = train.replace({'DetectedCamera': mapping})
test = test.replace({'DetectedCamera': mapping})

# Rename the target column and encode its four classes as integers.
# Note that this mapping orders the classes differently from the feature
# mapping above.
train.rename(columns={'SignFacing (Target)': 'Target'}, inplace=True)
mapping = {'Front': 0, 'Left': 1, 'Rear': 2, 'Right': 3}
train = train.replace({'Target': mapping})

# Separate the target and the row ids from the feature columns.
y_train = train['Target']
test_id = test['Id']
train.drop(['Target', 'Id'], inplace=True, axis=1)
test.drop('Id', inplace=True, axis=1)

# Hold out part of the training data for validation.
train_train, train_test, y_train_train, y_train_test = train_test_split(
    train, y_train
)

# One-hot encode the integer targets, as required by the
# 'categorical_crossentropy' loss. num_classes is fixed to 4 so both arrays
# have the same width even if a class is absent from one side of the split.
categorical_labels = to_categorical(y_train_train, num_classes=4)
categorical_labels_test = to_categorical(y_train_test, num_classes=4)

# Standardise features; the scaler is fitted on the training split only and
# then applied unchanged to the validation split and the test set.
scaler = StandardScaler()
scaler.fit(train_train)
train_train = scaler.transform(train_train)
train_test = scaler.transform(train_test)
test = scaler.transform(test)

# Training and modelling: 5 inputs -> 200 -> 100 -> dropout -> 100 -> 100 ->
# 4-way softmax.
model = Sequential()
model.add(Dense(200, input_dim=5, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(100, kernel_initializer='uniform', activation='relu'))
model.add(Dense(4, kernel_initializer='uniform', activation='softmax'))

# SGD with Nesterov momentum and learning-rate decay.
lrate = 0.095
decay = lrate / 250
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd,
              metrics=['accuracy'])

# Fit on the training split and report metrics on the held-out split after
# every epoch. `epochs` is the Keras 2 name for the former `nb_epoch`.
hist = model.fit(train_train, categorical_labels,
                 validation_data=(train_test, categorical_labels_test),
                 epochs=100, batch_size=256, shuffle=True, verbose=2)