Keras accuracy does not change

非 Y 不嫁゛ 提交于 2019-11-28 16:48:37

问题


I have a few thousand audio files and I want to classify them using Keras and Theano. So far, I generated a 28x28 spectrograms (bigger is probably better, but I am just trying to get the algorithm work at this point) of each audio file and read the image into a matrix. So in the end I get this big image matrix to feed into the network for image classification.

In a tutorial I found this mnist classification code:

import numpy as np

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense
from keras.utils import np_utils

batch_size = 128
nb_classes = 10
nb_epochs = 2

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_train /= 255
X_test /= 255

print(X_train.shape[0], "train samples")
print(X_test.shape[0], "test samples")

y_train = np_utils.to_categorical(y_train, nb_classes)
y_test =  np_utils.to_categorical(y_test, nb_classes)

model = Sequential()

model.add(Dense(output_dim = 100, input_dim = 784, activation= "relu"))
model.add(Dense(output_dim = 200, activation = "relu"))
model.add(Dense(output_dim = 200, activation = "relu"))
model.add(Dense(output_dim = nb_classes, activation = "softmax"))

model.compile(optimizer = "adam", loss = "categorical_crossentropy")

model.fit(X_train, y_train, batch_size = batch_size, nb_epoch = nb_epochs, show_accuracy = True, verbose = 2, validation_data = (X_test, y_test))
score = model.evaluate(X_test, y_test, show_accuracy = True, verbose = 0)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])

This code runs, and I get the result as expected:

(60000L, 'train samples')
(10000L, 'test samples')
Train on 60000 samples, validate on 10000 samples
Epoch 1/2
2s - loss: 0.2988 - acc: 0.9131 - val_loss: 0.1314 - val_acc: 0.9607
Epoch 2/2
2s - loss: 0.1144 - acc: 0.9651 - val_loss: 0.0995 - val_acc: 0.9673
('Test score: ', 0.099454972004890438)
('Test accuracy: ', 0.96730000000000005)

Up to this point everything runs perfectly, however when I apply the above algorithm to my dataset, accuracy gets stuck.

My code is as follows:

import os

import pandas as pd

from sklearn.cross_validation import train_test_split

from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.utils import np_utils

import AudioProcessing as ap
import ImageTools as it

batch_size = 128
nb_classes = 2
nb_epoch = 10  


for i in range(20):
    print "\n"
# Generate spectrograms if necessary
if(len(os.listdir("./AudioNormalPathalogicClassification/Image")) > 0):
    print "Audio files are already processed. Skipping..."
else:
    print "Generating spectrograms for the audio files..."
    ap.audio_2_image("./AudioNormalPathalogicClassification/Audio/","./AudioNormalPathalogicClassification/Image/",".wav",".png",(28,28))

# Read the result csv
df = pd.read_csv('./AudioNormalPathalogicClassification/Result/result.csv', header = None)

df.columns = ["RegionName","IsNormal"]

bool_mapping = {True : 1, False : 0}

nb_classes = 2

for col in df:
    if(col == "RegionName"):
        a = 3      
    else:
        df[col] = df[col].map(bool_mapping)

y = df.iloc[:,1:].values

y = np_utils.to_categorical(y, nb_classes)

# Load images into memory
print "Loading images into memory..."
X = it.load_images("./AudioNormalPathalogicClassification/Image/",".png")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

X_train = X_train.reshape(X_train.shape[0], 784)
X_test = X_test.reshape(X_test.shape[0], 784)
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_train /= 255
X_test /= 255

print("X_train shape: " + str(X_train.shape))
print(str(X_train.shape[0]) + " train samples")
print(str(X_test.shape[0]) + " test samples")

model = Sequential()


model.add(Dense(output_dim = 100, input_dim = 784, activation= "relu"))
model.add(Dense(output_dim = 200, activation = "relu"))
model.add(Dense(output_dim = 200, activation = "relu"))
model.add(Dense(output_dim = nb_classes, activation = "softmax"))

model.compile(loss = "categorical_crossentropy", optimizer = "adam")

print model.summary()

model.fit(X_train, y_train, batch_size = batch_size, nb_epoch = nb_epoch, show_accuracy = True, verbose = 1, validation_data = (X_test, y_test))
score = model.evaluate(X_test, y_test, show_accuracy = True, verbose = 1)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])

AudioProcessing.py

import os
import scipy as sp
import scipy.io.wavfile as wav
import matplotlib.pylab as pylab
import Image

def save_spectrogram_scipy(source_filename, destination_filename, size):
    dt = 0.0005
    NFFT = 1024       
    Fs = int(1.0/dt)  
    fs, audio = wav.read(source_filename)
    if(len(audio.shape) >= 2):
        audio = sp.mean(audio, axis = 1)
    fig = pylab.figure()    
    ax = pylab.Axes(fig, [0,0,1,1])    
    ax.set_axis_off()
    fig.add_axes(ax) 
    pylab.specgram(audio, NFFT = NFFT, Fs = Fs, noverlap = 900, cmap="gray")
    pylab.savefig(destination_filename)
    img = Image.open(destination_filename).convert("L")
    img = img.resize(size)
    img.save(destination_filename)
    pylab.clf()
    del img

def audio_2_image(source_directory, destination_directory, audio_extension, image_extension, size):
    nb_files = len(os.listdir(source_directory));
    count = 0
    for file in os.listdir(source_directory):
        if file.endswith(audio_extension):        
            destinationName = file[:-4]
            save_spectrogram_scipy(source_directory + file, destination_directory + destinationName + image_extension, size)
            count += 1
            print ("Generating spectrogram for files " + str(count) + " / " + str(nb_files) + ".")

ImageTools.py

import os
import numpy as np
import matplotlib.image as mpimg
def load_images(source_directory, image_extension):
    image_matrix = []
    nb_files = len(os.listdir(source_directory));
    count = 0
    for file in os.listdir(source_directory):
        if file.endswith(image_extension):
            with open(source_directory + file,"r+b") as f:
                img = mpimg.imread(f)
                img = img.flatten()                
                image_matrix.append(img)
                del img
                count += 1
                #print ("File " + str(count) + " / " + str(nb_files) + " loaded.")
    return np.asarray(image_matrix)

So I run the above code and recieve:

Audio files are already processed. Skipping...
Loading images into memory...
X_train shape: (2394L, 784L)
2394 train samples
1027 test samples
--------------------------------------------------------------------------------
Initial input shape: (None, 784)
--------------------------------------------------------------------------------
Layer (name)                  Output Shape                  Param #
--------------------------------------------------------------------------------
Dense (dense)                 (None, 100)                   78500
Dense (dense)                 (None, 200)                   20200
Dense (dense)                 (None, 200)                   40200
Dense (dense)                 (None, 2)                     402
--------------------------------------------------------------------------------
Total params: 139302
--------------------------------------------------------------------------------
None
Train on 2394 samples, validate on 1027 samples
Epoch 1/10
2394/2394 [==============================] - 0s - loss: 0.6898 - acc: 0.5455 - val_loss: 0.6835 - val_acc: 0.5716
Epoch 2/10
2394/2394 [==============================] - 0s - loss: 0.6879 - acc: 0.5522 - val_loss: 0.6901 - val_acc: 0.5716
Epoch 3/10
2394/2394 [==============================] - 0s - loss: 0.6880 - acc: 0.5522 - val_loss: 0.6842 - val_acc: 0.5716
Epoch 4/10
2394/2394 [==============================] - 0s - loss: 0.6883 - acc: 0.5522 - val_loss: 0.6829 - val_acc: 0.5716
Epoch 5/10
2394/2394 [==============================] - 0s - loss: 0.6885 - acc: 0.5522 - val_loss: 0.6836 - val_acc: 0.5716
Epoch 6/10
2394/2394 [==============================] - 0s - loss: 0.6887 - acc: 0.5522 - val_loss: 0.6832 - val_acc: 0.5716
Epoch 7/10
2394/2394 [==============================] - 0s - loss: 0.6882 - acc: 0.5522 - val_loss: 0.6859 - val_acc: 0.5716
Epoch 8/10
2394/2394 [==============================] - 0s - loss: 0.6882 - acc: 0.5522 - val_loss: 0.6849 - val_acc: 0.5716
Epoch 9/10
2394/2394 [==============================] - 0s - loss: 0.6885 - acc: 0.5522 - val_loss: 0.6836 - val_acc: 0.5716
Epoch 10/10
2394/2394 [==============================] - 0s - loss: 0.6877 - acc: 0.5522 - val_loss: 0.6849 - val_acc: 0.5716
1027/1027 [==============================] - 0s
('Test score: ', 0.68490593621422047)
('Test accuracy: ', 0.57156767283349563)

I tried changing the network, adding more epochs, but I always get the same result no matter what. I don't understand why I am getting the same result.

Any help would be appreciated. Thank you.

Edit: I found a mistake where pixel values were not read correctly. I fixed the ImageTools.py below as:

import os
import numpy as np
from scipy.misc import imread

def load_images(source_directory, image_extension):
    image_matrix = []
    nb_files = len(os.listdir(source_directory));
    count = 0
    for file in os.listdir(source_directory):
        if file.endswith(image_extension):
            with open(source_directory + file,"r+b") as f:
                img = imread(f)                
                img = img.flatten()                        
                image_matrix.append(img)
                del img
                count += 1
                #print ("File " + str(count) + " / " + str(nb_files) + " loaded.")
    return np.asarray(image_matrix)

Now I actually get grayscale pixel values from 0 to 255, so now my dividing it by 255 makes sense. However, I still get the same result.


回答1:


The most likely reason is that the optimizer is not suited to your dataset. Here is a list of Keras optimizers from the documentation.

I recommend you first try SGD with default parameter values. If it still doesn't work, divide the learning rate by 10. Do that a few times if necessary. If your learning rate reaches 1e-6 and it still doesn't work, then you have another problem.

In summary, replace this line:

model.compile(loss = "categorical_crossentropy", optimizer = "adam")

with this:

from keras.optimizers import SGD
opt = SGD(lr=0.01)
model.compile(loss = "categorical_crossentropy", optimizer = opt)

and change the learning rate a few times if it doesn't work.

If it was the problem, you should see the loss getting lower after just a few epochs.




回答2:


Check out this one

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile( loss = "categorical_crossentropy", 
               optimizer = sgd, 
               metrics=['accuracy']
             )

Check out the documentation

I had better results with MNIST




回答3:


After some examination, I found that the issue was the data itself. It was very dirty as in same input had 2 different outputs, hence creating confusion. After clearing up the data now my accuracy goes up to %69. Still not enough to be good, but at least I can now work my way up from here now that the data is clear.

I used the below code to test:

import os
import sys

import pandas as pd
import numpy as np

from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Dense, Activation, Dropout, Flatten
from keras.utils import np_utils

sys.path.append("./")
import AudioProcessing as ap
import ImageTools as it


# input image dimensions
img_rows, img_cols = 28, 28
dim = 1
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3

batch_size = 128
nb_classes = 2
nb_epoch = 200

for i in range(20):
    print "\n"

## Generate spectrograms if necessary
if(len(os.listdir("./AudioNormalPathalogicClassification/Image")) > 0):
    print "Audio files are already processed. Skipping..."
else:
    # Read the result csv
    df = pd.read_csv('./AudioNormalPathalogicClassification/Result/AudioNormalPathalogicClassification_result.csv', header = None, encoding = "utf-8")

    df.columns = ["RegionName","Filepath","IsNormal"]

    bool_mapping = {True : 1, False : 0}

    for col in df:
        if(col == "RegionName" or col == "Filepath"):
            a = 3      
        else:
            df[col] = df[col].map(bool_mapping)

    region_names = df.iloc[:,0].values
    filepaths = df.iloc[:,1].values
    y = df.iloc[:,2].values
    #Generate spectrograms and make a new CSV file
    print "Generating spectrograms for the audio files..."
    result = ap.audio_2_image(filepaths, region_names, y, "./AudioNormalPathalogicClassification/Image/", ".png",(img_rows,img_cols))
    df = pd.DataFrame(data = result)
    df.to_csv("NormalVsPathalogic.csv",header= False, index = False, encoding = "utf-8")

# Load images into memory
print "Loading images into memory..."
df = pd.read_csv('NormalVsPathalogic.csv', header = None, encoding = "utf-8")
y = df.iloc[:,0].values
y = np_utils.to_categorical(y, nb_classes)
y = np.asarray(y)

X = df.iloc[:,1:].values
X = np.asarray(X)
X = X.reshape(X.shape[0], dim, img_rows, img_cols)
X = X.astype("float32")
X /= 255

print X.shape

model = Sequential()

model.add(Convolution2D(64, nb_conv, nb_conv,
                        border_mode='valid',
                        input_shape=(1, img_rows, img_cols)))

model.add(Activation('relu'))

model.add(Convolution2D(32, nb_conv, nb_conv))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))

model.add(Dropout(0.25))

model.add(Flatten())

model.add(Dense(128))
model.add(Activation('relu'))

model.add(Dropout(0.5))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adadelta')

print model.summary()

model.fit(X, y, batch_size = batch_size, nb_epoch = nb_epoch, show_accuracy = True, verbose = 1)



回答4:


I faced a similar issue. One-hot encoding the target variable using nputils in Keras, solved the issue of accuracy and validation loss being stuck. Using weights for balancing the target classes further improved performance.

Solution :

from keras.utils.np.utils import to_categorical
y_train = to_categorical(y_train)
y_val = to_categorical(y_val) 



回答5:


If the accuracy is not changing, it means the optimizer has found a local minimum for the loss. This may be an undesirable minimum. One common local minimum is to always predict the class with the most number of data points. You should use weighting on the classes to avoid this minimum.

from sklearn.utils import compute_class_weight
classWeight = compute_class_weight('balanced', outputLabels, outputs) 
classWeight = dict(enumerate(classWeight))
model.fit(X_train, y_train, batch_size = batch_size, nb_epoch = nb_epochs, show_accuracy = True, verbose = 2, validation_data = (X_test, y_test), class_weight=classWeight)



回答6:


I've the same problem as you my solution was a loop instead of epochs

for i in range(10):
  history = model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    workers=6,
                    epochs=1)
and you can as well save the model each epoch so you can pause the training after any epoch you want

for i in range(10):
  history = model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    workers=6,
                    epochs=1)
  #save model
  model.save('drive/My Drive/vggnet10epochs.h5')
  model = load_model('drive/My Drive/vggnet10epochs.h5')


来源:https://stackoverflow.com/questions/37213388/keras-accuracy-does-not-change

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!