#!/usr/bin/env python
"""Plot an MFCC heat map for every WAV file in a directory.

For each ``.wav`` file found directly inside ``rootdir`` the script:

1. reads the audio with ``scipy.io.wavfile``,
2. computes MFCC features, subtracts the per-coefficient mean and shows the
   first ``PLOT_FRAMES`` frames as a heat map,
3. computes ``logfbank`` features and stores them under the file name.

After the loop the ``{file name: logfbank features}`` dictionary is pickled
to ``data.pkl`` inside ``rootdir``.
"""
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import delta  # not used below; import retained
from python_speech_features import logfbank
from python_speech_features import mfcc

rootdir = '...'  # path to the directory containing the WAV files

# Number of leading MFCC frames drawn in the heat map. The author truncates
# the features so that a blank stretch in the recording does not spoil the
# picture (see the accompanying notes).
PLOT_FRAMES = 25

# Name of the pickle written into ``rootdir`` once all files are processed.
OUTPUT_NAME = 'data.pkl'

dic = {}
for name in os.listdir(rootdir):
    path = os.path.join(rootdir, name)
    # Only regular ``.wav`` files are processed. Without the extension check
    # a re-run would try to parse the ``data.pkl`` written by a previous run
    # (or any other stray file) as audio and fail inside ``wav.read``.
    if not os.path.isfile(path) or not name.lower().endswith('.wav'):
        continue

    print(name)
    rate, sig = wav.read(path)

    # MFCC uses the library's default nfft here, whereas logfbank below is
    # called with nfft=551.
    mfcc_feat = mfcc(sig, rate)
    # Subtract the mean over the frame axis (axis 0) from every coefficient;
    # the 1e-8 offset is kept from the original script.
    mfcc_feat -= np.mean(mfcc_feat, axis=0) + 1e-8

    # Swap the axes so that frames run along x and coefficients along y.
    mfcc_data = np.swapaxes(mfcc_feat[:PLOT_FRAMES, :], 0, 1)

    _, ax = plt.subplots()
    ax.imshow(
        mfcc_data,
        cmap=plt.cm.jet,
        extent=[0, mfcc_data.shape[1], 0, mfcc_data.shape[0]],
        aspect='auto',
    )
    ax.set_title('MFCC')
    plt.show()

    fbank_feat = logfbank(sig, rate, nfft=551)
    print(len(fbank_feat))
    dic[name] = fbank_feat

# The context manager guarantees the pickle file is flushed and closed.
with open(os.path.join(rootdir, OUTPUT_NAME), 'wb') as output:
    pickle.dump(dic, output)
本文重点想记录的是 MFCC 的画图方法。绘制热力图最关键的一句是:plt.imshow(mfcc_data, cmap=plt.cm.jet, extent=[0, mfcc_data.shape[1], 0, mfcc_data.shape[0]], aspect='auto')
如果音频文件本身有空白,可能出现如下情况:
因此加了一句截断的代码:mfcc_feat2 = mfcc_feat[0:25,:]
只画前 25 帧,不显示空白(零值)部分,让整体更好看。
参考资料:
stackoverflow
Speech Processing for Machine Learning
python 实现MFCC
文章来源: MFCC python plot