Importing Libraries

import pandas as pd
import numpy as np
import os
import sys
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from IPython.display import Audio
import tensorflow as tf
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Flatten, Dropout, BatchNormalization

import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

Dataset

Crema = "/kaggle/input/cremad/AudioWAV/"
crema_directory_list = os.listdir(Crema)
file_emotion = []
file_path = []

for file in crema_directory_list:
    # storing file paths
    file_path.append(Crema + file)
    # storing file emotions: the third field of the file name
    # (e.g. 1001_DFA_ANG_XX.wav) is the emotion code
    part = file.split('_')
    if part[2] == 'SAD':
        file_emotion.append('sad')
    elif part[2] == 'ANG':
        file_emotion.append('angry')
    elif part[2] == 'DIS':
        file_emotion.append('disgust')
    elif part[2] == 'FEA':
        file_emotion.append('fear')
    elif part[2] == 'HAP':
        file_emotion.append('happy')
    elif part[2] == 'NEU':
        file_emotion.append('neutral')
    else:
        file_emotion.append('Unknown')

emotion_df = pd.DataFrame(file_emotion, columns=['Emotions'])
path_df = pd.DataFrame(file_path, columns=['Path'])
Crema_df = pd.concat([emotion_df, path_df], axis=1)
Crema_df.head()

   Emotions  Path
0  disgust   /kaggle/input/cremad/AudioWAV/1028_TSI_DIS_XX.wav
1  happy     /kaggle/input/cremad/AudioWAV/1075_IEO_HAP_LO.wav
2  happy     /kaggle/input/cremad/AudioWAV/1084_ITS_HAP_XX.wav
3  disgust   /kaggle/input/cremad/AudioWAV/1067_IWW_DIS_XX.wav
4  disgust   /kaggle/input/cremad/AudioWAV/1066_TIE_DIS_XX.wav

plt.title('Count of Emotions', size=16)
# seaborn >= 0.12 requires the column to be passed as a keyword argument
sns.countplot(x=Crema_df.Emotions)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()
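For exact per-class counts to go with the plot, value_counts on the same column works (a convenience check, not in the original notebook):

# exact number of clips per emotion category
Crema_df.Emotions.value_counts()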
Visualization

def create_waveplot(data, sr, e):
    plt.figure(figsize=(10, 3))
    plt.title('Waveplot for {} emotion'.format(e), size=15)
    # librosa.display.waveplot was removed in librosa 0.10; waveshow is its replacement
    librosa.display.waveshow(data, sr=sr)
    plt.show()
def create_spectrogram(data, sr, e):
    # short-time Fourier transform, converted to decibels for display
    X = librosa.stft(data)
    Xdb = librosa.amplitude_to_db(abs(X))
    plt.figure(figsize=(12, 3))
    plt.title('Spectrogram for {} emotion'.format(e), size=15)
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
    plt.show()
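create_spectrogram uses a linear frequency axis (y_axis='hz'); for speech, a log-scaled axis often makes the harmonic structure easier to see. A variant along those lines (a suggestion, not part of the original notebook):

def create_log_spectrogram(data, sr, e):
    # hypothetical variant of create_spectrogram with a log frequency axis
    Xdb = librosa.amplitude_to_db(abs(librosa.stft(data)))
    plt.figure(figsize=(12, 3))
    plt.title('Log-frequency spectrogram for {} emotion'.format(e), size=15)
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar()
    plt.show()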
emotion='angry'
path = np.array(Crema_df.Path[Crema_df.Emotions==emotion])[0]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
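The cells above inspect a single 'angry' clip; the same helpers can be looped over every category for a side-by-side comparison (a convenience sketch, not in the original notebook — it relies only on Crema_df and the two plotting helpers defined above):

# render one example waveform and spectrogram per emotion
for emotion in ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad']:
    sample_path = np.array(Crema_df.Path[Crema_df.Emotions == emotion])[0]
    sample, sr = librosa.load(sample_path)
    create_waveplot(sample, sr, emotion)
    create_spectrogram(sample, sr, emotion)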
MFCC Extraction

labels = {'disgust': 0, 'happy': 1, 'sad': 2, 'neutral': 3, 'fear': 4, 'angry': 5}
Crema_df.replace({'Emotions': labels}, inplace=True)

num_mfcc = 13
n_fft = 2048
hop_length = 512
SAMPLE_RATE = 22050

data = {
    "labels": [],
    "mfcc": []
}

for i in range(len(Crema_df)):  # 7442 clips
    data['labels'].append(Crema_df.iloc[i, 0])
    signal, sample_rate = librosa.load(Crema_df.iloc[i, 1], sr=SAMPLE_RATE)
    # keyword arguments are required by librosa >= 0.10
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=num_mfcc,
                                n_fft=n_fft, hop_length=hop_length)
    mfcc = mfcc.T  # shape: (frames, num_mfcc)
    data["mfcc"].append(np.asarray(mfcc))
    if i % 500 == 0:
        print(i)

0 500 1000 1500 2000 2500 3000 3500 4000 4500 5000 5500 6000 6500 7000

Padding MFCC to make them of equal length

y = np.asarray(data["labels"])
# pad every clip to the length of the longest one; dtype is set explicitly
# because pad_sequences defaults to int32, which would truncate the float MFCCs
X = tf.keras.preprocessing.sequence.pad_sequences(data['mfcc'], dtype='float32')
X.shape

(7442, 216, 13)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.2)
print(X_train.shape, y_train.shape, X_validation.shape, y_validation.shape, X_test.shape, y_test.shape)

(5357, 216, 13) (5357,) (1340, 216, 13) (1340,) (745, 216, 13) (745,)

Model

def build_model(input_shape):
    model = tf.keras.Sequential()
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(LSTM(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(6, activation='softmax'))
    return model
input_shape = (None, 13)  # variable number of frames, 13 MFCCs per frame
model = build_model(input_shape)
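One caveat with the padded inputs: pad_sequences fills shorter clips with all-zero frames, and the LSTMs above process those frames as if they were real audio. A possible variant (my addition, not part of the original model) inserts a Masking layer so the recurrent layers skip padded timesteps:

from tensorflow.keras.layers import Masking

def build_masked_model(input_shape):
    # hypothetical alternative to build_model: same stack, but timesteps
    # consisting entirely of the mask value (0.0) are ignored by the LSTMs
    model = tf.keras.Sequential()
    model.add(Masking(mask_value=0.0, input_shape=input_shape))
    model.add(LSTM(128, return_sequences=True))
    model.add(LSTM(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(6, activation='softmax'))
    return model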
optimiser = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimiser, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, None, 128)         72704
lstm_1 (LSTM)                (None, 64)                49408
dense (Dense)                (None, 64)                4160
dropout (Dropout)            (None, 64)                0
dense_1 (Dense)              (None, 6)                 390
=================================================================
Total params: 126,662
Trainable params: 126,662
Non-trainable params: 0
_________________________________________________________________
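ReduceLROnPlateau and ModelCheckpoint are imported at the top of the notebook but never used; the training run below keeps the plain fit call. As a sketch, they could be attached like this (the monitor targets, patience, and checkpoint filename are my assumptions, not tuned settings):

# halve the learning rate when validation loss stalls, and keep the
# best-scoring weights seen so far (hypothetical settings)
lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3,
                                min_lr=1e-6, verbose=1)
checkpoint = ModelCheckpoint('best-model.h5', monitor='val_accuracy',
                             save_best_only=True, verbose=1)

history = model.fit(X_train, y_train,
                    validation_data=(X_validation, y_validation),
                    batch_size=32, epochs=30,
                    callbacks=[lr_schedule, checkpoint])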
Training

history = model.fit(X_train, y_train,
                    validation_data=(X_validation, y_validation),
                    batch_size=32, epochs=30)

Per-epoch metrics over the 168 training batches (each epoch ran in roughly 4 s at ~23 ms/step, 9 s for the first):

Epoch  loss    accuracy  val_loss  val_accuracy
    1  1.6471    0.2998    1.4173        0.4373
    2  1.5058    0.3737    1.4070        0.4388
    3  1.4647    0.4006    1.3801        0.4515
    4  1.4572    0.4115    1.3795        0.4507
    5  1.4235    0.4205    1.3509        0.4485
    6  1.3981    0.4334    1.3364        0.4582
    7  1.3942    0.4391    1.3362        0.4642
    8  1.3706    0.4609    1.2877        0.4761
    9  1.3394    0.4601    1.3043        0.4664
   10  1.3195    0.4828    1.2969        0.4679
   11  1.3135    0.4834    1.2508        0.5090
   12  1.2816    0.5153    1.2401        0.5164
   13  1.2723    0.5016    1.2301        0.5201
   14  1.2353    0.5257    1.2099        0.5194
   15  1.2186    0.5198    1.1667        0.5530
   16  1.1892    0.5366    1.1892        0.5381
   17  1.1847    0.5515    1.1699        0.5254
   18  1.1964    0.5474    1.1640        0.5396
   19  1.1418    0.5676    1.1774        0.5515
   20  1.0904    0.5906    1.1691        0.5582
   21  1.0924    0.5863    1.1475        0.5612
   22  1.0912    0.5859    1.1340        0.5627
   23  1.0430    0.6085    1.1479        0.5493
   24  1.0264    0.6117    1.0858        0.5851
   25  1.0153    0.6116    1.0996        0.5687
   26  0.9915    0.6299    1.1131        0.5821
   27  0.9779    0.6221    1.1090        0.5627
   28  0.9417    0.6419    1.0965        0.5858
   29  0.9348    0.6440    1.1353        0.5709
   30  0.9239    0.6497    1.1182        0.5903

Evaluation

test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy: ", test_acc)

Test Accuracy: 0.563758373260498

model.save('Speech-Emotion-Recognition-Model.h5')
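confusion_matrix and classification_report are imported at the top but never called; a sketch that applies them to the held-out test set (the emotion_names ordering below follows the labels dict defined in the MFCC Extraction section):

# per-class precision/recall and a confusion matrix for the test split
y_pred = np.argmax(model.predict(X_test), axis=1)

emotion_names = ['disgust', 'happy', 'sad', 'neutral', 'fear', 'angry']
print(classification_report(y_test, y_pred, target_names=emotion_names))

cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d',
            xticklabels=emotion_names, yticklabels=emotion_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

This makes it easy to see which emotions the model confuses rather than relying on the single aggregate accuracy number.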