import numpy as np
import pandas as pd
# Load the ISEAR dataset. The file has no header row, so pandas assigns
# integer column labels (0, 1, 2, ...).
data = pd.read_csv(filepath_or_buffer='./ISEAR.csv', header=None)

# Preview the first five rows to sanity-check the parse.
data.head(5)
         0                                                  1   2
0      joy  On days when I feel close to my partner and ot... NaN
1     fear  Every time I imagine that someone I love or I ... NaN
2    anger  When I had been obviously unjustly treated and... NaN
3  sadness  When I think about the short time that we live... NaN
4  disgust  At a gathering I found myself involuntarily si... NaN
from sklearn. model_selection import train_test_split
from sklearn. feature_extraction. text import TfidfVectorizer
# Raw inputs: column 1 is used as the text to classify and column 0 as the
# target label.
texts = data[1].values.tolist()
Y_data = data[0].values.tolist()

# Split the raw texts BEFORE fitting TF-IDF. Fitting the vectorizer on the
# full corpus lets the vocabulary and IDF weights be learned from the test
# documents, which leaks held-out information into the features and
# inflates the reported test score. The split depends only on the number of
# samples and random_state, so the same rows land in train/test as before.
x_train_text, x_test_text, y_train, y_test = train_test_split(
    texts, Y_data, train_size=0.8, random_state=42
)

# Learn vocabulary/IDF from the training split only, then apply that fitted
# transform unchanged to the test split.
vectorizer = TfidfVectorizer()
x_train = vectorizer.fit_transform(x_train_text)
x_test = vectorizer.transform(x_test_text)

# Keep X_data bound to a TF-IDF matrix of the whole corpus for any later
# code that expects it; it is built with the train-only vocabulary and must
# not be used to fit a model that is then scored on x_test.
X_data = vectorizer.transform(texts)
{'anger', 'disgust', 'fear', 'guilt', 'joy', 'sadness', 'shame'}
from sklearn. linear_model import LogisticRegression
from sklearn. model_selection import GridSearchCV
# Baseline: logistic regression with default hyper-parameters, scored on
# the held-out split (the value is only displayed in an interactive
# session; it is not stored).
lr = LogisticRegression()
lr.fit(x_train, y_train)
lr.score(x_test, y_test)

# Tune the inverse regularisation strength C with 5-fold cross-validation
# on the training split.
parameters = dict(C=[0.005, 0.01, 0.1, 0.5, 1.5, 2, 2.5, 3.5])
clf = GridSearchCV(estimator=lr, param_grid=parameters, cv=5)
clf.fit(x_train, y_train)

# Report the held-out score of the refit best estimator and the chosen C.
test_score = clf.score(x_test, y_test)
print(test_score)
best_params = clf.best_params_
print(best_params)
0.5791223404255319
{'C': 2}
from sklearn. metrics import confusion_matrix
# Confusion matrix of the tuned model on the held-out split
# (first argument: true labels, second: predicted labels).
y_pred = clf.predict(x_test)
confusion_matrix(y_test, y_pred)
array([[ 98, 35, 17, 29, 14, 17, 17],
[ 25, 123, 13, 18, 9, 3, 13],
[ 13, 7, 137, 12, 13, 11, 7],
[ 24, 12, 11, 110, 12, 15, 25],
[ 7, 7, 8, 8, 182, 12, 9],
[ 17, 18, 14, 8, 19, 122, 7],
[ 20, 32, 15, 34, 20, 6, 99]], dtype=int64)