import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
df = pd.read_csv('e:\\SMSSpamCollection', delimiter='\t',header=None)
In [10]:
df.head()
Out[10]:
In [12]:
X_train_raw, X_test_raw, y_train, y_test = train_test_split(df[1],df[0])
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_raw)
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
X_test = vectorizer.transform( ['URGENT! Your Mobile No 1234 was awarded a Prize', 'Hey honey, whats up?'] )
predictions = classifier.predict(X_test)
print(predictions)