from sklearn import svm
import pandas as pd
from sklearn import model_selection
from sklearn import metrics
# Read the letter data from a CSV file in the current working directory.
letters = pd.read_csv(filepath_or_buffer=r'./letterdata.csv')
# Preview the first five rows (only rendered in an interactive session).
letters.head(n=5)
|   | letter | xbox | ybox | width | height | onpix | xbar | ybar | x2bar | y2bar | xybar | x2ybar | xy2bar | xedge | xedgey | yedge | yedgex |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | T | 2 | 8 | 3 | 5 | 1 | 8 | 13 | 0 | 6 | 6 | 10 | 8 | 0 | 8 | 0 | 8 |
| 1 | I | 5 | 12 | 3 | 7 | 2 | 10 | 5 | 5 | 4 | 13 | 3 | 9 | 2 | 8 | 4 | 10 |
| 2 | D | 4 | 11 | 6 | 8 | 6 | 10 | 6 | 2 | 6 | 10 | 3 | 7 | 3 | 7 | 3 | 9 |
| 3 | N | 7 | 11 | 6 | 6 | 3 | 5 | 9 | 4 | 6 | 4 | 4 | 10 | 6 | 10 | 2 | 8 |
| 4 | G | 2 | 1 | 3 | 1 | 1 | 8 | 6 | 6 | 6 | 6 | 5 | 9 | 1 | 7 | 5 | 10 |
# Every column after the first is a feature; `letter` is the target.
predictors = letters.columns[1:]
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    letters[predictors],
    letters.letter,
    test_size=0.25,
    random_state=1234,
)

# Candidate values of the penalty parameter C for the linear SVM.
C = [0.05, 0.1, 0.5, 1, 2, 5]
parameters = dict(C=C)

# 5-fold cross-validated grid search, ranked by accuracy.
grid_linear_svc = model_selection.GridSearchCV(
    estimator=svm.LinearSVC(),
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    verbose=1,
)
grid_linear_svc.fit(X_train, y_train)
# Best C and its mean cross-validated accuracy (shown interactively).
grid_linear_svc.best_params_, grid_linear_svc.best_score_
Fitting 5 folds for each of 6 candidates, totalling 30 fits
# Predict on the held-out split with the best LinearSVC found by the grid
# search and report its accuracy.
# The original target was written `pred_ linear_svc` (stray space), which is
# a SyntaxError; the name now matches the one used in the scoring call.
pred_linear_svc = grid_linear_svc.predict(X_test)
metrics.accuracy_score(y_test, pred_linear_svc)
# Search over kernel type and penalty parameter C for the kernel SVM.
kernel = ['rbf', 'linear', 'poly', 'sigmoid']
C = [0.1, 0.5, 1, 2, 5]
parameters = dict(kernel=kernel, C=C)

# 5-fold cross-validated grid search, ranked by accuracy.
grid_svc = model_selection.GridSearchCV(
    estimator=svm.SVC(),
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
    verbose=1,
)
grid_svc.fit(X_train, y_train)
# Best (kernel, C) pair and its mean cross-validated accuracy.
grid_svc.best_params_, grid_svc.best_score_

# Accuracy of the refitted best estimator on the held-out split.
pred_svc = grid_svc.predict(X_test)
metrics.accuracy_score(y_true=y_test, y_pred=pred_svc)
# Load the forest-fires data.
# NOTE(review): this is a machine-specific absolute path, unlike the relative
# path used for letterdata.csv -- confirm where the file should live.
forestfires = pd.read_csv(
    filepath_or_buffer=r'C:\Users\Administrator\Desktop\forestfires.csv'
)
forestfires.head(n=5)

# Remove the `day` column in place.
forestfires.drop(columns='day', inplace=True)
# Replace each month value with an integer code (order of first appearance).
forestfires['month'] = pd.factorize(forestfires['month'])[0]
forestfires.head(n=5)
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import norm
# Histogram of the `area` column with a kernel-density estimate and a fitted
# normal curve overlaid.
# NOTE(review): sns.distplot is deprecated in current seaborn releases; it is
# kept here so the figure is unchanged, but should be migrated (e.g. to
# histplot/kdeplot plus an explicit norm.pdf line) when seaborn is upgraded.
sns.distplot(
    forestfires.area,
    bins=50,
    kde=True,
    fit=norm,
    hist_kws={'color': 'steelblue'},
    kde_kws={'color': 'red', 'label': 'Kernel Density'},
    # Legend label corrected from the misspelled 'Nomal'.
    fit_kws={'color': 'black', 'label': 'Normal', 'linestyle': '--'},
)
plt.legend()
plt.show()
from sklearn import preprocessing
import numpy as np
from sklearn import neighbors
# Target: log(1 + area).
y = np.log1p(forestfires.area)

# Features: every column except the last one.
predictors = forestfires.columns[:-1]
# NOTE(review): the features are standardised on the full data set before
# the split, so held-out rows contribute to the scaling statistics.
X = preprocessing.scale(forestfires[predictors])

X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1234,
)

# Baseline: SVR with default hyper-parameters, scored by test-set MSE.
svr = svm.SVR()
svr.fit(X_train, y_train)
pred_svr = svr.predict(X_test)
metrics.mean_squared_error(y_true=y_test, y_pred=pred_svr)
# Hyper-parameter grids for the SVR search.
epsilon = np.arange(0.1, 1.5, 0.2)
C = np.arange(100, 1000, 200)
gamma = np.arange(0.001, 0.01, 0.002)
parameters = dict(epsilon=epsilon, C=C, gamma=gamma)

# 5-fold cross-validated grid search ranked by negated MSE, run on 2 workers.
grid_svr = model_selection.GridSearchCV(
    estimator=svm.SVR(),
    param_grid=parameters,
    scoring='neg_mean_squared_error',
    cv=5,
    verbose=1,
    n_jobs=2,
)
grid_svr.fit(X_train, y_train)
print(grid_svr.best_params_, grid_svr.best_score_)

# Test-set MSE of the refitted best estimator.
pred_grid_svr = grid_svr.predict(X_test)
metrics.mean_squared_error(y_true=y_test, y_pred=pred_grid_svr)