网格寻优SVM

最新推荐文章于 2024-05-06 19:21:07 发布

KimEddy

最新推荐文章于 2024-05-06 19:21:07 发布

阅读量5.4k

点赞数 2

分类专栏： python数据挖掘　文章标签： svm 网格 python 机器学习

本文链接：https://blog.csdn.net/weixin_37450657/article/details/78840831

版权

python数据挖掘专栏收录该内容

6 篇文章 2 订阅

订阅专栏

import pandas as pd
import numpy as np
from time import time
from sklearn.model_selection import GridSearchCV
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from sklearn import preprocessing

C:\Users\shaoqiu\Anaconda3\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

# Load the Bank of China quote data (the CSV is GBK-encoded).
df = pd.read_csv(r'G:\\Project\\data\\CB.csv', encoding='gbk')

# Use the date column as the index and sort it in ascending order.
df = df.set_index('date')
df = df.sort_index()
# print (df.head())

# Turn the percentage change into a binary label:
#   1.0 -> rise (p_change >= 0), 0.0 -> fall (p_change < 0).
# The column is assigned through df[...] rather than by mutating a derived
# Series, so the result does not depend on that Series sharing memory with df.
# Missing values become 0.0 here, which is what the subsequent fillna(0) step
# produced for them anyway.
df['p_change'] = (df['p_change'] >= 0).astype(float)
print(df.head())

            open  high  close   low      volume  price_change  p_change  \
date                                                                      
2015-01-05  4.18  4.50   4.42  4.18  23084548.0          0.27       1.0   
2015-01-06  4.38  4.74   4.56  4.28  23127260.0          0.14       1.0   
2015-01-07  4.46  4.64   4.54  4.44  15485755.0         -0.02       0.0   
2015-01-08  4.55  4.57   4.33  4.31  14892726.0         -0.21       0.0   
2015-01-09  4.28  4.76   4.47  4.23  22776194.0          0.14       1.0   

              ma5   ma10   ma20       v_ma5      v_ma10       v_ma20  turnover  
date                                                                            
2015-01-05  4.036  3.862  3.736  20236083.4  20700121.8  18068874.18      1.13  
2015-01-06  4.184  3.964  3.791  21016213.4  21901637.0  18406099.90      1.13  
2015-01-07  4.322  4.029  3.838  20103937.2  19875380.1  18223578.34      0.76  
2015-01-08  4.400  4.090  3.867  19874622.2  18551524.5  18027645.85      0.73  
2015-01-09  4.464  4.180  3.901  19873296.6  19332925.8  18291454.08      1.11

# Remove the absolute price change column, replace missing values with 0
# and make every remaining column float.
# `columns=` is used because passing the axis positionally (drop([...], 1))
# is no longer accepted by pandas 2.x.
df = df.drop(columns=['price_change'])
df = df.fillna(0)
df = df.astype(float)

# Feature matrix: every column except the label, standardised column-wise
# by sklearn.preprocessing.scale.
X = np.array(df.drop(columns=['p_change']))
X = preprocessing.scale(X)

# Target vector: the binary rise/fall label.
y = np.array(df['p_change'])
print(X.shape)
print(y.shape)

(690, 12)
(690,)

# Chronological hold-out: the first 80% of the rows are used for training and
# the last 20% for testing. The rows follow the date-sorted frame, so slicing
# (instead of a shuffled split) keeps every test day after every training day.
split_at = int(len(X) * 0.8)
X_train, X_test = X[:split_at], X[split_at:]
y_train, y_test = y[:split_at], y[split_at:]

# Search for the best SVM hyper-parameters.
print("开始建模")
t0 = time()
# C is the penalty applied to misclassified samples; gamma is the RBF kernel
# coefficient.
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5, 5e5, 1e6],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
# Every (C, gamma) combination is tried and the best one is kept.
# class_weight='balanced' weights each class inversely to its sample count,
# which keeps the model from over-fitting the larger class.
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
print(clf)
# Fit the grid search on the training data.
clf = clf.fit(X_train, y_train)
print("time:%0.3fs" % (time() - t0))
print(clf.best_estimator_)  # parameters of the best model found

开始建模
GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [1000.0, 5000.0, 10000.0, 50000.0, 100000.0, 500000.0, 1000000.0], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)
time:34.609s
SVC(C=5000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.005, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

根据网格寻参，得到的最优模型为 SVC(C=5000.0, cache_size=200, class_weight='balanced', coef0=0.0,
decision_function_shape='ovr', degree=3, gamma=0.005, kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)，即最优参数为 C=5000、gamma=0.005。

# Predict the labels of the test set with the fitted grid search, then
# print the classification report for those predictions.
y_pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

             precision    recall  f1-score   support

        0.0       0.83      0.85      0.84        62
        1.0       0.88      0.86      0.87        76

avg / total       0.86      0.86      0.86       138

# Print the confusion matrix of the test-set predictions.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

[[53  9]
 [11 65]]

KimEddy

关注

2
点赞
踩
28

收藏

觉得还不错? 一键收藏
0
评论
网格寻优SVM

import pandas as pd; import numpy as np; from time import time; from sklearn.model_selection import GridSearchCV; from sklearn.cross_validation import train_test_split; from sklearn.metrics import classific…
复制链接

扫一扫