网格寻优SVM

from time import time

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location, removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split
C:\Users\shaoqiu\Anaconda3\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
# Load the Bank of China daily quotes.
df = pd.read_csv(r'G:\\Project\\data\\CB.csv', encoding='gbk')

# Use the date as the index and sort the rows chronologically.
df = df.set_index('date')
df = df.sort_index()

# Turn the percentage change into the class label:
# 1 means rise (p_change >= 0), 0 means fall (p_change < 0).
# The label is written back to the column explicitly rather than through a
# Series that aliases it, because that write-through does not happen under
# pandas Copy-on-Write. Missing values compare False and therefore become 0.
value = (df['p_change'] >= 0).astype(float)
df['p_change'] = value
print(df.head())
            open  high  close   low      volume  price_change  p_change  \
date                                                                      
2015-01-05  4.18  4.50   4.42  4.18  23084548.0          0.27       1.0   
2015-01-06  4.38  4.74   4.56  4.28  23127260.0          0.14       1.0   
2015-01-07  4.46  4.64   4.54  4.44  15485755.0         -0.02       0.0   
2015-01-08  4.55  4.57   4.33  4.31  14892726.0         -0.21       0.0   
2015-01-09  4.28  4.76   4.47  4.23  22776194.0          0.14       1.0   

              ma5   ma10   ma20       v_ma5      v_ma10       v_ma20  turnover  
date                                                                            
2015-01-05  4.036  3.862  3.736  20236083.4  20700121.8  18068874.18      1.13  
2015-01-06  4.184  3.964  3.791  21016213.4  21901637.0  18406099.90      1.13  
2015-01-07  4.322  4.029  3.838  20103937.2  19875380.1  18223578.34      0.76  
2015-01-08  4.400  4.090  3.867  19874622.2  18551524.5  18027645.85      0.73  
2015-01-09  4.464  4.180  3.901  19873296.6  19332925.8  18291454.08      1.11  
# Drop the absolute price change, replace missing values with 0 and make
# every column float. `columns=` is used because the positional axis
# argument of DataFrame.drop was removed in pandas 2.0.
df = df.drop(columns=['price_change'])
df = df.fillna(0)
df = df.astype(float)

# Features: every column except the label, standardized column by column.
# NOTE(review): the scaling is computed on the full data set, i.e. before
# the train/test split further down.
X = np.array(df.drop(columns=['p_change']))
X = preprocessing.scale(X)

# Labels: the p_change column.
y = np.array(df['p_change'])
print(X.shape)
print(y.shape)
(690, 12)
(690,)
# Train on the first 80% of the rows and test on the last 20%.
# shuffle=False keeps the row order; by default train_test_split shuffles
# the rows before splitting, which would not give a first/last split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False)

# Search for the best SVM hyper-parameters.
print("开始建模")
t0 = time()
# C is the penalty applied to misclassified samples; gamma is the RBF
# kernel coefficient.
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5, 5e5, 1e6],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
# Every (C, gamma) combination is evaluated and the best one is kept.
# class_weight='balanced' weights each class inversely to its sample count
# so the model does not over-fit the larger class.
base_svc = SVC(kernel='rbf', class_weight='balanced')
clf = GridSearchCV(estimator=base_svc, param_grid=param_grid)
print(clf)
# Fit the search; fit() returns the search object itself, so no rebinding
# is needed.
clf.fit(X_train, y_train)
elapsed = time() - t0
print("time:%0.3fs" % elapsed)
# Details of the best model found by the search.
print(clf.best_estimator_)
开始建模
GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [1000.0, 5000.0, 10000.0, 50000.0, 100000.0, 500000.0, 1000000.0], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)
time:34.609s
SVC(C=5000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.005, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
根据网格寻参,得到的最优模型为 SVC(C=5000.0, cache_size=200, class_weight='balanced', coef0=0.0,

decision_function_shape='ovr', degree=3, gamma=0.005, kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),即最优参数为 C=5000.0、gamma=0.005。

# Predict on the test set and print the per-class precision/recall report.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)
             precision    recall  f1-score   support

        0.0       0.83      0.85      0.84        62
        1.0       0.88      0.86      0.87        76

avg / total       0.86      0.86      0.86       138
# Print the confusion matrix of the predictions (rows: true class,
# columns: predicted class).
matrix = confusion_matrix(y_test, y_pred)
print(matrix)
[[53  9]
 [11 65]]
  • 2
    点赞
  • 28
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值