Logistic 回归—网格搜索最优参数笔记

1、准备


 
 
  1. # 首先 import 必要的模块
  2. import pandas as pd
  3. import numpy as np
  4. from sklearn.model_selection import GridSearchCV
  5. #竞赛的评价指标为logloss
  6. from sklearn.metrics import log_loss
  7. from matplotlib import pyplot
  8. import seaborn as sns
  9. %matplotlib inline
  10. data = pd.read_csv( 'Otto_train.csv')
  11. data.head()
  12. data.info()
  13. data.describe()
  14. data.shape
  15. #受机器性能所限取前两万条数据
  16. data = data[: 20000]
  17. # Target 分布,看看各类样本分布是否均衡
  18. sns.countplot(data.target)
  19. pyplot.xlabel( 'target');
  20. pyplot.ylabel( 'Number of occurrences');

2、数据标准化与基线模型交叉验证


 
 
  # Convert the class-label strings into zero-based integer labels.
  # Dropping the first 6 characters presumably strips a "Class_" prefix
  # (e.g. "Class_3" -> 2) -- verify against the values in the CSV.
  y_train = data.target
  y_train = y_train.map(lambda s: int(s[6:]) - 1)

  # Everything except the label and the row id is used as a feature.
  data = data.drop(['target', 'id'], axis=1)
  X_train = np.array(data)

  # Standardize the features.
  from sklearn.preprocessing import StandardScaler
  # Create the feature scaler.
  ss_X = StandardScaler()
  # Fit the scaler on the training features and transform them in one step
  # (only training data is handled here; no test set is scaled).
  # NOTE(review): the scaler is fitted on all rows before cross-validation,
  # so fold statistics are shared; wrap scaler + model in a Pipeline if that
  # matters for the reported scores.
  X_train = ss_X.fit_transform(X_train)

  from sklearn.linear_model import LogisticRegression
  # cross_val_score is imported from sklearn.model_selection; the legacy
  # sklearn.cross_validation module was removed in scikit-learn 0.20.
  from sklearn.model_selection import cross_val_score

  lr = LogisticRegression()
  # Cross-validation is used to estimate model performance and to tune
  # parameters (model selection). With an integer `cv`, classifiers are
  # split with StratifiedKFold by default.
  loss = cross_val_score(lr, X_train, y_train, cv=5, scoring='neg_log_loss')
  # The scorer returns negated log loss, so flip the sign for reporting.
  print('logloss of each fold is: ', -loss)
  print('cv logloss is:', -loss.mean())

3、调用GridSearchCV进行参数调优


 
 
  from sklearn.model_selection import GridSearchCV
  from sklearn.linear_model import LogisticRegression

  # Hyper-parameters to tune.
  # Exercise: try tuning the L1 and L2 penalties separately, each paired with
  # a suitable optimization solver.
  penaltys = ['l1', 'l2']
  Cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
  tuned_parameters = dict(penalty=penaltys, C=Cs)

  # The solver is set explicitly: 'liblinear' accepts both 'l1' and 'l2',
  # whereas the current default ('lbfgs', since scikit-learn 0.22) rejects 'l1'.
  # NOTE(review): 'saga' also supports both penalties and may be preferable on
  # newer scikit-learn releases -- confirm against the installed version.
  lr_penalty = LogisticRegression(solver='liblinear')

  # return_train_score=True is required for the 'mean_train_score' and
  # 'std_train_score' entries read below; they are omitted by default.
  grid = GridSearchCV(lr_penalty, tuned_parameters, cv=5,
                      scoring='neg_log_loss', return_train_score=True)
  grid.fit(X_train, y_train)
  grid.cv_results_

  # best_score_ is negated log loss; flip the sign to report log loss.
  print(-grid.best_score_)
  print(grid.best_params_)

  # Collect the cross-validation scores for the error curves.
  test_means = grid.cv_results_['mean_test_score']
  test_stds = grid.cv_results_['std_test_score']
  train_means = grid.cv_results_['mean_train_score']
  train_stds = grid.cv_results_['std_train_score']

  # Arrange the flat result vectors as a (C, penalty) table.
  # NOTE(review): this assumes penalty varies fastest in cv_results_ -- confirm
  # against grid.cv_results_['params'].
  n_Cs = len(Cs)
  number_penaltys = len(penaltys)
  test_scores = np.array(test_means).reshape(n_Cs, number_penaltys)
  train_scores = np.array(train_means).reshape(n_Cs, number_penaltys)
  test_stds = np.array(test_stds).reshape(n_Cs, number_penaltys)
  train_stds = np.array(train_stds).reshape(n_Cs, number_penaltys)

  # One test curve and one train curve per penalty, plotted against log10(C)
  # with the fold standard deviation as error bars.
  x_axis = np.log10(Cs)
  for i, value in enumerate(penaltys):
      pyplot.errorbar(x_axis, test_scores[:, i], yerr=test_stds[:, i],
                      label=value + ' Test')
      pyplot.errorbar(x_axis, train_scores[:, i], yerr=train_stds[:, i],
                      label=value + ' Train')

  pyplot.legend()
  pyplot.xlabel('log(C)')
  pyplot.ylabel('neg-logloss')
  pyplot.savefig('LogisticGridSearchCV_C.png')
  pyplot.show()

 

  •                     <li class="tool-item tool-active is-like "><a href="javascript:;"><svg class="icon" aria-hidden="true">
                            <use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#csdnc-thumbsup"></use>
                        </svg><span class="name">点赞</span>
                        <span class="count">1</span>
                        </a></li>
                        <li class="tool-item tool-active is-collection "><a href="javascript:;" data-report-click="{&quot;mod&quot;:&quot;popu_824&quot;}"><svg class="icon" aria-hidden="true">
                            <use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#icon-csdnc-Collection-G"></use>
                        </svg><span class="name">收藏</span></a></li>
                        <li class="tool-item tool-active is-share"><a href="javascript:;" data-report-click="{&quot;mod&quot;:&quot;1582594662_002&quot;}"><svg class="icon" aria-hidden="true">
                            <use xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#icon-csdnc-fenxiang"></use>
                        </svg>分享</a></li>
                        <!--打赏开始-->
                                                <!--打赏结束-->
                                                <li class="tool-item tool-more">
                            <a>
                            <svg t="1575545411852" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="5717" xmlns:xlink="http://www.w3.org/1999/xlink" width="200" height="200"><defs><style type="text/css"></style></defs><path d="M179.176 499.222m-113.245 0a113.245 113.245 0 1 0 226.49 0 113.245 113.245 0 1 0-226.49 0Z" p-id="5718"></path><path d="M509.684 499.222m-113.245 0a113.245 113.245 0 1 0 226.49 0 113.245 113.245 0 1 0-226.49 0Z" p-id="5719"></path><path d="M846.175 499.222m-113.245 0a113.245 113.245 0 1 0 226.49 0 113.245 113.245 0 1 0-226.49 0Z" p-id="5720"></path></svg>
                            </a>
                            <ul class="more-box">
                                <li class="item"><a class="article-report">文章举报</a></li>
                            </ul>
                        </li>
                                            </ul>
                </div>
                            </div>
            <div class="person-messagebox">
                <div class="left-message"><a href="https://blog.csdn.net/evolution23">
                    <img src="https://profile.csdnimg.cn/2/3/3/3_evolution23" class="avatar_pic" username="evolution23">
                                            <img src="https://g.csdnimg.cn/static/user-reg-year/1x/9.png" class="user-years">
                                    </a></div>
                <div class="middle-message">
                                        <div class="title"><span class="tit"><a href="https://blog.csdn.net/evolution23" data-report-click="{&quot;mod&quot;:&quot;popu_379&quot;}" target="_blank">二月鳥</a></span>
                                            </div>
                    <div class="text"><span>发布了19 篇原创文章</span> · <span>获赞 7</span> · <span>访问量 2万+</span></div>
                </div>
                                <div class="right-message">
                                            <a href="https://im.csdn.net/im/main.html?userName=evolution23" target="_blank" class="btn btn-sm btn-red-hollow bt-button personal-letter">私信
                        </a>
                                                            <a class="btn btn-sm  bt-button personal-watch" data-report-click="{&quot;mod&quot;:&quot;popu_379&quot;}">关注</a>
                                    </div>
                            </div>
                    </div>
    
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值