03.random_forest_demo

注:本案例为黑马的课堂案例,上传仅为方便查看

# 1.获取数据
# 2.数据基本处理
# 2.1 确定特征值,目标值
# 2.2 缺失值处理
# 2.3 数据集划分
# 3.特征工程(字典特征抽取)
# 4.机器学习(随机森林+cv)
# 5.模型评估
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction import DictVectorizer
# from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.ensemble import RandomForestClassifier
# 1. Load the data
# NOTE(review): the file is fetched over plain HTTP from a remote host on every run —
# confirm the URL is still reachable, or point this at a local copy.
data = pd.read_csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt")
# Bare expression: shows the DataFrame when run as a notebook cell.
data
row.namespclasssurvivednameageembarkedhome.destroomticketboatsex
011st1Allen, Miss Elisabeth Walton29.0000SouthamptonSt Louis, MOB-524160 L2212female
121st0Allison, Miss Helen Loraine2.0000SouthamptonMontreal, PQ / Chesterville, ONC26NaNNaNfemale
231st0Allison, Mr Hudson Joshua Creighton30.0000SouthamptonMontreal, PQ / Chesterville, ONC26NaN(135)male
341st0Allison, Mrs Hudson J.C. (Bessie Waldo Daniels)25.0000SouthamptonMontreal, PQ / Chesterville, ONC26NaNNaNfemale
451st1Allison, Master Hudson Trevor0.9167SouthamptonMontreal, PQ / Chesterville, ONC22NaN11male
561st1Anderson, Mr Harry47.0000SouthamptonNew York, NYE-12NaN3male
671st1Andrews, Miss Kornelia Theodosia63.0000SouthamptonHudson, NYD-713502 L7710female
781st0Andrews, Mr Thomas, jr39.0000SouthamptonBelfast, NIA-36NaNNaNmale
891st1Appleton, Mrs Edward Dale (Charlotte Lamson)58.0000SouthamptonBayside, Queens, NYC-101NaN2female
9101st0Artagaveytia, Mr Ramon71.0000CherbourgMontevideo, UruguayNaNNaN(22)male
10111st0Astor, Colonel John Jacob47.0000CherbourgNew York, NYNaN17754 L224 10s 6d(124)male
11121st1Astor, Mrs John Jacob (Madeleine Talmadge Force)19.0000CherbourgNew York, NYNaN17754 L224 10s 6d4female
12131st1Aubert, Mrs Leontine PaulineNaNCherbourgParis, FranceB-3517477 L69 6s9female
13141st1Barkworth, Mr Algernon H.NaNSouthamptonHessle, YorksA-23NaNBmale
14151st0Baumann, Mr John D.NaNSouthamptonNew York, NYNaNNaNNaNmale
15161st1Baxter, Mrs James (Helene DeLaudeniere Chaput)50.0000CherbourgMontreal, PQB-58/60NaN6female
16171st0Baxter, Mr Quigg Edmond24.0000CherbourgMontreal, PQB-58/60NaNNaNmale
17181st0Beattie, Mr Thomson36.0000CherbourgWinnipeg, MNC-6NaNNaNmale
18191st1Beckwith, Mr Richard Leonard37.0000SouthamptonNew York, NYD-35NaN5male
19201st1Beckwith, Mrs Richard Leonard (Sallie Monypeny)47.0000SouthamptonNew York, NYD-35NaN5female
20211st1Behr, Mr Karl Howell26.0000CherbourgNew York, NYC-148NaN5male
21221st0Birnbaum, Mr Jakob25.0000CherbourgSan Francisco, CANaNNaN(148)male
22231st1Bishop, Mr Dickinson H.25.0000CherbourgDowagiac, MIB-49NaN7male
23241st1Bishop, Mrs Dickinson H. (Helen Walton)19.0000CherbourgDowagiac, MIB-49NaN7female
24251st1Bjornstrm-Steffansson, Mr Mauritz Hakan28.0000SouthamptonStockholm, Sweden / Washington, DCNaNDmale
25261st0Blackwell, Mr Stephen Weart45.0000SouthamptonTrenton, NJNaNNaN(241)male
26271st1Blank, Mr Henry39.0000CherbourgGlen Ridge, NJA-31NaN7male
27281st1Bonnell, Miss Caroline30.0000SouthamptonYoungstown, OHC-7NaN8female
28291st1Bonnell, Miss Elizabeth58.0000SouthamptonBirkdale, England Cleveland, OhioC-103NaN8female
29301st0Borebank, Mr John JamesNaNSouthamptonLondon / Winnipeg, MBD-21/2NaNNaNmale
....................................
128312843rd0Vestrom, Miss Hulda Amanda AdolfinaNaNNaNNaNNaNNaNNaNfemale
128412853rd0Vonk, Mr JenkoNaNNaNNaNNaNNaNNaNmale
128512863rd0Ware, Mr FrederickNaNNaNNaNNaNNaNNaNmale
128612873rd0Warren, Mr Charles WilliamNaNNaNNaNNaNNaNNaNmale
128712883rd0Wazli, Mr YousifNaNNaNNaNNaNNaNNaNmale
128812893rd0Webber, Mr JamesNaNNaNNaNNaNNaNNaNmale
128912903rd1Wennerstrom, Mr August EdvardNaNNaNNaNNaNNaNNaNmale
129012913rd0Wenzel, Mr LinhartNaNNaNNaNNaNNaNNaNmale
129112923rd0Widegren, Mr Charles PeterNaNNaNNaNNaNNaNNaNmale
129212933rd0Wiklund, Mr Jacob AlfredNaNNaNNaNNaNNaNNaNmale
129312943rd1Wilkes, Mrs EllenNaNNaNNaNNaNNaNNaNfemale
129412953rd0Willer, Mr AaronNaNNaNNaNNaNNaNNaNmale
129512963rd0Willey, Mr EdwardNaNNaNNaNNaNNaNNaNmale
129612973rd0Williams, Mr Howard HughNaNNaNNaNNaNNaNNaNmale
129712983rd0Williams, Mr LeslieNaNNaNNaNNaNNaNNaNmale
129812993rd0Windelov, Mr EinarNaNNaNNaNNaNNaNNaNmale
129913003rd0Wirz, Mr AlbertNaNNaNNaNNaNNaNNaNmale
130013013rd0Wiseman, Mr PhillippeNaNNaNNaNNaNNaNNaNmale
130113023rd0Wittevrongel, Mr CamielNaNNaNNaNNaNNaNNaNmale
130213033rd1Yalsevac, Mr IvanNaNNaNNaNNaNNaNNaNmale
130313043rd0Yasbeck, Mr AntoniNaNNaNNaNNaNNaNNaNmale
130413053rd1Yasbeck, Mrs AntoniNaNNaNNaNNaNNaNNaNfemale
130513063rd0Youssef, Mr GeriosNaNNaNNaNNaNNaNNaNmale
130613073rd0Zabour, Miss HileniNaNNaNNaNNaNNaNNaNfemale
130713083rd0Zabour, Miss TaminiNaNNaNNaNNaNNaNNaNfemale
130813093rd0Zakarian, Mr ArtunNaNNaNNaNNaNNaNNaNmale
130913103rd0Zakarian, Mr MapriederNaNNaNNaNNaNNaNNaNmale
131013113rd0Zenn, Mr PhilipNaNNaNNaNNaNNaNNaNmale
131113123rd0Zievens, ReneNaNNaNNaNNaNNaNNaNfemale
131213133rd0Zimmerman, LeoNaNNaNNaNNaNNaNNaNmale

1313 rows × 11 columns

# Summary statistics; the "count" row gives the number of non-missing values per column.
data.describe()
         row.names     survived         age
count  1313.000000  1313.000000  633.000000
mean    657.000000     0.341965   31.194181
std     379.174762     0.474549   14.747525
min       1.000000     0.000000    0.166700
25%     329.000000     0.000000   21.000000
50%     657.000000     0.000000   30.000000
75%     985.000000     1.000000   41.000000
max    1313.000000     1.000000   71.000000
# 2. Basic data preparation
# 2.1 Select the feature columns and the target column
# Features used by the model: passenger class, age and sex.
x = data[["pclass", "age", "sex"]]
x.head()
  pclass      age     sex
0    1st  29.0000  female
1    1st   2.0000  female
2    1st  30.0000    male
3    1st  25.0000  female
4    1st   0.9167    male
# Target: the "survived" column.
y = data["survived"]
y.head()
0    1
1    0
2    0
3    0
4    1
Name: survived, dtype: int64
# 2.2 Handle missing values
# Replace missing ages with the mean age of the full data set.
# Work on an explicit copy and assign the filled column back: calling
# fillna(inplace=True) on x["age"] is a chained operation on a slice of
# `data`, which raises SettingWithCopyWarning and, under pandas
# Copy-on-Write, does not update `x` at all.
x = x.copy()
x["age"] = x["age"].fillna(data["age"].mean())
/Users/sherwin/workspaces/ai/lib/python3.6/site-packages/pandas/core/generic.py:3660: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)
# Preview the features after the missing-age fill.
x.head()
  pclass      age     sex
0    1st  29.0000  female
1    1st   2.0000  female
2    1st  30.0000    male
3    1st  25.0000  female
4    1st   0.9167    male
# 2.3 Split into training and test sets
# test_size=0.2 holds out 20% of the rows; random_state=22 fixes the shuffle so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=22, test_size=0.2)
# 3. Feature engineering (dictionary feature extraction)
x.head()
  pclass      age     sex
0    1st  29.0000  female
1    1st   2.0000  female
2    1st  30.0000    male
3    1st  25.0000  female
4    1st   0.9167    male
# Convert each frame into a list of per-row dicts ({column: value}),
# the input format used for dictionary feature extraction.
x_train = x_train.to_dict(orient="records")
x_test = x_test.to_dict(orient="records")
x_train

[{'pclass': '3rd', 'age': 45.0, 'sex': 'female'},
 {'pclass': '3rd', 'age': 31.19418104265403, 'sex': 'male'},
 {'pclass': '1st', 'age': 36.0, 'sex': 'male'},
 ...]

transfer = DictVectorizer()

# Learn the feature vocabulary (one-hot columns for the string-valued
# fields, pass-through for numeric ones) from the training records only.
x_train = transfer.fit_transform(x_train)
# Encode the test records with the vocabulary fitted on the training set.
# Refitting here (fit_transform) would rebuild the vocabulary from the test
# data, so the columns could differ from the ones the model was trained on.
x_test = transfer.transform(x_test)
x_train
<1050x6 sparse matrix of type '<class 'numpy.float64'>'
	with 3150 stored elements in Compressed Sparse Row format>
# 4. Machine learning (random forest tuned with a cross-validated grid search)
# Candidate hyper-parameters; every n_estimators / max_depth combination is tried.
param_grid = {
    "n_estimators": [120, 200, 300, 500, 800, 1200],
    "max_depth": [5, 8, 15, 25, 30],
}

# Wrap a default random forest in a grid search with cv=5, then fit on the training set.
estimator = GridSearchCV(RandomForestClassifier(), param_grid=param_grid, cv=5)
estimator.fit(x_train, y_train)
GridSearchCV(cv=5, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [120, 200, 300, 500, 800, 1200], 'max_depth': [5, 8, 15, 25, 30]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)
# 5. Model evaluation
# Score the fitted search on the held-out test set.
estimator.score(x_test, y_test)
0.7908745247148289
# The model refit with the best parameter combination found by the grid search.
estimator.best_estimator_
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=500, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值