DigitRecognizer

from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
# Read the labelled training data and the unlabelled test data (in that order).
dataset, test = (
    pd.read_csv(path) for path in ('input/train.csv', 'input/test.csv')
)
# Per-column summary statistics; shown as the notebook cell output.
dataset.describe()
              label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  ...      pixel774      pixel775      pixel776      pixel777      pixel778      pixel779  pixel780  pixel781  pixel782  pixel783
count  42000.000000   42000   42000   42000   42000   42000   42000   42000   42000   42000  ...  42000.000000  42000.000000  42000.000000   42000.00000  42000.000000  42000.000000     42000     42000     42000     42000
mean       4.456643       0       0       0       0       0       0       0       0       0  ...      0.219286      0.117095      0.059024       0.02019      0.017238      0.002857         0         0         0         0
std        2.887730       0       0       0       0       0       0       0       0       0  ...      6.312890      4.633819      3.274488       1.75987      1.894498      0.414264         0         0         0         0
min        0.000000       0       0       0       0       0       0       0       0       0  ...      0.000000      0.000000      0.000000       0.00000      0.000000      0.000000         0         0         0         0
25%        2.000000       0       0       0       0       0       0       0       0       0  ...      0.000000      0.000000      0.000000       0.00000      0.000000      0.000000         0         0         0         0
50%        4.000000       0       0       0       0       0       0       0       0       0  ...      0.000000      0.000000      0.000000       0.00000      0.000000      0.000000         0         0         0         0
75%        7.000000       0       0       0       0       0       0       0       0       0  ...      0.000000      0.000000      0.000000       0.00000      0.000000      0.000000         0         0         0         0
max        9.000000       0       0       0       0       0       0       0       0       0  ...    254.000000    254.000000    253.000000     253.00000    254.000000     62.000000         0         0         0         0

8 rows × 785 columns

dataset.head()
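   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  pixel781  pixel782  pixel783
0      1       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
1      0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
2      1       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
3      4       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
4      0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0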

5 rows × 785 columns

test.head()
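   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  pixel9  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  pixel781  pixel782  pixel783
0       0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
1       0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
2       0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
3       0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0
4       0       0       0       0       0       0       0       0       0       0  ...         0         0         0         0         0         0         0         0         0         0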

5 rows × 784 columns

# Select the label column by *position*. `dataset[[0]]` looks up a column
# whose label is the integer 0; the columns here are named 'label',
# 'pixel0', ..., so that lookup raises KeyError on current pandas.
# The list index [0] keeps the result 2-D (one column), as before.
target = dataset.iloc[:, [0]].values
target
array([[1], [0], [1], …, [7], [6], [9]])
# Flatten the single-column array into the 1-D label vector passed to fit().
target = np.ravel(target)
target
array([1, 0, 1, …, 7, 6, 9])
# Feature matrix: every column after the first (the label column).
train = dataset[dataset.columns[1:]].values
train
array([[0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], …, [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0]])
File "", line 1
    print train.dtype
                    ^
SyntaxError: Missing parentheses in call to 'print'
(Python 3 requires the function form: print(train.dtype).)
test
pixel0pixel1pixel2pixel3pixel4pixel5pixel6pixel7pixel8pixel9pixel774pixel775pixel776pixel777pixel778pixel779pixel780pixel781pixel782pixel783
000000000000000000000
100000000000000000000
200000000000000000000
300000000000000000000
400000000000000000000
500000000000000000000
600000000000000000000
700000000000000000000
800000000000000000000
900000000000000000000
1000000000000000000000
1100000000000000000000
1200000000000000000000
1300000000000000000000
1400000000000000000000
1500000000000000000000
1600000000000000000000
1700000000000000000000
1800000000000000000000
1900000000000000000000
2000000000000000000000
2100000000000000000000
2200000000000000000000
2300000000000000000000
2400000000000000000000
2500000000000000000000
2600000000000000000000
2700000000000000000000
2800000000000000000000
2900000000000000000000
2797000000000000000000000
2797100000000000000000000
2797200000000000000000000
2797300000000000000000000
2797400000000000000000000
2797500000000000000000000
2797600000000000000000000
2797700000000000000000000
2797800000000000000000000
2797900000000000000000000
2798000000000000000000000
2798100000000000000000000
2798200000000000000000000
2798300000000000000000000
2798400000000000000000000
2798500000000000000000000
2798600000000000000000000
2798700000000000000000000
2798800000000000000000000
2798900000000000000000000
2799000000000000000000000
2799100000000000000000000
2799200000000000000000000
2799300000000000000000000
2799400000000000000000000
2799500000000000000000000
2799600000000000000000000
2799700000000000000000000
2799800000000000000000000
2799900000000000000000000

28000 rows × 784 columns

# Replace the test DataFrame with its underlying NumPy array, matching `train`.
test = test.values
test
array([[0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], …, [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0], [0, 0, 0, …, 0, 0, 0]])
# Create and train the random forest: 100 trees, all other settings default.
rf = RandomForestClassifier(n_estimators=100)
# fit() returns the estimator, which the notebook echoes as the cell output.
rf.fit(train, target)
RandomForestClassifier(bootstrap=True, class_weight=None, criterion=’gini’, max_depth=None, max_features=’auto’, max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1, oob_score=False, random_state=None, verbose=0, warm_start=False)
# Predict one label per row of the test array.
pred = rf.predict(test)
pred
array([2, 0, 9, …, 3, 9, 2])
# Load the sample submission to inspect the expected layout (ImageId, Label).
subdemo = pd.read_csv('input/sample_submission.csv')
subdemo
ImageIdLabel
010
120
230
340
450
560
670
780
890
9100
10110
11120
12130
13140
14150
15160
16170
17180
18190
19200
20210
21220
22230
23240
24250
25260
26270
27280
28290
29300
27970279710
27971279720
27972279730
27973279740
27974279750
27975279760
27976279770
27977279780
27978279790
27979279800
27980279810
27981279820
27982279830
27983279840
27984279850
27985279860
27986279870
27987279880
27988279890
27989279900
27990279910
27991279920
27992279930
27993279940
27994279950
27995279960
27996279970
27997279980
27998279990
27999280000

28000 rows × 2 columns

len(test)
28000
len(pred)
28000
# Write the submission file: a 1-based ImageId column next to the predicted
# Label column, formatted as integers. comments='' keeps the header line
# free of NumPy's default '# ' prefix.
np.savetxt(
    'submission_rand_forest.csv',
    np.column_stack((np.arange(1, len(test) + 1), pred)),
    delimiter=',',
    header='ImageId,Label',
    comments='',
    fmt='%d',
)
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值