机器学习之多类别问题

#多分类的思想是:将多个类别拆成多个二分类的问题
#本实验为三分类问题
import pandas as pd
import matplotlib.pyplot as plt

# Column names for the auto-mpg data set; the raw file ships without a header row.
columns = [
    "mpg",
    "cylinders",
    "displacement",
    "horsepower",
    "weight",
    "acceleration",
    "model year",
    "origin",
    "car name",
]
# Raw string: same path value as before, but without relying on "\m" being an
# unrecognised (and now warned-about) escape sequence.
# sep=r"\s+" splits on runs of whitespace; it replaces the deprecated
# delim_whitespace=True flag. names=columns assigns the header defined above.
cars = pd.read_table(
    r"D:\test\machineLearning\auto-mpg.data",
    sep=r"\s+",
    names=columns,
)
# Notebook-style preview of the first two rows (no effect when run as a script).
cars.head(2)
    mpg  cylinders  displacement horsepower  weight  acceleration  model year  origin                   car name
0  18.0          8         307.0      130.0  3504.0          12.0          70       1  chevrolet chevelle malibu
1  15.0          8         350.0      165.0  3693.0          11.5          70       1          buick skylark 320
# One-hot encode the categorical columns: get_dummies creates one indicator
# column per distinct value (set for rows holding that value, unset otherwise);
# `prefix` is prepended to each generated column name.
dumy_cylinders = pd.get_dummies(cars["cylinders"], prefix="cyl")
dumy_year = pd.get_dummies(cars["model year"], prefix="year")
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(dumy_cylinders.head(2))
   cyl_3  cyl_4  cyl_5  cyl_6  cyl_8
0    0.0    0.0    0.0    0.0    1.0
1    0.0    0.0    0.0    0.0    1.0
# Append the indicator columns to the original frame column-wise (axis=1).
# A single concat of all three frames yields the same column order as the
# two successive concats it replaces.
cars = pd.concat([cars, dumy_cylinders, dumy_year], axis=1)
print(cars.head(2))
    mpg  cylinders  displacement horsepower  weight  acceleration  model year  \
0  18.0          8         307.0      130.0  3504.0          12.0          70   
1  15.0          8         350.0      165.0  3693.0          11.5          70   

   origin                   car name  cyl_3   ...     year_73  year_74  \
0       1  chevrolet chevelle malibu    0.0   ...         0.0      0.0   
1       1          buick skylark 320    0.0   ...         0.0      0.0   

   year_75  year_76  year_77  year_78  year_79  year_80  year_81  year_82  
0      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0  
1      0.0      0.0      0.0      0.0      0.0      0.0      0.0      0.0  

[2 rows x 27 columns]
import numpy as np

# Shuffle the rows, then split 70% / 30% into training and test sets.
# The permuted index labels are used as positions by .iloc, which is valid
# here because `cars` still carries the default 0..n-1 index.
shuffle_row = np.random.permutation(cars.index)
shuffle_car = cars.iloc[shuffle_row]
high_train_row = int(cars.shape[0] * .70)
train = shuffle_car.iloc[0:high_train_row]
test = shuffle_car.iloc[high_train_row:]

# Distinct values of the target column "origin" (printed in order of first
# appearance, then sorted in place for the loops that follow).
unique_origin = cars["origin"].unique()
print(unique_origin)
unique_origin.sort()

# One fitted binary classifier per origin value will be stored here.
models = {}

# Feature columns: every column whose name starts with "cyl" or "year".
# NOTE(review): the "cyl" prefix also matches the raw "cylinders" column, so
# it is used alongside its own indicator columns — tighten to "cyl_" / "year_"
# if only the indicator columns are intended.
feature = [c for c in train.columns if c.startswith(("cyl", "year"))]
print(feature)
[1 3 2]
['cylinders', 'cyl_3', 'cyl_4', 'cyl_5', 'cyl_6', 'cyl_8', 'year_70', 'year_71', 'year_72', 'year_73', 'year_74', 'year_75', 'year_76', 'year_77', 'year_78', 'year_79', 'year_80', 'year_81', 'year_82']
from sklearn.linear_model import LogisticRegression
# One-vs-rest training: fit one binary logistic regression per origin value.
# The feature matrix does not depend on the loop variable, so build it once.
x_train = train[feature]
for origin in unique_origin:
    model1 = LogisticRegression()
    # Treat rows of the current origin as the positive class, all others as negative.
    y_train = train["origin"] == origin
    model1.fit(x_train, y_train)
    # Keep the fitted model, keyed by the origin value it detects.
    models[origin] = model1

# One column per origin value; filled below with that model's positive-class probability.
testing_probs = pd.DataFrame(columns=unique_origin)
print(testing_probs)

X_test = test[feature]
for origin in unique_origin:
    # Column 1 of predict_proba is the probability of the True (positive) class.
    testing_probs[origin] = models[origin].predict_proba(X_test)[:, 1]
print(testing_probs.head(3))
Empty DataFrame
Columns: [1, 2, 3]
Index: []
          1         2         3
0  0.885148  0.132344  0.017013
1  0.885148  0.132344  0.017013
2  0.981668  0.024437  0.003563
# For each test row, pick the origin whose classifier reports the highest
# positive-class probability (idxmax over axis=1 returns that column label).
predicted_origins = testing_probs.idxmax(axis=1)
# print(...) with a single argument works on both Python 2 and Python 3.
print(predicted_origins.head(3))
0    1
1    1
2    3
dtype: int64
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值