scikit-learn初步学习

  这段时间学习了各种分类器的原理,感觉 scikit-learn 这个库挺好用的。下面的示例代码依次演示了决策树(极端随机树)、逻辑回归、高斯朴素贝叶斯、K 近邻,以及用于分类的支持向量机,最后用网格搜索进行参数调优。
 
#coding=utf-8
import numpy as np
import urllib
from sklearn import  preprocessing
from sklearn.ensemble import  ExtraTreesClassifier
from sklearn import  metrics
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import Ridge
from sklearn.grid_search import GridSearchCV
def savefile(path, content):
    """Write ``content`` to the file at ``path`` in binary mode.

    Any existing file at ``path`` is truncated and overwritten.

    Args:
        path: Location of the file to write.
        content: Bytes to write (the file is opened with mode ``"wb"``).
    """
    # The context manager closes the handle even when write() raises,
    # which the manual open/close pair did not guarantee.
    with open(path, "wb") as fp:
        fp.write(content)
def readfile(path):
    """Return the entire contents of the file at ``path`` as bytes.

    Args:
        path: Location of the file to read (opened with mode ``"rb"``).

    Returns:
        The raw, undecoded file contents.
    """
    # The context manager closes the handle even when read() raises,
    # which the manual open/close pair did not guarantee.
    with open(path, "rb") as fp:
        return fp.read()
# Download the comma-separated Pima Indians diabetes data and load it into
# a NumPy array.
#
# `urllib.urlopen` only exists on Python 2; on Python 3 the same function
# lives in `urllib.request`. Import whichever is available so this section
# runs on both.
try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
data = urlopen(url)
# Optional: keep a local copy of the raw download instead of parsing it.
# savefile(r"C:\Users\Administrator\Desktop\hh_practice.txt", data.read())
try:
    dataset = np.loadtxt(data, delimiter=",")
finally:
    # Release the HTTP connection whether or not parsing succeeded.
    data.close()
# print(dataset)

# Columns 0-7 are used as the feature matrix, column 8 as the target.
X = dataset[:, 0:8]
Y = dataset[:, 8]
# print("X:")
# print(X)
# print("Y:")
# print(Y)

# Data normalization (example, disabled)
# X=[[6,8,10],[600,800,1000],[600,800,10000]]
# normalized_X = preprocessing.normalize(X)
# print(normalized_X)

# Data standardization (example, disabled)
# standardized_X = preprocessing.scale(X)
# print(standardized_X)

# Tree-based model: fit an ExtraTreesClassifier on the whole dataset.
# fit() returns the estimator itself, so construction and training chain.
model = ExtraTreesClassifier().fit(X, Y)
# expected = Y
# predicted = model.predict(X)
# print(metrics.classification_report(expected, predicted))
# print(metrics.confusion_matrix(expected, predicted))
# Print the per-feature importance scores (see the disabled print below).
# The string literal that follows is the author's open question, kept
# verbatim: "why is the printed value different on every run?"
# NOTE(review): presumably because ExtraTreesClassifier is randomized and
# no random_state is passed above -- confirm against the sklearn docs.
"""
为什么每次打印的信息增益不一样
"""
# print(model.feature_importances_)

# Logistic regression: train on the full dataset, then predict on that same
# data and report the results.
# NOTE: the scores below are measured on the training data itself, not on a
# held-out set.
model = LogisticRegression()
model.fit(X, Y)
print(model)
expected = Y
predicted = model.predict(X)
# Function-call form of print works on both Python 2 (single argument) and
# Python 3, and matches the other print(...) calls in this file.
print(predicted)
# Prediction results: per-class report followed by the confusion matrix.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))


# Gaussian naive Bayes: train on the full dataset and score it on the same
# data it was trained on.
# fit() returns the estimator itself, so construction and training chain.
model = GaussianNB().fit(X, Y)
expected, predicted = Y, model.predict(X)
# Prediction results: per-class report followed by the confusion matrix.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# K-nearest neighbours: train on the full dataset, then predict on that same
# data and report the results.
model = KNeighborsClassifier()
model.fit(X, Y)
# Function-call form of print works on both Python 2 (single argument) and
# Python 3, and matches the other print(...) calls in this file.
print(model)
expected = Y
predicted = model.predict(X)
# Per-class report followed by the confusion matrix.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Decision tree: train on the full dataset and score it on the same data it
# was trained on.
# fit() returns the estimator itself, so construction and training chain.
model = DecisionTreeClassifier().fit(X, Y)
print(model)
expected, predicted = Y, model.predict(X)
# Per-class report followed by the confusion matrix.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Support vector classifier: train on the full dataset, then predict on that
# same data and report the results.
model = SVC()
model.fit(X, Y)
# Function-call form of print works on both Python 2 (single argument) and
# Python 3, and matches the other print(...) calls in this file.
print(model)
expected = Y
predicted = model.predict(X)
# Per-class report followed by the confusion matrix.
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Hyper-parameter tuning: grid-search the Ridge `alpha` parameter over a
# fixed list of candidate values.
# NOTE(review): this file imports GridSearchCV from `sklearn.grid_search`;
# newer scikit-learn releases provide it from `sklearn.model_selection`
# instead -- confirm which version this script targets.
alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
model = Ridge()
grid = GridSearchCV(estimator=model, param_grid=dict(alpha=alphas))
grid.fit(X, Y)
# Function-call form of print works on both Python 2 (single argument) and
# Python 3, and matches the other print(...) calls in this file.
print(grid)
# Best score found by the search, and the alpha value that achieved it.
print(grid.best_score_)
print(grid.best_estimator_.alpha)

 
  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值