『数据挖掘』scikit-learn包的初级学习

代码来源:【机器学习实验】scikit-learn的主要模块和基本使用

# coding:utf-8
# create_time = "2016-05-26"

# Data loading
import contextlib

import numpy as np
import urllib

# urllib.urlopen only exists on Python 2; Python 3 exposes it as
# urllib.request.urlopen. Try the Python 3 location first and fall back.
try:
    from urllib.request import urlopen
except ImportError:
    urlopen = urllib.urlopen

# Data file hosted in the UCI Machine Learning Repository
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"

# Download the file and parse it while the connection is open; closing()
# guarantees the response is released afterwards on both Python 2 and 3.
with contextlib.closing(urlopen(url)) as raw_data:
    # load the CSV file as a numpy matrix
    dataset = np.loadtxt(raw_data, delimiter=",")

# Separate the feature columns from the target column. The target is column 8,
# so all eight preceding columns (0-7) are features; the previous slice 0:7
# stopped one column short and silently dropped column 7.
x = dataset[:, 0:8]
y = dataset[:, 8]

# Data normalization
from sklearn import preprocessing

# Standardized copy of the attributes (preprocessing.scale; by default each
# column is centered and scaled to unit variance)
standardized_x = preprocessing.scale(x)

# Normalized copy of the attributes (preprocessing.normalize; by default each
# sample row is rescaled to unit L2 norm)
normalized_x = preprocessing.normalize(x)

# Feature selection
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier

# Train an extra-trees ensemble on the data; fit() hands back the estimator
model = ExtraTreesClassifier().fit(x, y)

# Show how much relative importance the fitted ensemble gives each attribute
importances = model.feature_importances_
print(importances)

# Logistic regression
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# Train the classifier and echo its configuration
model = LogisticRegression().fit(x, y)
print(model)

# Predict on the same data the model was trained on
expected, predicted = y, model.predict(x)

# Print the per-class report first, then the confusion matrix
for summarize in (metrics.classification_report, metrics.confusion_matrix):
    print(summarize(expected, predicted))

# Naive Bayes
# (fixed: the module is `metrics`, not `metries`)
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

# fit a Gaussian naive Bayes model to the data
model = GaussianNB()
model.fit(x, y)
print(model)

# make predictions on the training data
# (fixed: this was assigned to the misspelled name `exceptd`, so the report
# below only worked by reusing `expected` left over from an earlier section)
expected = y
predicted = model.predict(x)

# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# K-nearest neighbors
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

# fit a k-nearest neighbor model to the data
# (fixed: the class name is KNeighborsClassifier; `KneighborsClassifier`
# raised NameError)
model = KNeighborsClassifier()
model.fit(x, y)
print(model)

# make predictions on the training data
expected = y
predicted = model.predict(x)

# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Decision tree
from sklearn import metrics
# (fixed: `form` -> `from`; the typo was a SyntaxError)
from sklearn.tree import DecisionTreeClassifier

# fit a CART model to the data
model = DecisionTreeClassifier()
model.fit(x, y)
print(model)

# make predictions on the training data
expected = y
predicted = model.predict(x)

# summarize the fit of the model
# (fixed: the report was passed the misspelled name `exceptd` instead of the
# `expected` assigned just above)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Support vector machine
from sklearn import metrics
# (fixed: the module is sklearn.svm; `sklearn.svn` does not exist)
from sklearn.svm import SVC

# fit a SVM model to the data
model = SVC()
model.fit(x, y)
print(model)

# make predictions on the training data
expected = y
predicted = model.predict(x)

# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))

# Tuning algorithm parameters: exhaustive grid search
import numpy as np
from sklearn.linear_model import Ridge

# GridSearchCV lives in sklearn.model_selection; the older sklearn.grid_search
# module was deprecated and later removed, so fall back to it only on old
# scikit-learn installs.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV

# prepare a range of alpha values to test
alphas = np.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
# create and fit a ridge regression model, testing each alpha
model = Ridge()
grid = GridSearchCV(estimator=model, param_grid=dict(alpha=alphas))
grid.fit(x, y)
print(grid)

# summarize the results of the grid search: best cross-validated score and
# the alpha of the estimator that achieved it
print(grid.best_score_)
print(grid.best_estimator_.alpha)

# Tuning algorithm parameters: randomized search
import numpy as np
from scipy.stats import uniform as sp_rand
from sklearn.linear_model import Ridge

# RandomizedSearchCV lives in sklearn.model_selection; the older
# sklearn.grid_search module was deprecated and later removed, so fall back
# to it only on old scikit-learn installs.
try:
    from sklearn.model_selection import RandomizedSearchCV
except ImportError:
    from sklearn.grid_search import RandomizedSearchCV

# prepare a uniform distribution to sample for the alpha parameter
# (scipy's uniform() with default arguments)
param_grid = {"alpha": sp_rand()}
# create and fit a ridge regression model, testing random alpha values
model = Ridge()
rsearch = RandomizedSearchCV(estimator=model, param_distributions=param_grid)
rsearch.fit(x, y)
print(rsearch)

# summarize the results of the random parameter search: best cross-validated
# score and the alpha of the estimator that achieved it
print(rsearch.best_score_)
print(rsearch.best_estimator_.alpha)


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值