准备数据
import numpy as np
import urllib.request
"""
Download the data.

First two rows of ``dataset``:
>>> dataset[:2]
array([[ 6. , 148. , 72. , 35. , 0. , 33.6 ,
0.627, 50. , 1. ],
[ 1. , 85. , 66. , 29. , 0. , 26.6 ,
0.351, 31. , 0. ]])
>>>
"""
url = """http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"""
# Open the response as a context manager so the HTTP connection is released
# as soon as parsing finishes, rather than staying open until garbage
# collection.
with urllib.request.urlopen(url) as raw_data:
    # The data set: comma-separated numeric rows parsed into a 2-D array.
    dataset = np.loadtxt(raw_data, delimiter=",")
# Print the first two rows of the data as a sample.
print("数据前2行:")
print(dataset[:2])
# The first eight columns go to X; the ninth column goes to Y.
X = dataset[:, :8]
Y = dataset[:, 8]
print("=================================")
logistic回归模型
# Logistic regression model: train on (X, Y), then score the predictions
# made for that same X against Y.
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# fit() hands back the estimator itself, so construction and training chain.
model = LogisticRegression().fit(X, Y)
predicted = model.predict(X)
expected = Y

# Title, estimator repr and one blank separator line in a single call.
print("罗基斯特回归模型:", model, "", sep="\n")

report = metrics.classification_report(expected, predicted)
print(report)

# Confusion matrix, followed by a blank separator line.
confusion = metrics.confusion_matrix(expected, predicted)
print(confusion, end="\n\n")
输出:
罗基斯特回归模型:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
verbose=0, warm_start=False)
             precision    recall  f1-score   support

        0.0       0.79      0.90      0.84       500
        1.0       0.74      0.55      0.63       268

avg / total       0.77      0.77      0.77       768
[[448 52]
[121 147]]
朴素贝叶斯分类模型
# Gaussian naive Bayes classifier: train on (X, Y), then score the
# predictions made for that same X against Y.
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

# fit() hands back the estimator itself, so construction and training chain.
model = GaussianNB().fit(X, Y)
predicted = model.predict(X)
expected = Y

# Title, estimator repr and one blank separator line in a single call.
print("朴素贝叶斯分类模型:", model, "", sep="\n")

report = metrics.classification_report(expected, predicted)
print(report)

# Confusion matrix, followed by a blank separator line.
confusion = metrics.confusion_matrix(expected, predicted)
print(confusion, end="\n\n")
输出:
朴素贝叶斯分类模型:
GaussianNB(priors=None)
             precision    recall  f1-score   support

        0.0       0.80      0.84      0.82       500
        1.0       0.68      0.62      0.64       268

avg / total       0.76      0.76      0.76       768
[[421 79]
[103 165]]
KNN算法
# K-nearest neighbours classifier: train on (X, Y), then score the
# predictions made for that same X against Y.
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

# fit() hands back the estimator itself, so construction and training chain.
model = KNeighborsClassifier().fit(X, Y)
predicted = model.predict(X)
expected = Y

# Title, estimator repr and one blank separator line in a single call.
print("K-最邻近:", model, "", sep="\n")

report = metrics.classification_report(expected, predicted)
print(report)

# Confusion matrix, followed by a blank separator line.
confusion = metrics.confusion_matrix(expected, predicted)
print(confusion, end="\n\n")
输出:
K-最邻近:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
metric_params=None, n_jobs=1, n_neighbors=5, p=2,
weights='uniform')
             precision    recall  f1-score   support

        0.0       0.83      0.88      0.85       500
        1.0       0.75      0.65      0.70       268

avg / total       0.80      0.80      0.80       768
[[442 58]
[ 93 175]]
决策树模型
# Decision tree classifier: train on (X, Y), then score the predictions
# made for that same X against Y.
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

# fit() hands back the estimator itself, so construction and training chain.
model = DecisionTreeClassifier().fit(X, Y)
predicted = model.predict(X)
expected = Y

# Title, estimator repr and one blank separator line in a single call.
print("决策树:", model, "", sep="\n")

report = metrics.classification_report(expected, predicted)
print(report)

# Confusion matrix.
confusion = metrics.confusion_matrix(expected, predicted)
print(confusion)
输出:
决策树:
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=None,
splitter='best')
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00       500
        1.0       1.00      1.00      1.00       268

avg / total       1.00      1.00      1.00       768
[[500 0]
[ 0 268]]
SVC
# Support vector machine classifier (SVC): train on (X, Y), then score the
# predictions made for that same X against Y.
from sklearn import metrics
from sklearn.svm import SVC

# fit() hands back the estimator itself, so construction and training chain.
model = SVC().fit(X, Y)
predicted = model.predict(X)
expected = Y

# Title, estimator repr and one blank separator line in a single call.
print("SVC:", model, "", sep="\n")

report = metrics.classification_report(expected, predicted)
print(report)

# Confusion matrix.
confusion = metrics.confusion_matrix(expected, predicted)
print(confusion)
输出:
SVC:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00       500
        1.0       1.00      1.00      1.00       268

avg / total       1.00      1.00      1.00       768
[[500 0]
[ 0 268]]