python机器学习

最新推荐文章于 2024-07-21 01:39:27 发布

潜水的飞鱼baby

最新推荐文章于 2024-07-21 01:39:27 发布

阅读量848

点赞数 1

分类专栏： python_numpy 文章标签： python 机器学习

本文链接：https://blog.csdn.net/u011702002/article/details/78348268

版权

python_numpy 专栏收录该内容

25 篇文章 2 订阅

订阅专栏

准备数据

import numpy as np
import urllib.request

"""
    下载数据
    dataset前2行数据
    >>> dataset[:2]
    array([[   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
           0.627,   50.   ,    1.   ],
       [   1.   ,   85.   ,   66.   ,   29.   ,    0.   ,   26.6  ,
           0.351,   31.   ,    0.   ]])
    >>> 
"""

url = """http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"""
raw_data = urllib.request.urlopen(url)
# 数据集
dataset = np.loadtxt(raw_data , delimiter=",")
# 输出数据前两行作为范例

print("数据前2行：")
print(dataset[:2])

X = dataset[:,:8]
Y = dataset[:,8]

print("=================================")

logistic回归模型

"""
    logistic 回归模型
"""
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
# 训练模型
model.fit(X,Y)

expected = Y
predicted = model.predict(X)

print("罗基斯特回归模型：")
print(model)
print()
print(metrics.classification_report(expected, predicted))
# 混淆矩阵
print(metrics.confusion_matrix(expected, predicted))
print()

输出：

罗基斯特回归模型：
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

             precision    recall  f1-score   support

        0.0       0.79      0.90      0.84       500
        1.0       0.74      0.55      0.63       268

avg / total       0.77      0.77      0.77       768

[[448  52]
 [121 147]]

朴素贝叶斯分类模型

"""
    朴素贝叶斯分类模型
"""
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X,Y)

expected = Y
predicted = model.predict(X)

print("朴素贝叶斯分类模型：")
print(model)
print()
print(metrics.classification_report(expected, predicted))
# 混淆矩阵
print(metrics.confusion_matrix(expected, predicted))
print()

输出：

朴素贝叶斯分类模型：
GaussianNB(priors=None)

             precision    recall  f1-score   support

        0.0       0.80      0.84      0.82       500
        1.0       0.68      0.62      0.64       268

avg / total       0.76      0.76      0.76       768

[[421  79]
 [103 165]]

KNN算法


"""
    K-最邻近
"""
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier()

model.fit(X,Y)

expected = Y
predicted = model.predict(X)

print("K-最邻近：")
print(model)
print()
print(metrics.classification_report(expected, predicted))
# 混淆矩阵
print(metrics.confusion_matrix(expected, predicted))
print()

输出：

K-最邻近：
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

             precision    recall  f1-score   support

        0.0       0.83      0.88      0.85       500
        1.0       0.75      0.65      0.70       268

avg / total       0.80      0.80      0.80       768

[[442  58]
 [ 93 175]]

决策树模型

"""
    决策树
"""
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()

model.fit(X,Y)

expected = Y
predicted = model.predict(X)

print("决策树：")
print(model)
print()
print(metrics.classification_report(expected, predicted))
# 混淆矩阵
print(metrics.confusion_matrix(expected, predicted))

输出（注意：这里是在训练数据上进行评估，未限制深度的决策树会完全拟合训练集，因此各项指标均为 1.00，并不代表模型的泛化能力）：

决策树：
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00       500
        1.0       1.00      1.00      1.00       268

avg / total       1.00      1.00      1.00       768

[[500   0]
 [  0 268]]

SVC

"""
    支持向量机器
"""
from sklearn import metrics
from sklearn.svm import SVC

model = SVC()

model.fit(X,Y)

expected = Y
predicted = model.predict(X)
print("SVC：")
print(model)
print()
print(metrics.classification_report(expected, predicted))
# 混淆矩阵
print(metrics.confusion_matrix(expected, predicted))

输出（注意：同样是在训练数据上进行评估，1.00 的得分反映的是对训练集的过拟合，而非模型的泛化能力）：

SVC：
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

             precision    recall  f1-score   support

        0.0       1.00      1.00      1.00       500
        1.0       1.00      1.00      1.00       268

avg / total       1.00      1.00      1.00       768

[[500   0]
 [  0 268]]