python保存模型 drop_python机器学习常用模型使用

最新推荐文章于 2022-07-18 14:50:28 发布

weixin_39995280

最新推荐文章于 2022-07-18 14:50:28 发布

阅读量202

点赞数

文章标签： python保存模型 drop

import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

import seaborn as sns

# joblib is only needed by the optional model-persistence hints further down.
# Recent scikit-learn releases no longer ship it under sklearn.externals, so
# prefer the standalone package and fall back to the legacy path on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

from sklearn.model_selection import train_test_split

# Target values strictly above this threshold become class 1, the rest class 0.
POSITIVE_THRESHOLD = 4.8

# Load the already-cleaned data set.
df = pd.read_csv("nonulleye.csv")

print(df.head())

# "y" is the target column; every other column is used as a feature.
y = df["y"].values

x_data = df.drop(["y"], axis=1)

# Min-max scale each feature column. The per-column minimum and maximum are
# taken explicitly so the result does not depend on how np.min/np.max reduce a
# DataFrame in the installed pandas version.
# NOTE(review): the scaling statistics are computed on the full data set before
# the split, so test rows influence the scaling of the training rows.
col_min = x_data.min()
col_max = x_data.max()
# A constant column has zero range; divide by 1 instead so it scales to 0
# rather than turning into NaN.
col_range = (col_max - col_min).replace(0, 1)
x = (x_data - col_min) / col_range

# Hold out 10% of the rows as the test set (test_size=0.10); fixed seed for
# a reproducible split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=42)

# Binarise the continuous target for the classifiers below.
y_train_01 = [1 if each > POSITIVE_THRESHOLD else 0 for each in y_train]

y_test_01 = [1 if each > POSITIVE_THRESHOLD else 0 for each in y_test]

# Logistic regression: fit on the training split, print accuracy, precision,
# recall and F1 on the test split, and draw a confusion-matrix heatmap for
# both the test and the training split.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score

lrc = LogisticRegression()
lrc.fit(x_train, y_train_01)

print("LogisticRegression score: ", lrc.score(x_test, y_test_01))

# Spot-check two individual test rows against their true labels.
for row in (1, 2):
    sample_pred = lrc.predict(x_test.iloc[[row], :])
    print(f"real value of y_test_01[{row}]: {y_test_01[row]!s} -> the predict: {sample_pred!s}")

# Predict the whole test split once and reuse it for every metric below.
lrc_test_pred = lrc.predict(x_test)

# Confusion matrix on the test split, shown as a heatmap.
cm_lrc = confusion_matrix(y_test_01, lrc_test_pred)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_lrc,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("lrc Test for Test Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

print("precision_score: ", precision_score(y_test_01, lrc_test_pred))
print("recall_score: ", recall_score(y_test_01, lrc_test_pred))
print("f1_score: ", f1_score(y_test_01, lrc_test_pred))

# Same confusion matrix, this time on the training split.
cm_lrc_train = confusion_matrix(y_train_01, lrc.predict(x_train))

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_lrc_train,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Train Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

# Support Vector Machine: fit an SVC on the training split, print its test
# metrics, and draw confusion-matrix heatmaps for the test and training splits.
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
import seaborn as sns
import matplotlib.pyplot as plt

svm = SVC(random_state=1)
svm.fit(x_train, y_train_01)

print("SVC score: ", svm.score(x_test, y_test_01))

# Spot-check two individual test rows against their true labels.
for row in (1, 2):
    sample_pred = svm.predict(x_test.iloc[[row], :])
    print(f"real value of y_test_01[{row}]: {y_test_01[row]!s} -> the predict: {sample_pred!s}")

# Predict the whole test split once and reuse it for every metric below.
svm_test_pred = svm.predict(x_test)

# Confusion matrix on the test split, shown as a heatmap.
cm_svm = confusion_matrix(y_test_01, svm_test_pred)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_svm,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("SVC Test for Test Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

print("precision_score: ", precision_score(y_test_01, svm_test_pred))
print("recall_score: ", recall_score(y_test_01, svm_test_pred))
print("f1_score: ", f1_score(y_test_01, svm_test_pred))

# Same confusion matrix, this time on the training split.
cm_svm_train = confusion_matrix(y_train_01, svm.predict(x_train))

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_svm_train,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Train Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

# Gaussian Naive Bayes: fit on the training split, print test metrics, and
# draw confusion-matrix heatmaps for the test and training splits.
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
import seaborn as sns
import matplotlib.pyplot as plt

nb = GaussianNB()
nb.fit(x_train, y_train_01)

print("GaussianNB score: ", nb.score(x_test, y_test_01))

# Spot-check two individual test rows against their true labels.
for row in (1, 2):
    sample_pred = nb.predict(x_test.iloc[[row], :])
    print(f"real value of y_test_01[{row}]: {y_test_01[row]!s} -> the predict: {sample_pred!s}")

# Predict the whole test split once and reuse it for every metric below.
nb_test_pred = nb.predict(x_test)

# Confusion matrix on the test split, shown as a heatmap.
cm_nb = confusion_matrix(y_test_01, nb_test_pred)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_nb,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("GaussianNB Test for Test Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

print("precision_score: ", precision_score(y_test_01, nb_test_pred))
print("recall_score: ", recall_score(y_test_01, nb_test_pred))
print("f1_score: ", f1_score(y_test_01, nb_test_pred))

# Same confusion matrix, this time on the training split.
cm_nb_train = confusion_matrix(y_train_01, nb.predict(x_train))

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_nb_train,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Train Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

# Decision tree classification: fit on the training split, print test
# metrics, and draw confusion-matrix heatmaps for the test and training splits.
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
import seaborn as sns
import matplotlib.pyplot as plt

dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train_01)

print("DecisionTree score: ", dtc.score(x_test, y_test_01))

# Spot-check two individual test rows against their true labels.
for row in (1, 2):
    sample_pred = dtc.predict(x_test.iloc[[row], :])
    print(f"real value of y_test_01[{row}]: {y_test_01[row]!s} -> the predict: {sample_pred!s}")

# Optional: persist the fitted tree to disk.
# joblib.dump(dtc, filename="dtc.model")

# Predict the whole test split once and reuse it for every metric below.
dtc_test_pred = dtc.predict(x_test)

# Confusion matrix on the test split, shown as a heatmap.
cm_dtc = confusion_matrix(y_test_01, dtc_test_pred)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_dtc,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Test Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

print("precision_score: ", precision_score(y_test_01, dtc_test_pred))
print("recall_score: ", recall_score(y_test_01, dtc_test_pred))
print("f1_score: ", f1_score(y_test_01, dtc_test_pred))

# Same confusion matrix, this time on the training split.
cm_dtc_train = confusion_matrix(y_train_01, dtc.predict(x_train))

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_dtc_train,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Train Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

# Random forest classification: fit a 100-tree forest on the training split,
# print test metrics, and draw confusion-matrix heatmaps for both splits.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
import seaborn as sns
import matplotlib.pyplot as plt

rfc = RandomForestClassifier(n_estimators=100, random_state=1)
rfc.fit(x_train, y_train_01)

print("RandomForest score: ", rfc.score(x_test, y_test_01))

# Spot-check two individual test rows against their true labels.
for row in (1, 2):
    sample_pred = rfc.predict(x_test.iloc[[row], :])
    print(f"real value of y_test_01[{row}]: {y_test_01[row]!s} -> the predict: {sample_pred!s}")

# Optional: persist the fitted forest to disk, or inspect class probabilities.
# joblib.dump(rfc, filename="rfc.model")
# print(rfc.predict_proba(x_test))

# Predict the whole test split once and reuse it for every metric below.
rfc_test_pred = rfc.predict(x_test)

# Confusion matrix on the test split, shown as a heatmap.
cm_rfc = confusion_matrix(y_test_01, rfc_test_pred)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_rfc,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Test Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

print("precision_score: ", precision_score(y_test_01, rfc_test_pred))
print("recall_score: ", recall_score(y_test_01, rfc_test_pred))
print("f1_score: ", f1_score(y_test_01, rfc_test_pred))

# Same confusion matrix, this time on the training split.
cm_rfc_train = confusion_matrix(y_train_01, rfc.predict(x_train))

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm_rfc_train,
    annot=True,
    linewidths=0.5,
    linecolor="red",
    fmt=".0f",
    ax=ax,
)
plt.title("Test for Train Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

# K Nearest Neighbors classification: plot test accuracy against k, then fit
# the final model with k = 3, print its test metrics, and draw
# confusion-matrix heatmaps for the test and training splits.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
import seaborn as sns
import matplotlib.pyplot as plt

# Scan k = 1..49 and record the test accuracy for each value. The upper bound
# is capped at the number of training rows because a neighbour query cannot
# ask for more neighbours than there are fitted samples.
# NOTE(review): the accuracy curve is measured on the test split, so choosing
# k from it makes the reported test score optimistic.
k_values = range(1, min(50, len(x_train) + 1))

scores = []

for each in k_values:
    knn_n = KNeighborsClassifier(n_neighbors=each)
    knn_n.fit(x_train, y_train_01)
    scores.append(knn_n.score(x_test, y_test_01))

plt.plot(k_values, scores)
plt.xlabel("k")
plt.ylabel("accuracy")
plt.show()

# Final model; n_neighbors is the k of the algorithm.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train_01)

print("KNN score of 3 :", knn.score(x_test, y_test_01))

# Spot-check two individual test rows against their true labels.
print("real value of y_test_01[1]: " + str(y_test_01[1]) + " -> the predict: " + str(knn.predict(x_test.iloc[[1], :])))
print("real value of y_test_01[2]: " + str(y_test_01[2]) + " -> the predict: " + str(knn.predict(x_test.iloc[[2], :])))

# Optional: inspect class probabilities.
# print(knn.predict_proba(x_test))

# Predict the whole test split once and reuse it for every metric below.
knn_test_pred = knn.predict(x_test)

# Confusion matrix on the test split, shown as a heatmap.
cm_knn = confusion_matrix(y_test_01, knn_test_pred)

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_knn, annot=True, linewidths=0.5, linecolor="red", fmt=".0f", ax=ax)
plt.title("Test for Test Dataset")
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.show()

print("precision_score: ", precision_score(y_test_01, knn_test_pred))
print("recall_score: ", recall_score(y_test_01, knn_test_pred))
print("f1_score: ", f1_score(y_test_01, knn_test_pred))

# Same confusion matrix, this time on the training split.
cm_knn_train = confusion_matrix(y_train_01, knn.predict(x_train))

f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_knn_train, annot=True, linewidths=0.5, linecolor="red", fmt=".0f", ax=ax)
plt.xlabel("predicted y values")
plt.ylabel("real y values")
plt.title("Test for Train Dataset")
plt.show()

# Comparison of the classification algorithms: bar chart of each fitted
# model's accuracy on the test split.
# Dedicated names are used for the chart data so the feature matrix `x` and
# the target array `y` defined earlier are not overwritten.
model_names = ["LogisticReg.", "SVM", "GNB", "Dec.Tree", "Ran.Forest", "KNN"]

# Same order as model_names.
fitted_models = [lrc, svm, nb, dtc, rfc, knn]

model_scores = np.array([model.score(x_test, y_test_01) for model in fitted_models])

plt.bar(model_names, model_scores)

plt.title("Comparison of Classification Algorithms")

plt.xlabel("Classification")

plt.ylabel("Score")

plt.show()

weixin_39995280

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python保存模型 drop_python机器学习常用模型使用

import pandas as pdimport matplotlib.pyplot as pltimport numpy as npimport seaborn as snsfrom sklearn.externals import joblibdf=pd.read_csv("nonulleye.csv")//导入已经清洗完的数据print(df.head())y = df["y"].valu...
复制链接

扫一扫