有两篇文章写的比我详细,内容比这里多,但是本宝宝没有抄袭,这是学习笔记,附别人的链接:
https://blog.csdn.net/u012084802/article/details/78865445
https://www.cnblogs.com/amanda-x/p/7801427.html
scikit-learn参考链接:
http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
http://scikit-learn.org/stable/modules/naive_bayes.html
http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html#examples-using-sklearn-naive-bayes-gaussiannb
注:
*.ipynb的是运行文件
运行命令:
jupyter notebook *.ipynb
源代码之码云下载:
https://gitee.com/iscas/machine_learning_improve/tree/master
执行文件:
class_vis.py
#!/usr/bin/python
#from udacityplots import *
import warnings
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np
#import numpy as np
#import matplotlib.pyplot as plt
#plt.ioff()
def prettyPicture(clf, X_test, y_test):
x_min = 0.0; x_max = 1.0
y_min = 0.0; y_max = 1.0
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, m_max]x[y_min, y_max].
h = .01 # step size in the mesh
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)
# Plot also the test points
grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]
bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]
grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]
bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]
plt.scatter(grade_sig, bumpy_sig, color = "b", label="fast")
plt.scatter(grade_bkg, bumpy_bkg, color = "r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.savefig("test.png")
import base64
import json
import subprocess
def output_image(name, format, bytes):
image_start = "BEGIN_IMAGE_f9825uweof8jw9fj4r8"
image_end = "END_IMAGE_0238jfw08fjsiufhw8frs"
data = {}
data['name'] = name
data['format'] = format
data['bytes'] = base64.encodestring(bytes)
print image_start+json.dumps(data)+image_end
classify.py
#!/usr/bin/python
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
def NBAccuracy(features_train, labels_train, features_test, labels_test):
""" compute the accuracy of your Naive Bayes classifier """
### import the sklearn module for GaussianNB
### create classifier
clf = GaussianNB()
### fit the classifier on the training features and labels
clf.fit(features_train, labels_train)
### use the trained classifier to predict labels for the test features
pred = clf.predict(features_test)
### calculate and return the accuracy on the test data
### this is slightly different than the example,
### where we just print the accuracy
### you might need to import an sklearn module
accuracy = accuracy_score(labels_test, pred)
return accuracy
ClassifyNB.py
#!/usr/bin/python
from sklearn.naive_bayes import GaussianNB
def classify(features_train, labels_train):
### import the sklearn module for GaussianNB
### create classifier
### fit the classifier on the training features and labels
### return the fit classifier
### your code goes here!
gnb = GaussianNB()
clf = gnb.fit(features_train, labels_train)
print '--clf---'
print clf
print '---clf--'
return clf
prep_terrain_data.py
#!/usr/bin/python
import random
def makeTerrainData(n_points=1000):
###############################################################################
### make the toy dataset
random.seed(42)
grade = [random.random() for ii in range(0,n_points)]
bumpy = [random.random() for ii in range(0,n_points)]
error = [random.random() for ii in range(0,n_points)]
y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)]
for ii in range(0, len(y)):
if grade[ii]>0.8 or bumpy[ii]>0.8:
y[ii] = 1.0
### split into train/test sets
X = [[gg, ss] for gg, ss in zip(grade, bumpy)]
split = int(0.75*n_points)
X_train = X[0:split]
X_test = X[split:]
y_train = y[0:split]
y_test = y[split:]
grade_sig = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==0]
bumpy_sig = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==0]
grade_bkg = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==1]
bumpy_bkg = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==1]
# training_data = {"fast":{"grade":grade_sig, "bumpiness":bumpy_sig}
# , "slow":{"grade":grade_bkg, "bumpiness":bumpy_bkg}}
grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]
bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]
grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]
bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]
test_data = {"fast":{"grade":grade_sig, "bumpiness":bumpy_sig}
, "slow":{"grade":grade_bkg, "bumpiness":bumpy_bkg}}
###print X_train
### print '\n'
###print y_train
###print '\n'
###print X_test
###print '\n'
###print y_test
return X_train, y_train, X_test, y_test
# return training_data, test_data
Studentcode.ipynb
#!/usr/bin/python
from class_vis import prettyPicture
from prep_terrain_data import makeTerrainData
from classify import NBAccuracy
import matplotlib.pyplot as plt
import numpy as np
import pylab as pl
features_train, labels_train, features_test, labels_test = makeTerrainData()
def submitAccuracy(_features_train, _labels_train, _features_test, _labels_test):
accuracy = NBAccuracy(_features_train, _labels_train, _features_test, _labels_test)
return accuracy
if __name__ == '__main__':
print submitAccuracy(features_train, labels_train, features_test, labels_test)
studentMain.ipynb
#!/usr/bin/python
""" Complete the code in ClassifyNB.py with the sklearn
Naive Bayes classifier to classify the terrain data.
The objective of this exercise is to recreate the decision
boundary found in the lesson video, and make a plot that
visually shows the decision boundary """
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify
import numpy as np
import pylab as pl
features_train, labels_train, features_test, labels_test = makeTerrainData()
### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]
# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)
### draw the decision boundary with the text points overlaid
prettyPicture(clf, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())