01利用sklean练习机器学习--sklean之感知机preceptron模型实践

1 介绍

台湾大学林轩田教授机器学习基石
02 Learning to Answer Yes or No
学习利用感知机来处理二分类模型

2 自己实现

#-*- coding:utf-8 -*-
from numpy import *
import matplotlib.pyplot as plt
import operator
import time
def createTrainDataSet():#训练样本,第一个1为阈值对应的w,下同
    trainData = [   [1, 1, 4],
                    [1, 2, 3],
                    [1, -2, 3],
                    [1, -2, 2],
                    [1, 0, 1],
                    [1, 1, 2]]
    label = [1, 1, 1, -1, -1,  -1]
    return trainData, label
def createTestDataSet():#数据样本
    testData = [   [1, 1, 1],
                   [1, 2, 0],
                   [1, 2, 4],
                   [1, 1, 3]]
    return testData
def sigmoid(X):
    X = float(X)
    if X > 0:
        return 1
    elif X < 0:
        return -1
    else:
        return 0
def pla(traindataIn,trainlabelIn):
    traindata=mat(traindataIn)
    trainlabel=mat(trainlabelIn).transpose()
    m,n=shape(traindata)
    w=ones((n,1))
    while True:
        iscompleted=True
        for i in range(m):
            if (sigmoid(dot(traindata[i],w))==trainlabel[i]):
                continue
            else:
                iscompleted=False
                w+=(trainlabel[i]*traindata[i]).transpose()
        if iscompleted:
            break
    return w
def classify(inX,w):
    result=sigmoid(sum(w*inX))
    if result>0:
        return 1
    else:
        return -1
def plotBestFit(w):
    traindata,label=createTrainDataSet()
    dataArr = array(traindata)
    n = shape(dataArr)[0]
    xcord1=[];ycord1=[]
    xcord2=[];ycord2=[]
    for i in range(n):
        if int(label[i])==1:
            xcord1.append(dataArr[i,1])
            ycord1.append(dataArr[i,2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    fig=plt.figure()
    ax= fig.add_subplot(111)
    ax.scatter(xcord1, ycord1,s=30,c='red',marker='s')
    ax.scatter(xcord2, ycord2,s=30,c='green')
    x = arange(-3.0, 3.0, 0.1)
    y = (-w[0]-w[1] * x)/w[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()
def classifyall(datatest,w):
    predict=[]
    for data in datatest:
        result=classify(data,w)
        predict.append(result)
    return predict
def main():
    trainData,label=createTrainDataSet()
    testdata=createTestDataSet()
    w=pla(trainData,label)
    result=classifyall(testdata,w)
    plotBestFit(w)
    print w
    print result
if __name__=='__main__':
    start = time.clock()
    main()
    end = time.clock()
    print('finish all in %s' % str(end - start))

参考:http://www.mamicode.com/info-detail-1434773.html

3 利用sklean

这里涉及利用sklean来生成数据,训练,保存,评估模型以及利用模型来预测等例子。

#-*- coding:utf-8 -*-
from sklearn.datasets import make_classification
from matplotlib import pyplot as plt
from sklearn.linear_model import Perceptron
from sklearn.externals import joblib
import numpy as np


x,y = make_classification(n_samples=1000, n_features=2,n_redundant=0,n_informative=1,n_clusters_per_class=1)

#n_samples:生成样本的数量

#n_features=2:生成样本的特征数,特征数=n_informative() + n_redundant + n_repeated

#n_informative:多信息特征的个数

#n_redundant:冗余信息,informative特征的随机线性组合

#n_clusters_per_class :某一个类别是由几个cluster构成的
#训练数据和测试数据
x_data_train = x[:800,:]
x_data_test = x[800:,:]
y_data_train = y[:800]
y_data_test = y[800:]

#正例和反例
positive_x1 = [x[i,0] for i in range(1000) if y[i] == 1]
positive_x2 = [x[i,1] for i in range(1000) if y[i] == 1]
negetive_x1 = [x[i,0] for i in range(1000) if y[i] == 0]
negetive_x2 = [x[i,1] for i in range(1000) if y[i] == 0]


#定义感知机
clf = Perceptron(fit_intercept=False,n_iter=30,shuffle=False)
#使用训练数据进行训练
clf.fit(x_data_train,y_data_train)
#得到训练结果,权重矩阵
print(clf.coef_)
#输出为:[[-0.38478876,4.41537463]]

#超平面的截距,此处输出为:[0.]
print(clf.intercept_)
#保存模型
joblib.dump(clf,'pla.model')
#利用测试数据进行验证
CLF = joblib.load('pla.model')
acc = CLF.score(x_data_test,y_data_test)
print(acc)
#得到的输出结果为0.995,这个结果还不错吧。

#测试单个事例
print "[2.0,1.0] predict:",CLF.predict([2.0,1.0])
#画出正例和反例的散点图
plt.scatter(positive_x1,positive_x2,c='red')
plt.scatter(negetive_x1,negetive_x2,c='blue')
#画出超平面(在本例中即是一条直线)
line_x = np.arange(-4,4)
line_y = line_x * (-CLF.coef_[0][0] / CLF.coef_[0][1]) - CLF.intercept_
plt.plot(line_x,line_y)
plt.show()

这里写图片描述

参考https://zhuanlan.zhihu.com/p/27152953

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值