机器学习之感知器

最新推荐文章于 2023-09-10 00:38:03 发布

aliyanah_

最新推荐文章于 2023-09-10 00:38:03 发布

阅读量388

点赞数

分类专栏：机器学习文章标签：机器学习感知器 python

本文链接：https://blog.csdn.net/aliyanah_/article/details/78293363

版权

机器学习专栏收录该内容

2 篇文章 0 订阅

订阅专栏

   感知器（Perceptron）于1957年由Rosenblatt提出，是神经网络与支持向量机的基础。感知器是二类分类的线性分类模型，它的输入为实例的特征向量X，输出为实例的类别y（y一般取值为 +1 和 -1）。感知器的目标是求出一个分离超平面，将输入空间中的实例划分为正负两类。感知器的学习过程就是利用训练数据求得这个分离超平面，为此需要一个衡量误分类程度的量，即损失函数；利用梯度下降法对损失函数进行极小化，即可得到感知器模型。
   感知器的具体算法可参考《统计学习方法》这本书。下面的代码是根据慕课网的机器学习视频学习总结得到的。

 # -*- coding: utf-8 -*-
"""
Created on Wed Oct 18 09:01:23 2017

@author: Administrator
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#定义一个分类器
class Perception(object):
    """Binary perceptron classifier with labels +1 / -1.

    Parameters
    ----------
    eta : float
        Learning rate applied to every weight update.
    n_iter : int
        Number of passes (epochs) over the training data.

    Attributes set by ``fit``
    -------------------------
    w_ : 1-d array of length n_features + 1
        ``w_[0]`` is the bias (threshold) term, ``w_[1:]`` are the feature
        weights.
    errors_ : list of int
        Number of weight updates (misclassified samples) in each epoch, one
        entry per epoch.
    """

    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def net_input(self, X):
        """Return the weighted sum ``X . w_[1:] + w_[0]``.

        ``X`` may be a single sample (1-d) or a matrix of samples (2-d, one
        sample per row); ``fit`` must have been called first so that ``w_``
        exists.
        """
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, xi):
        """Return the class label: 1 where the net input is >= 0, else -1."""
        return np.where(self.net_input(xi) >= 0.0, 1, -1)

    def fit(self, X, y):
        """Learn the weights from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples, e.g. ``[[1, 2, 3], [4, 5, 6]]``.
        y : sequence of length n_samples
            Target label (+1 or -1) of each sample, e.g. ``[1, -1]``.

        Returns
        -------
        self
            The fitted classifier, so calls can be chained.
        """
        X = np.asarray(X)
        # One weight per feature plus one extra slot, w_[0], for the bias.
        self.w_ = np.zeros(X.shape[1] + 1)
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                # Perceptron rule: update = eta * (target - prediction).
                # It is 0 when the sample is already classified correctly.
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                # The bias must accumulate like the other weights; assigning
                # it would reset it to 0 on every correctly classified sample.
                self.w_[0] += update
                errors += int(update != 0.0)
            # Record one misclassification count per epoch.
            self.errors_.append(errors)
        return self

#数据可视化
from matplotlib.colors import ListedColormap
def plot_dection_regions(x,y,classfiler,resolution=0.02):
    """Plot the decision regions of a fitted classifier over two features.

    Parameters
    ----------
    x : 2-d array
        Sample matrix; column 0 is drawn on the horizontal axis and column 1
        on the vertical axis.
    y : array
        Class labels; only the number of distinct labels is used, to decide
        how many colours the colour map needs.
    classfiler : object
        Fitted classifier exposing ``predict(samples)`` that returns one
        label per row of ``samples``.
    resolution : float
        Step between neighbouring grid points on both axes.

    The intermediate grids and predictions are printed for inspection, then
    the filled contour plot is shown.
    """
    colors = ('red', 'blue', 'green', 'gray', 'cyan')
    # Use as many colours as there are distinct class labels.
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # Plot range of both features.
    # NOTE(review): only the lower bound is padded by 1; the upper bound is
    # the raw maximum of the data.
    x1_min, x1_max = x[:, 0].min() - 1, x[:, 0].max()
    x2_min, x2_max = x[:, 1].min() - 1, x[:, 1].max()

    # np.meshgrid expands the two 1-d axes into two 2-d coordinate arrays of
    # shape (len(x2_axis), len(x1_axis)): xx1 repeats x1_axis on every row,
    # xx2 repeats each x2_axis value along its row.
    x1_axis = np.arange(x1_min, x1_max, resolution)
    x2_axis = np.arange(x2_min, x2_max, resolution)
    xx1, xx2 = np.meshgrid(x1_axis, x2_axis)

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(x1_axis.shape)
    print(x1_axis)
    print(x2_axis.shape)
    print(x2_axis)
    print(xx2.shape)
    print(xx2)

    # z holds the predicted class of every grid point (one row per point).
    z = classfiler.predict(np.array([xx1.ravel(), xx2.ravel()]).T)

    print(xx1.ravel())
    print(xx2.ravel())
    print(z)

    z = z.reshape(xx1.shape)
    print(z)

    # Fill each predicted class region with its colour; the boundary between
    # the colours is the decision boundary.
    plt.contourf(xx1, xx2, z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    plt.xlabel('the length of huajing')
    plt.ylabel('the length of huaban')

    # The location must be passed as ``loc``; a bare positional string is
    # interpreted as the label sequence. Only draw a legend when the current
    # axes actually has labelled artists.
    handles, _ = plt.gca().get_legend_handles_labels()
    if handles:
        plt.legend(loc='upper right')
    plt.show()


def main():
    """Train the perceptron on two iris species and plot the results.

    Downloads the UCI iris data set, keeps the first 100 rows (setosa and
    versicolor), uses columns 0 and 2 as the two features, and then shows
    three figures: the raw samples, the errors recorded during training,
    and the decision regions of the trained classifier.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
    df = pd.read_csv(url, header=None)

    # Column 4 holds the species name. Use positional slicing (iloc) so that
    # exactly 100 labels are selected; label-based ``loc[0:100]`` includes
    # row 100 as well and would not match the 100 feature rows below.
    y = df.iloc[0:100, 4].values
    # Convert the species strings to numeric labels: setosa -> 1, other -> -1.
    y = np.where(y == 'Iris-setosa', 1, -1)

    x = df.iloc[0:100, [0, 2]].values
    plt.scatter(x[:50, 0], x[:50, 1], color='red', marker='o', label='setosa')
    plt.scatter(x[50:100, 0], x[50:100, 1], color='blue', marker='*', label='versicolor')
    plt.xlabel('length of huajing')
    plt.ylabel('length of huaban')
    # ``loc`` must be a keyword; a positional string would replace the
    # scatter labels with its individual characters.
    plt.legend(loc='upper left')
    plt.show()

    p1 = Perception(eta=0.1)
    p1.fit(x, y)
    # Plot the error counts recorded by the classifier during training.
    plt.plot(range(1, len(p1.errors_) + 1), p1.errors_, marker='x')
    plt.xlabel('Epochs')
    plt.ylabel('error sort')
    plt.show()

    plot_dection_regions(x, y, p1)

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()