python实现鸢尾花两个特征训练的方法源码整理。

最新推荐文章于 2022-06-19 15:09:21 发布

海宝7号

最新推荐文章于 2022-06-19 15:09:21 发布

阅读量348

点赞数

分类专栏： python2021 文章标签：深度学习神经网络机器学习数据挖掘

本文链接：https://blog.csdn.net/dongbao520/article/details/115912846

版权

python2021 专栏收录该内容

186 篇文章 12 订阅

订阅专栏

本文用 NumPy 手写一个三分类的逻辑回归（以最后一个类别为参照类），只取鸢尾花数据集的前两个特征（花萼长度、花萼宽度）进行训练，并在同一份数据上预测、统计误判个数。
源码如下:

#!/usr/bin/env python 
# -*- coding:utf-8 -*-
# Author's_name_is_NIKOLA_SS
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import itertools as it
import matplotlib as mpl
from matplotlib import colors

# Use the SimHei font so Chinese text in figures is not rendered as garbled
# glyphs, and turn off the Unicode minus sign for the same reason.
mpl.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# LogisticRegression算法，训练数据.
#传入参数为数据集（包括特征数据及标签数据），结果返回训练得到的参数 W

def LogRegressionAlgorithm(datas, labels, n_iter=1000, learning_rate=1.0, reg=0.01):
    """Train a three-class logistic-regression model with gradient descent.

    The features are standardised (zero mean, unit standard deviation) and a
    bias column of ones is prepended. Weights are learned for the first two
    classes in ``list(set(labels))``; every other sample falls into the
    implicit reference class whose score is fixed at 0.

    Args:
        datas: 2-D float array, one sample per row and one feature per column.
        labels: 1-D array with one class label per row of ``datas``.
        n_iter: Number of gradient-descent steps.
        learning_rate: Factor applied to the gradient at every step.
        reg: L2 (Gaussian prior) coefficient that keeps the weights small.

    Returns:
        Array ``W`` of shape ``(2, n_features + 1)``; column 0 holds the bias
        weight of each of the two modelled classes.
    """
    # NOTE: the class order comes from set iteration. It is left unsorted on
    # purpose so it matches callers that build their label -> index mapping
    # with the same ``list(set(labels))`` expression.
    kinds = list(set(labels))
    K = 3  # number of classes this trainer models (two weight rows + reference)
    n_samples, n_params = datas.shape[0], datas.shape[1] + 1

    means = datas.mean(axis=0)
    stds = datas.std(axis=0)
    # A constant feature has zero standard deviation; dividing by it would
    # fill the design matrix (and therefore W) with NaN. Scale it by 1 instead.
    stds = np.where(stds == 0, 1.0, stds)

    data = np.ones((n_samples, n_params))
    data[:, 1:] = (datas - means) / stds

    W = np.zeros((K - 1, n_params))
    # Empirical feature expectation of each modelled class.
    priorEs = np.array([
        1.0 / n_samples * np.sum(data[labels == kinds[k]], axis=0)
        for k in range(K - 1)
    ])

    for _ in range(n_iter):
        wx = np.exp(np.dot(W, data.transpose()))
        # (K-1, N) matrix of class probabilities; the "+ 1" in the
        # denominator is the reference class, whose score is exp(0).
        probs = np.divide(wx, 1 + np.sum(wx, axis=0).transpose())
        # Feature expectation under the current model.
        posteriorEs = 1.0 / n_samples * np.dot(probs, data)
        gradients = posteriorEs - priorEs + reg * W
        W -= learning_rate * gradients
    print("输出W为：", W)
    return W

#根据训练得到的参数W和数据集，进行预测。输入参数为数据集和由LogisticRegression算法得到的参数W，返回值为预测的值
def predict_fun(datas, W, means=None, stds=None):
    """Predict a class index for every row of ``datas``.

    The features are standardised, a bias column of ones is prepended, and
    each sample is assigned to the class with the highest probability. Row
    ``k`` of ``W`` scores class ``k``; the extra last class is the reference
    class with a fixed score of 0.

    Args:
        datas: 2-D float array, one sample per row and one feature per column.
        W: Weight matrix of shape ``(n_classes - 1, n_features + 1)``, e.g.
            the value returned by ``LogRegressionAlgorithm``.
        means: Optional per-feature means used for standardisation. Defaults
            to the means of ``datas`` itself.
        stds: Optional per-feature standard deviations used for
            standardisation. Defaults to those of ``datas`` itself.

    Returns:
        1-D integer array of predicted class indices in
        ``range(W.shape[0] + 1)``.
    """
    n_samples, n_params = datas.shape[0], datas.shape[1] + 1
    K = W.shape[0] + 1  # modelled classes plus the reference class

    if means is None:
        means = datas.mean(axis=0)
    if stds is None:
        stds = datas.std(axis=0)
    stds = np.asarray(stds, dtype=float)
    # A constant feature has zero standard deviation; dividing by it would
    # turn every score into NaN and make argmax return 0 for all samples.
    stds = np.where(stds == 0, 1.0, stds)

    data = np.ones((n_samples, n_params))
    data[:, 1:] = (datas - means) / stds

    # Each row of probM holds the per-class probabilities of one sample; the
    # last column starts at exp(0) = 1 for the reference class.
    probM = np.ones((n_samples, K))
    print("data.shape:", data.shape)
    print("datas.shape:", datas.shape)
    print("W.shape:", W.shape)
    print("probM.shape:", probM.shape)
    probM[:, :-1] = np.exp(np.dot(data, W.transpose()))
    probM /= probM.sum(axis=1, keepdims=True)  # normalise rows to probabilities

    predict = np.argmax(probM, axis=1).astype(int)
    print("输出predict为：", predict)
    return predict

if __name__ == '__main__':
    # Script entry point: load the iris data, train on the first two
    # features, predict on the same samples and report the error count.

    attributes=['SepalLength','SepalWidth','PetalLength','PetalWidth']  # the four iris attribute names

    # Number of leading columns used as features. The original author noted
    # 7 misclassified samples with all 4 features and 30 with only 2.
    N_FEATURES = 2
    data_path = r'W:\PY\CVBB\titantic_and_iris_dataset\iris.txt'

    datas = []
    labels = []

    # Read the comma-separated data set; ``with`` guarantees the file is closed.
    with open(data_path) as data_file:
        for line in data_file:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing empty line), which would
                # otherwise become a row with no features and an empty label.
                continue
            linedata = line.split(',')
            datas.append(linedata[:N_FEATURES])  # leading columns are the features
            labels.append(linedata[-1])  # last column is the class name

    datas = np.array(datas).astype(float)  # 2-D string array -> float array
    labels = np.array(labels)
    # Same expression as inside LogRegressionAlgorithm, so the class order
    # (and therefore the meaning of indices 0, 1, 2) agrees with the model.
    kinds = list(set(labels))

    # Train the weight matrix W.
    W = LogRegressionAlgorithm(datas, labels)

    # Predict on the training samples.
    predict = predict_fun(datas, W)

    # rights holds the true class index of every sample, derived from labels.
    N = datas.shape[0]
    rights = np.zeros(N, dtype=int)
    for index, kind in enumerate(kinds):
        rights[labels == kind] = index
    # Number of misclassified samples.
    print("误判的样本的个数为：%d\n" % np.sum(predict != rights))

只用两个特征时分类效果不算太好（据代码注释：使用全部 4 个特征时误判 7 个样本，只用前 2 个特征时误判 30 个），再接再厉，深度学习。
输出效果（注意：下面这次运行中 datas.shape 为 (151, 0)，即一个特征列都没有读到，所有样本都被预测为类别 2，因此它并不是"两个特征"正常训练的结果）:

输出W为： [[-3.17933965]
 [-3.17933965]]
data.shape: (151, 1)
datas.shape: (151, 0)
W.shape: (2, 1)
probM.shape: (151, 3)
输出predict为： [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2]
误判的样本的个数为：150

海宝7号

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
python实现鸢尾花两个特征训练的方法源码整理。

懂得都懂，话不多说。源码如下:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author's_name_is_NIKOLA_SS
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import itertools as it
import matplotlib as mpl
from matplotlib import co…（摘要在此处被截断）
复制链接

扫一扫