分类---

PNN(概率神经网络,Probabilistic Neural Network)

添加链接描述

# -*- coding: utf-8 -*-
"""
Created on Wed Dec 12 13:21:13 2018

@author: lj
"""
import numpy as np
import math
import copy

def load_data(file_name):
    '''Load tab-separated samples from a text file.

    Each line holds the feature values followed by an integer class
    label, all separated by tabs.

    input:  file_name(string): path of the data file
    output: feature_data(mat): feature matrix, one sample per row
            label(list): integer class label of every sample
    '''
    feature_data = []
    label = []
    # 'with' guarantees the file is closed even if a line fails to parse
    # (the original left the handle open on error).
    with open(file_name) as f:
        for line in f:
            lines = line.strip().split("\t")
            # every column but the last is a feature; the last is the label
            feature_data.append([float(x) for x in lines[:-1]])
            label.append(int(lines[-1]))

    return np.mat(feature_data), label

def Normalization(data):
    '''Scale every sample (row) to unit Euclidean length.

    input:  data(mat): sample feature matrix, one sample per row
    output: Nor_feature(mat): row-normalized copy of ``data``

    Fixes vs. the original: the per-column Python loop and deepcopy are
    replaced by a single broadcasted division, and the result is always
    floating point (the old in-place column assignment silently
    truncated the result when ``data`` had an integer dtype).
    NOTE(review): a zero row still divides by zero, as before.
    '''
    # per-row L2 norms, shape (m, 1); broadcasting divides every column
    sample_norm = np.sqrt(np.sum(np.square(data), axis=1))
    return data / sample_norm

def distance(X,Y):
    '''Squared Euclidean distance between two row-vector samples.

    NOTE: despite the name, no square root is taken — the callers
    (distance_mat / Gauss) expect the *squared* distance.
    Returns a 1x1 matrix when X and Y are 1xn matrices.
    '''
    diff = X - Y
    return np.sum(np.multiply(diff, diff), axis=1)

def distance_mat(Nor_trainX,Nor_testX):
    '''Pairwise squared-Euclidean-distance matrix between test and train samples.

    input: Nor_trainX(mat): normalized training samples, one per row
           Nor_testX(mat): normalized test samples, one per row
    output: Euclidean_D(mat): entry (i, j) is the squared distance from
            test sample i to training sample j
    '''
    n_train = np.shape(Nor_trainX)[0]
    n_test = np.shape(Nor_testX)[0]
    Euclidean_D = np.mat(np.zeros((n_test, n_train)))
    for i in range(n_test):
        test_row = Nor_testX[i, :]
        for j in range(n_train):
            diff = test_row - Nor_trainX[j, :]
            Euclidean_D[i, j] = np.sum(np.multiply(diff, diff))
    return Euclidean_D

def Gauss(Euclidean_D,sigma):
    '''Map a squared-distance matrix through a Gaussian kernel.

    input: Euclidean_D(mat): squared distances, tests x trains
           sigma(float): standard deviation of the Gaussian
    output: (mat): element-wise exp(-d / (2 * sigma^2))
    '''
    rows, cols = np.shape(Euclidean_D)
    kernel = np.mat(np.zeros((rows, cols)))
    denom = 2 * (sigma ** 2)  # loop invariant, hoisted
    for r in range(rows):
        for c in range(cols):
            kernel[r, c] = math.exp(-Euclidean_D[r, c] / denom)
    return kernel

def Prob_mat(Gauss_mat,labelX):
    '''Per-class summed-kernel score matrix for every test sample.

    input: Gauss_mat(mat): Gaussian kernel matrix, tests x trains
           labelX(list): label of every training sample (column of Gauss_mat)
    output: (mat): row-normalized class scores, tests x classes
            label_class(list): distinct labels, in first-seen order
    '''
    # distinct labels in order of first appearance
    label_class = list(dict.fromkeys(labelX))
    class_index = {lab: s for s, lab in enumerate(label_class)}

    n_test, n_train = np.shape(Gauss_mat)
    scores = np.mat(np.zeros((n_test, len(label_class))))
    # accumulate each training sample's kernel value into its class column
    for i in range(n_test):
        for j in range(n_train):
            scores[i, class_index[labelX[j]]] += Gauss_mat[i, j]
    # normalize every row so the class scores sum to 1
    return scores / np.sum(scores, axis=1), label_class

def calss_results(Prob,label_class):
    '''Pick the highest-scoring class for every test sample.

    input: Prob(mat): class-score matrix, tests x classes
           label_class(list): class label of every column of Prob
    output: results(list): predicted label per test sample
    NOTE: the (misspelled) name is kept — it is the public API.
    '''
    winners = np.argmax(Prob, axis=1)  # column index of the row maximum
    return [label_class[winners[k, 0]] for k in range(winners.shape[0])]
                    

if __name__ == '__main__':
    # 1. load the raw samples and labels
    print("--------- 1.load data ------------")
    trainX, labelX = load_data("data.txt")
    # 2. normalize; rows 100:300 of the training set double as the test set
    Nor_trainX = Normalization(trainX)
    Nor_testX = Normalization(trainX[100:300, :])
    # 3. distance matrix -> Gaussian kernel -> per-class probabilities
    Euclidean_D = distance_mat(Nor_trainX, Nor_testX)
    Gauss_mat = Gauss(Euclidean_D, 0.1)
    Prob, label_class = Prob_mat(Gauss_mat, labelX)
    # 4. final class decision for every test sample
    predict_results = calss_results(Prob, label_class)
       

SVM(2分类)

添加链接描述

import matplotlib.pyplot as plt
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from sklearn import datasets

# Linear soft-margin SVM (primal form, hinge loss) on two iris features,
# built with TF1-style graph ops.
sess = tf.compat.v1.Session()


# Load the data
# iris.data = [(Sepal Length, Sepal Width, Petal Length, Petal Width)]
iris = datasets.load_iris()
x_vals = np.array([[x[0], x[3]] for x in iris.data])  # sepal length, petal width
y_vals = np.array([1 if y == 0 else -1 for y in iris.target])  # setosa vs. rest
# 80/20 train/test split
train_indices = np.random.choice(len(x_vals),
                                 round(len(x_vals)*0.8),
                                 replace=False)
test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]
batch_size = 100

# Placeholders for mini-batch features and targets
x_data = tf.placeholder(shape=[None, 2], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Model variables: weight vector A and bias b
A = tf.Variable(tf.random_normal(shape=[2, 1]))
b = tf.Variable(tf.random_normal(shape=[1, 1]))

# Linear model: x*A - b
model_output = tf.subtract(tf.matmul(x_data, A), b)

# Declare vector L2 'norm' function squared (regularizer)
l2_norm = tf.reduce_sum(tf.square(A))

# Loss = max(0, 1-pred*actual) + alpha * L2_norm(A)^2
alpha = tf.constant([0.01])
classification_term = tf.reduce_mean(tf.maximum(0., tf.subtract(1., tf.multiply(model_output, y_target))))
loss = tf.add(classification_term, tf.multiply(alpha, l2_norm))
my_opt = tf.train.GradientDescentOptimizer(0.01)
train_step = my_opt.minimize(loss)

init = tf.global_variables_initializer()
sess.run(init)

# Training loop
loss_vec = []
train_accuracy = []  # NOTE(review): declared but never filled in the original
test_accuracy = []   # NOTE(review): declared but never filled in the original
for i in range(20000):
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    rand_x = x_vals_train[rand_index]
    rand_y = np.transpose([y_vals_train[rand_index]])
    # Fix: record the training loss each step (loss_vec was never appended
    # to) and stop re-fetching A on every one of the 20000 iterations.
    _, temp_loss = sess.run([train_step, loss],
                            feed_dict={x_data: rand_x, y_target: rand_y})
    loss_vec.append(temp_loss)
# Fetch the fitted parameters once, after training finishes.
[[a1], [a2]] = sess.run(A)
[[b]] = sess.run(b)  # rebinds the Python name 'b' to the trained bias value
# Separating line in (petal width, sepal length) space: x0 = slope*x1 + intercept
slope = -a2/a1
y_intercept = b/a1
best_fit = []

x1_vals = [d[1] for d in x_vals]

for i in x1_vals:
    best_fit.append(slope*i+y_intercept)


# Separate I. setosa from the rest for plotting
setosa_x = [d[1] for i, d in enumerate(x_vals) if y_vals[i] == 1]
setosa_y = [d[0] for i, d in enumerate(x_vals) if y_vals[i] == 1]
not_setosa_x = [d[1] for i, d in enumerate(x_vals) if y_vals[i] == -1]
not_setosa_y = [d[0] for i, d in enumerate(x_vals) if y_vals[i] == -1]

plt.plot(setosa_x, setosa_y, 'o', label='I. setosa')
plt.plot(not_setosa_x, not_setosa_y, 'x', label='Non-setosa')
plt.plot(x1_vals, best_fit, 'r-', label='Linear Separator', linewidth=3)
plt.ylim([0, 10])
plt.legend(loc='lower right')
plt.title('Sepal Length vs Pedal Width')
plt.xlabel('Pedal Width')
plt.ylabel('Sepal Length')
plt.show()

SVM 回归(MATLAB 实现)

% Train an SVM regressor on CSV data and report the test error.
tic
data=csvread('1NewTrainTest.csv');
[M,N]=size(data);
input=data(:,1:N-1);   % all columns but the last are features
output=data(:,N);      % last column is the regression target
% hold out the last 5 rows as the test set
% (fix: the old comment claimed a 1900/100 split, which the code never did)
TestNum=M-5;
input_train=input(1:TestNum,:)';
output_train=output(1:TestNum,:)';
input_test=input(TestNum+1:M,:)';
output_test=output(TestNum+1:M,:)';
% normalize the training data
[inputn,inputps]=mapminmax(input_train);
[outputn,outputps]=mapminmax(output_train);
% fit the SVM regression model
Mdl=fitrsvm(inputn',outputn');
% apply the training normalization mapping to the test inputs
inputn_test=mapminmax('apply',input_test,inputps);
% predict and map the outputs back to the original scale
an=predict(Mdl,inputn_test');
SVMoutput=mapminmax('reverse',an,outputps);
% Error metrics.
% Fix: fprintf below referenced rmse/mape whose computation was commented
% out, which raised an undefined-variable error at run time.
b=SVMoutput(:);
d=output_test(:);
rmse=sqrt(sum((b-d).^2)/length(b));
mape=sum(abs(b-d)./d)/length(b);   % NOTE(review): undefined if any d is 0
mae=sum(abs(b-d))/length(b);
% plot(SVMoutput,'r'); hold on; plot(output_test,'b');
% Fix: the original format string had a single %f for two arguments.
fprintf('本次训练误差: rmse=%f, mape=%f, mae=%f, 用时%f秒\n\n',rmse,mape,mae,toc);

SVM 多分类:一对多(one-vs-rest,MATLAB 实现)


clc;
clear;
close all;
tic
fprintf('-----已开始请等待-----\n\n');
%% Synthetic data generation — 20 base points forming 5 clusters of 4.
data = [0.4,0.3;-0.5,0.1;-0.2,-0.3;0.5,-0.3;
        2.1,1.9;1.8,2.2;1.7,2.5;2.3,1.6;
        -2.2,1.6;-1.9,2.1;-1.7,2.6;-2.3,2.5;
        -3.1,-1.9;-2.8,-2.1;-1.9,-2.5;-2.3,-3.2;
        3.9,-3.5;2.8,-2.2;1.7,-3.1;2.5,-3.4];
% three jittered copies of the base points
data1 = data + 2.5*rand(20,2);
data2 = data + 2.5*rand(20,2);
data3 = data + 2.5*rand(20,2);  % fix: removed dead no-op 'data1(17:20,:);'
% training data: 12 samples per class (4 from each copy), grouped by class
train_data = [data1(1:4,:);data2(1:4,:);data3(1:4,:);
              data1(5:8,:);data2(5:8,:);data3(5:8,:);
              data1(9:12,:);data2(9:12,:);data3(9:12,:);
              data1(13:16,:);data2(13:16,:);data3(13:16,:);
              data1(17:20,:);data2(17:20,:);data3(17:20,:)];

% Scatter plot of the training data, colored by class.
figure;
% group_train assigns class 1..5 to each of the 60 training rows
group_train = [1;1;1;1;1;1;1;1;1;1;1;1;
         2;2;2;2;2;2;2;2;2;2;2;2;
         3;3;3;3;3;3;3;3;3;3;3;3;
         4;4;4;4;4;4;4;4;4;4;4;4;
         5;5;5;5;5;5;5;5;5;5;5;5];
gscatter(train_data(:,1),train_data(:,2),group_train);

title('训练数据样本分布');
xlabel('样本特征1');
ylabel('样本特征2');
legend('Location','Northwest');
grid on;

%%
% test data: one jittered copy of the base points (larger noise)
test_data = data + 3.0*rand(20,2);
test_features = test_data;
% ground-truth labels of the test data, 4 samples per class
test_labels = [1;1;1;1;2;2;2;2;3;3;3;3;4;4;4;4;5;5;5;5];

%%
% One-vs-rest training sets. For class i: positives = all 12 samples of
% class i, negatives = 12 samples drawn at random from the other classes.
num_classes = 5;
per_class = 12;
feats = cell(num_classes,1);
labs = cell(num_classes,1);
for c = 1:num_classes
    rows = (c-1)*per_class + (1:per_class);
    pos = train_data(rows,:);              % all samples of class c
    pool = train_data;
    pool(rows,:) = [];                     % the remaining 48 samples
    % randperm(n,k) returns k values drawn from 1..n without replacement
    neg = pool(randperm(48,per_class),:);
    feats{c} = [pos; neg];
    % positives labeled +1, negatives labeled -1
    labs{c} = [ones(per_class,1); -1*ones(per_class,1)];
end
% keep the original per-class variable names used by the code below
train_features1 = feats{1}; train_labels1 = labs{1};
train_features2 = feats{2}; train_labels2 = labs{2};
train_features3 = feats{3}; train_labels3 = labs{3};
train_features4 = feats{4}; train_labels4 = labs{4};
train_features5 = feats{5}; train_labels5 = labs{5};

%%
% Train one one-vs-rest SVM per class.
models = cell(5,1);
models{1} = fitcsvm(train_features1,train_labels1,'ClassNames',{'-1','1'});
models{2} = fitcsvm(train_features2,train_labels2,'ClassNames',{'-1','1'});
models{3} = fitcsvm(train_features3,train_labels3,'ClassNames',{'-1','1'});
models{4} = fitcsvm(train_features4,train_labels4,'ClassNames',{'-1','1'});
models{5} = fitcsvm(train_features5,train_labels5,'ClassNames',{'-1','1'});
fprintf('-----模型训练完毕-----\n\n');
%%
% predict() returns per-sample labels and an n-by-2 score matrix; its
% second column is the score of the positive ('1') class. Collect the
% positive score of every model for every test sample.
num_test = size(test_features,1);
score = zeros(num_test,5);
for c = 1:5
    [~,s] = predict(models{c},test_features);
    score(:,c) = s(:,2);
end
% The class whose one-vs-rest model yields the highest positive score wins.
final_labels = zeros(num_test,1);
for i = 1:num_test
    [~,p] = max(score(i,:));  % column index of the row maximum
    final_labels(i) = p;      % the column index is the class label
end
fprintf('-----样本预测完毕-----\n\n');
% Classification metrics derived from the confusion matrix.
group = test_labels;      % ground-truth labels
grouphat = final_labels;  % predicted labels
[C,order] = confusionmat(group,grouphat,'Order',[1;2;3;4;5]); % fixed class order
% Per-class precision, recall and F-measure.
% Fix: five copy-pasted metric blocks collapsed into one loop; the printed
% output is identical ('c%d' expands to c1..c5).
for c = 1:5
    prec = C(c,c) / sum(C(:,c));   % precision: TP / predicted-as-c
    rec  = C(c,c) / sum(C(c,:));   % recall:    TP / actually-c
    F1   = 2*prec*rec / (prec + rec);
    fprintf('c%d类的查准率为%f,查全率为%f,F测度为%f\n\n',c,prec,rec,F1);
end
            
            
% Side-by-side scatter plots: true vs. predicted test labels.
figure;
plot_groups = {test_labels, final_labels};
plot_titles = {'测试数据样本真实分布','测试数据样本预测分布'};
for k = 1:2
    subplot(1,2,k);
    % gscatter colors each point by its group (class) value
    gscatter(test_data(:,1),test_data(:,2),plot_groups{k});
    title(plot_titles{k});
    xlabel('样本特征1');
    ylabel('样本特征2');
    legend('Location','Northwest');
    grid on;
end

KELM

添加链接描述

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值