python3.7------LDA分类器

本文介绍了如何在Python3.7中利用LDA(Linear Discriminant Analysis,线性判别分析)算法对图像进行分类,给出了实现代码、测试结果及所用的训练数据集(25个室内样本和25个室外样本)。
摘要由CSDN通过智能技术生成

1、LDA实现代码
2、测试结果
3、测试所用训练数据(25 indoor,25 outdoor)

import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.datasets.samples_generator import make_classification
from mpl_toolkits.mplot3d import Axes3D
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


#
def load_dataset(dataset_path, cls_name_list):
    """Load per-image mean-colour features and integer class labels.

    Each entry of ``cls_name_list`` names a sub-directory of
    ``dataset_path``. Every readable image inside it contributes one
    sample: the mean of channels 2, 1 and 0 (in that order), each truncated
    to ``int``. OpenCV decodes colour images in BGR order, so the resulting
    feature order is (R, G, B).

    Files that OpenCV cannot decode (for example hidden system files that
    sit next to the images) are skipped with a warning instead of crashing.

    Args:
        dataset_path: Root directory that contains one folder per class.
        cls_name_list: Class folder names; the position of a name in this
            list is used as the label of its images.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays, one row of ``data`` and
        one entry of ``labels`` per loaded image.
    """
    import warnings  # local import: only needed on the unreadable-file path

    data = []
    label_list = []

    for cls_name_idx, cls_name in enumerate(cls_name_list):
        cls_data_path = os.path.join(dataset_path, cls_name)

        # Sorted so the sample order does not depend on directory listing order.
        for image_name in sorted(os.listdir(cls_data_path)):
            image_path = os.path.join(cls_data_path, image_name)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread reports failure by returning None rather than
                # raising, so indexing the result would fail with an
                # unhelpful TypeError.
                warnings.warn("skipping unreadable image: %s" % image_path)
                continue

            data.append([int(np.mean(img[:, :, channel])) for channel in (2, 1, 0)])
            label_list.append(cls_name_idx)  # use class name index as label

    return np.array(data), np.array(label_list)


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Only ``exp`` of non-positive values is evaluated, so large-magnitude
    inputs cannot overflow: for ``z >= 0`` the result is ``1 / (1 + e)``
    and for ``z < 0`` it is ``e / (1 + e)``, with ``e = exp(-|z|)``.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy float scalar for scalar input, otherwise a float array with
        the same shape as ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # always in (0, 1], never overflows
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]


def loss(x, y, theta):
    """Return the mean binary cross-entropy of the logistic model.

    The predicted probabilities are ``sigmoid(x.dot(theta))``. A small
    constant is added inside each logarithm so that a probability of
    exactly 0 or 1 does not produce ``log(0)``.

    Args:
        x: Feature matrix, one sample per row.
        y: Vector of 0/1 labels, one per row of ``x``.
        theta: Weight vector.

    Returns:
        The summed cross-entropy divided by ``len(y)``.
    """
    tiny = 1e-10
    prob = sigmoid(x.dot(theta))
    positive_term = y.dot(np.log(prob + tiny))
    negative_term = (1 - y).dot(np.log(1 - prob + tiny))
    return -(positive_term + negative_term) / len(y)


# Partial derivative (gradient) of the loss function
def partial_loss(x, y, theta):
    """Return the gradient of ``loss`` with respect to ``theta``.

    Args:
        x: Feature matrix, one sample per row.
        y: Vector of 0/1 labels, one per row of ``x``.
        theta: Weight vector.

    Returns:
        ``-x.T.dot(y - sigmoid(x.dot(theta)))`` divided by ``len(y)``.
    """
    residual = y - sigmoid(x.dot(theta))  # label minus predicted probability
    weighted = x.T.dot(residual)
    return -weighted / len(y)


# Compute accuracy
def compute_accuracy(gt_labels, pred_labels):
    """Return the fraction of predictions that equal the ground truth.

    Args:
        gt_labels: NumPy array of ground-truth labels.
        pred_labels: Predicted labels, compared element-wise with
            ``gt_labels``.

    Returns:
        Number of matching positions divided by ``gt_labels.shape[0]``.
    """
    matches = gt_labels == pred_labels
    n_correct = np.int32(matches).sum()
    return n_correct / gt_labels.shape[0]


# Gradient descent
def gradient_descent(x, y, initial_theta, alpha=0.000001, n_iters=1e3, epsilon=1e-8):
    """Fit ``theta`` with fixed-step batch gradient descent.

    The loop runs while the step counter is below ``n_iters``. On every
    100th step (starting with step 0) the loss and training accuracy of the
    current ``theta`` are printed.

    Args:
        x: Feature matrix, one sample per row.
        y: Vector of 0/1 labels.
        initial_theta: Starting weight vector.
        alpha: Step size applied to the gradient.
        n_iters: Upper bound on the number of update steps.
        epsilon: Accepted for interface compatibility but not used; no
            convergence-based early stop is performed.

    Returns:
        The weight vector after the last update.
    """
    theta = initial_theta
    step = 0
    while step < n_iters:
        if step % 100 == 0:
            current_loss = loss(x, y, theta)
            current_accuracy = compute_accuracy(y, predict(x, theta))
            print('iters=%d   loss=%f   accuracy=%f' % (step, current_loss, current_accuracy))

        # Rebind instead of updating in place so initial_theta is not modified.
        theta = theta - alpha * partial_loss(x, y, theta)
        step += 1
    return theta


# Prediction function
def predict(x, theta):
    """Return a list of 0/1 labels for the rows of ``x``.

    A row is labelled 1 when its score ``x.dot(theta)`` is strictly
    greater than 0, otherwise 0.
    """
    labels = []
    for score in x.dot(theta):
        if score > 0:
            labels.append(1)
        else:
            labels.append(0)
    return labels


def pca(X, k):
    n_samples, n_feature = X.shape
    mean = np.array([np.mean(X[:, i]) for i in range(n_feature)])  # 求平均值
    norm_X = X - mean  # 去中心化
    scatter_matrix = np.dot(np.transpose(norm_X), norm_X)  # 计算散度矩阵
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值