机器学习初学代码(三) LDA(多分类)

# -*- coding: utf-8 -*-
# author: Xin Chen

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

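# General multi-class usage:
# `data` must be a pandas DataFrame whose feature columns come first and whose
# class labels are stored in a column named 'label' (the last column).
# Labels are expected to be integers starting from 0.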

def calWeights(data, K):
    """Compute the top-K linear discriminant (LDA) projection directions.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per sample. Every column except 'label' is treated as a
        numeric feature; the 'label' column holds the class of each sample.
    K : int
        Number of projection directions to keep. Must satisfy
        1 <= K < number of distinct labels.

    Returns
    -------
    numpy.matrix
        Real-valued matrix of shape (n_features, K) whose columns are the
        eigenvectors of ``Sw^-1 * Sb`` with the K largest eigenvalues,
        ordered from largest to smallest eigenvalue.

    Raises
    ------
    ValueError
        If K is smaller than 1 or not smaller than the number of classes.
    numpy.linalg.LinAlgError
        If the within-class scatter matrix is singular.
    """
    labels = np.unique(data['label'])
    cNum = len(labels)
    if K < 1:
        raise ValueError('K must be at least 1, got %s' % (K,))
    if K >= cNum:
        # Raise instead of print + exit(0): a library function must not kill
        # the interpreter (with a success exit code) on bad input.
        raise ValueError(
            'K is too large, please input again '
            '(K=%s must be smaller than the number of classes, %s)'
            % (K, cNum))

    # Plain float ndarrays: np.mat was removed in NumPy 2.0 and integer
    # feature columns must not lead to integer arithmetic.
    features = np.asarray(data.drop('label', axis=1), dtype=float)
    label_values = np.asarray(data['label'])
    dim = features.shape[1]

    # Sw: within-class scatter, summed over the classes.
    Sw = np.zeros((dim, dim))
    for i in labels:
        # Boolean row mask replaces DataFrame.ix (removed in pandas 1.0).
        Ci = features[label_values == i]
        centered = Ci - Ci.mean(axis=0)
        Sw += centered.T.dot(centered)

    # St: total scatter around the global mean.
    centered_all = features - features.mean(axis=0)
    St = centered_all.T.dot(centered_all)

    # Sb: between-class scatter.
    Sb = St - Sw

    # Solve Sw * S = Sb rather than forming the explicit inverse of Sw.
    S = np.linalg.solve(Sw, Sb)
    eigValues, eigVectors = np.linalg.eig(S)

    # S is not symmetric, so eig() may return complex numbers whose imaginary
    # parts are pure round-off; rank and return using the real parts only.
    order = np.argsort(eigValues.real)[::-1]
    select = order[:K]
    # Keep returning a matrix so callers relying on `*` as a matrix product
    # continue to work.
    return np.asmatrix(eigVectors[:, select].real)


def LDAplot(data):
    """Scatter-plot the samples of `data` projected onto two LDA directions.

    The plot shows the projected points, not the original feature vectors.
    Each class is drawn in its own colour; colours are reused cyclically when
    there are more classes than entries in the colour list.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature columns plus a 'label' column, as accepted by `calWeights`.
    """
    w = calWeights(data, K=2)
    if np.shape(w)[1] != 2:
        print('维度太大,无法画图!')
        return
    labels = np.unique(data['label'])
    label_values = np.asarray(data['label'])
    C = np.asarray(data.drop('label', axis=1), dtype=float)
    # Projected samples as a plain (n_samples, 2) array.
    CC = np.asarray(C.dot(np.asarray(w)))
    corlist = ['r', 'y', 'g', 'b', 'w']
    for idx, i in enumerate(labels):
        # The label mask has one entry per sample, so it must select rows.
        # The original applied it to the column axis.
        CCi = CC[label_values == i]
        # Index colours by class position, not label value, and wrap around
        # so any label set and any class count works.
        colour = corlist[idx % len(corlist)]
        plt.scatter(CCi[:, 0], CCi[:, 1], c=colour, marker="o")
    plt.show()


from sklearn import datasets

# Demo: run multi-class LDA on the iris data set and plot the 2-D projection.
iris = datasets.load_iris()
X = iris.data
Y = iris.target
# Build the DataFrame layout calWeights expects: features first, then 'label'.
data = pd.DataFrame(X)
data['label'] = Y
Weights = calWeights(data, K=2)
# %-formatting yields the same output under Python 2 and Python 3, unlike the
# Python 2 print statement, which is a SyntaxError on Python 3.
print("opticalweights= %s" % (Weights,))
LDAplot(data)

 

转载于:https://my.oschina.net/u/3590872/blog/1243191

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值