PCA降维

PCA降维:点击打开链接


from numpy import *
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from numpy import linalg as LA
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.preprocessing import normalize
import tensorflow as tf
from mpl_toolkits.mplot3d import Axes3D


# Parsed rows of the input file: one list of ';'-separated string fields per
# line. Filled in by data_p().
data = []

# Row indices into `data`, grouped into runs of consecutive rows that share
# the same first field. Filled in by data_p().
cty_data = []


def data_p(fliename):
    """Read a ';'-separated text file into the module-level ``data`` and ``cty_data``.

    Every line is stripped, split on ';' and appended to ``data``.  Row
    indices (counted from 0 for this call) are grouped in ``cty_data``: a new
    group starts whenever the first field of a row differs from the first
    field of the row that opened the current group.

    Args:
        fliename: Path of the text file to read.

    Returns:
        None.  Results are appended to the module-level lists.
    """
    cid = 0  # index of the group currently being extended
    # The context manager closes the file even if parsing raises; the
    # original left the handle open.
    with open(fliename) as fr:
        for tid, line in enumerate(fr):
            tline = line.strip().split(';')
            data.append(tline)

            cn = tline[0]  # grouping key (presumably a country name)
            if tid == 0:
                # Very first row always opens the first group.
                cty_data.append([tid])
            elif cn == data[cty_data[cid][0]][0]:
                # Same key as the row that opened the current group.
                cty_data[cid].append(tid)
            else:
                # Key changed: open a new group.
                cid += 1
                cty_data.append([tid])
            

# Load the input file; the path is hard-coded to the author's machine.
data_p('C:\\Users\\imac\\Desktop\\2018\\bigdata\\py\\wealth1951.txt')
# Convert the grouped row indices to an array so getdata() can index it as
# cty_data[i, :].  NOTE(review): that 2-D indexing presumably requires every
# group to contain the same number of rows -- confirm against the data file.
cty_data = np.asarray(cty_data)
#print(cty_data)
# Convert the parsed rows to an array; the fields are still strings here.
data = np.asarray(data)
#print(data[cty_data[:,:],0])

# One entry per row group, appended by getdata().
cdata = []

def getdata():
    """Append, for every row group, the mean relative change of columns 3..5 to ``cdata``.

    For each group of row indices in the module-level ``cty_data`` the fields
    in columns 3, 4 and 5 of ``data`` are read as numbers.  For every pair of
    consecutive rows the relative change ``(cur - prev) / prev`` is computed,
    and the per-column mean of those changes (as float32) is appended to the
    module-level ``cdata`` as a list of three values.

    Raises:
        ZeroDivisionError: If a value used as the base of a relative change
            is zero (same as the original scalar division).
    """
    for rows in cty_data:
        # Convert to float up front.  The previous implementation wrote each
        # result back into the string array sliced from `data`, where NumPy
        # silently truncates it to the array's fixed string width (e.g.
        # -0.01 stored in a '<U4' array becomes '-0.0').
        values = np.asarray(data[rows, 3:6]).astype(np.float64)

        prev = values[:-1]
        if np.any(prev == 0):
            # Keep the original failure mode instead of producing inf/nan.
            raise ZeroDivisionError("float division by zero")
        growth = (values[1:] - prev) / prev

        # Column-wise mean, in float32 like the original.
        means = growth.astype(np.float32).mean(axis=0)
        cdata.append(list(means))

   
# Fill cdata with one list of three mean values per row group.
getdata()
# Stack the per-group lists into a float32 array (one row per group).
cdata = np.asarray(cdata,dtype=np.float32)

#print(cdata)
# Rescale with sklearn's normalize() using its default arguments
# (per the sklearn docs: each row scaled to unit L2 norm).
cdata = normalize(cdata)

def eigValPct(eigvals,percentage):
    """Return how many of the largest eigenvalues are needed to exceed a share of their sum.

    The eigenvalues are sorted in descending order and accumulated; the
    function returns the number of values taken when the running sum first
    becomes strictly greater than ``percentage * sum(eigvals)``.

    Args:
        eigvals: Sequence or array of eigenvalues.
        percentage: Fraction of the total sum that must be exceeded.

    Returns:
        The number of leading (largest) eigenvalues.  If the running sum
        never strictly exceeds the threshold (for example when
        ``percentage >= 1``), the total number of eigenvalues is returned;
        the original fell off the end and returned ``None`` in that case.
    """
    # Sort eigenvalues from largest to smallest.
    sortArray = np.sort(eigvals)[::-1]
    threshold = percentage * np.sum(sortArray)

    tmpSum = 0
    for num, val in enumerate(sortArray, start=1):
        tmpSum += val
        if tmpSum > threshold:
            return num
    # Threshold never exceeded: every eigenvalue is required.
    return len(sortArray)


def pca(dataMat,percentage=0.9,k=2):
    """Project ``dataMat`` onto its ``k`` leading principal components.

    Args:
        dataMat: 2-D array-like with one sample per row and one feature per
            column.
        percentage: Accepted for backward compatibility; it is not used to
            choose the number of components (it was already unused before).
        k: Number of principal components to keep.  Defaults to 2, the value
            that was previously hard-coded.

    Returns:
        A tuple ``(lowDDataMat, reconMat)`` of ``numpy.matrix`` objects:
        the mean-centred data projected onto the ``k`` components, and the
        data reconstructed from that projection with the column means added
        back.
    """
    # Column means; covariance is computed on mean-centred data.
    meanVals = np.mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals

    # Covariance between columns (rowvar=0: each column is a variable).
    covMat = np.cov(meanRemoved, rowvar=0)
    # np.asmatrix replaces the bare `mat` alias, which no longer exists in
    # the NumPy 2.0 namespace; the matrix type is kept so that `*` below is
    # a matrix product and the return type is unchanged.
    eigVals, eigVects = np.linalg.eig(np.asmatrix(covMat))

    # Indices of the k largest eigenvalues, largest first.
    eigValInd = np.argsort(eigVals)[::-1][:k]
    # Eigenvectors (columns) belonging to those eigenvalues.
    redEigVects = eigVects[:, eigValInd]

    # Project the centred data onto the selected components.
    lowDDataMat = meanRemoved * redEigVects

    # Map the projection back to the original feature space.
    reconMat = (lowDDataMat * redEigVects.T) + meanVals

    return lowDDataMat, reconMat


# Reduce the normalized per-group features with pca(); the reconstruction
# returned as the second value is discarded.
ccdata,_ = pca(cdata)

fig = plt.figure()
# NOTE(review): the axes are created with a 3-D projection, but only two
# coordinates are passed to scatter() below -- confirm whether a plain 2-D
# plot was intended.
ax = fig.add_subplot(111,projection='3d')  

# Scatter the first component against the second.
ax.scatter(ccdata[:, 0], ccdata[:, 1]) 

plt.show()



  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值