属性规约与数值规约概述
属性规约
数值规约
主成分分析
属性规约之主成分分析
PCA算法
from sklearn.decomposition import PCA
import pymysql
import pandas as pd
# PCA demo on the `myhexun` table: load the hits/comment columns, derive a
# comment-per-hit ratio, fit PCA, reduce to two components and reconstruct.
# NOTE(review): connection credentials are hard-coded here; consider moving
# them to configuration or environment variables.
db = pymysql.connect(host='127.0.0.1', port=3306, user='root', password='123456', database='taobao')
sql = 'select hits, comment from myhexun'
try:
    df = pd.read_sql(sql=sql, con=db)
finally:
    # Release the connection even when the query fails.
    db.close()

# Comments per hit. Rows with hits == 0 yield inf (or NaN for 0 / 0).
r = df['comment'] / df['hits']
df['r'] = r
# PCA.fit raises on NaN/inf input, so drop rows that are missing or non-finite.
df = df.replace([float('inf'), float('-inf')], float('nan')).dropna()

# ====== Principal component analysis ======
# Fit with all components kept, to inspect the full decomposition.
pca1 = PCA()
pca1.fit(df)
# Principal axes of the fitted model (one row per component).
cm = pca1.components_
# Fraction of the total variance explained by each component.
ratio = pca1.explained_variance_ratio_

# Fit again, keeping only the first two components.
pca2 = PCA(n_components=2)
pca2.fit(df)
# Dimensionality reduction: project the data onto the two components.
dr = pca2.transform(df)
# Map the reduced data back to the original feature space.
recovery = pca2.inverse_transform(dr)
print(recovery)
实战