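# Merge the data tables (CSV files) as required, then apply PCA dimensionality reduction.
# Data download: "PCA降维处理.zip" (CSDN download, deep-learning document resources).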
import pandas as pd
from sklearn.decomposition import PCA
def _user_aisle_counts(aisles, products, order_products, orders):
    """Return a ``user_id`` x ``aisle`` contingency table of purchase counts.

    The four frames are inner-joined along
    aisles -> products (``aisle_id``) -> order_products (``product_id``)
    -> orders (``order_id``), and the joined rows are cross-tabulated.

    Args:
        aisles: frame with at least ``aisle_id`` and ``aisle`` columns.
        products: frame with at least ``product_id`` and ``aisle_id``.
        order_products: frame with at least ``order_id`` and ``product_id``.
        orders: frame with at least ``order_id`` and ``user_id``.

    Returns:
        The ``pd.crosstab`` of ``user_id`` (rows) against ``aisle`` (columns).
    """
    # Only the join keys plus `aisle` / `user_id` are carried through each
    # merge: the crosstab needs nothing else, and dropping the other columns
    # keeps the intermediate frames small without changing the row counts.
    product_aisles = pd.merge(
        aisles[['aisle_id', 'aisle']],
        products[['product_id', 'aisle_id']],
        on='aisle_id',
    )[['product_id', 'aisle']]
    order_aisles = pd.merge(
        product_aisles,
        order_products[['order_id', 'product_id']],
        on='product_id',
    )[['order_id', 'aisle']]
    user_aisles = pd.merge(
        order_aisles,
        orders[['order_id', 'user_id']],
        on='order_id',
    )
    return pd.crosstab(user_aisles['user_id'], user_aisles['aisle'])


def PCA_demo(data_dir='../data', n_components=0.90):
    """Merge the order/product/aisle tables and reduce them with PCA.

    Reads ``order_products__prior.csv``, ``products.csv``, ``orders.csv`` and
    ``aisles.csv`` from ``data_dir``, builds a user-by-aisle count table,
    fits a PCA on it, and prints the table shape, the transformed data and
    the transformed data's shape.

    Args:
        data_dir: directory containing the four CSV files. Defaults to the
            original hard-coded ``'../data'``.
        n_components: forwarded unchanged to ``PCA(n_components=...)``.
            The default 0.90 is meant to keep 90% of the information.

    Returns:
        None. Results are only printed.
    """
    # Local import so the block does not depend on a new file-level import.
    from pathlib import Path

    data_dir = Path(data_dir)

    # 1. Load the data. Only the columns used downstream are read.
    #    order_products__prior.csv: order/product links
    #        (order_id, product_id, add_to_cart_order, reordered)
    #    products.csv: product info
    #        (product_id, product_name, aisle_id, department_id)
    #    orders.csv: per-user order info
    #        (order_id, user_id, eval_set, order_number, ...)
    #    aisles.csv: the specific category a product belongs to
    #        (aisle_id, aisle)
    order_products = pd.read_csv(
        data_dir / 'order_products__prior.csv',
        usecols=['order_id', 'product_id'],
    )
    products = pd.read_csv(
        data_dir / 'products.csv',
        usecols=['product_id', 'aisle_id'],
    )
    orders = pd.read_csv(
        data_dir / 'orders.csv',
        usecols=['order_id', 'user_id'],
    )
    aisles = pd.read_csv(
        data_dir / 'aisles.csv',
        usecols=['aisle_id', 'aisle'],
    )

    # 2-3. Merge the tables and relate user_id to aisle.
    table = _user_aisle_counts(aisles, products, order_products, orders)
    print('table:\n', table.shape)

    # 4. PCA dimensionality reduction.
    transfer = PCA(n_components=n_components)
    data_new = transfer.fit_transform(table)
    print('data_new:\n', data_new, data_new.shape)
    return None
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    PCA_demo()