# 1. Load the data
# 2. Merge the tables
# 3. Find the relationship between user_id and aisle
import pandas as pd
# 1. Load the data.
# All four Instacart CSV files sit in the same folder; keeping the folder in
# one constant means only one line changes when the data is moved.
DATA_DIR = "C:/Users/18578/Desktop/instacart"
order_products = pd.read_csv(DATA_DIR + "/order_products__prior.csv")
products = pd.read_csv(DATA_DIR + "/products.csv")
orders = pd.read_csv(DATA_DIR + "/orders.csv")
aisles = pd.read_csv(DATA_DIR + "/aisles.csv")

# 2. Merge the tables so that user_id and aisle end up in the same row:
#    aisles -> products (aisle_id) -> order_products (product_id)
#    -> orders (order_id).
# Each join uses a single shared key column; listing the same key twice in
# `on` adds nothing to the join and only makes pandas process the key twice.
tab1 = pd.merge(aisles, products, on="aisle_id")
tab2 = pd.merge(tab1, order_products, on="product_id")
tab3 = pd.merge(tab2, orders, on="order_id")

# Bare expression: displays the merged table when run as a notebook cell.
tab3
| | aisle_id | aisle | product_id | product_name | department_id | order_id | add_to_cart_order | reordered | user_id | eval_set | order_number | order_dow | order_hour_of_day | days_since_prior_order |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | prepared soups salads | 209 | Italian Pasta Salad | 20 | 94246 | 5 | 0 | 114082 | prior | 26 | 0 | 20 | 1.0 |
1 | 1 | prepared soups salads | 22853 | Pesto Pasta Salad | 20 | 94246 | 4 | 0 | 114082 | prior | 26 | 0 | 20 | 1.0 |
2 | 4 | instant foods | 12087 | Chicken Flavor Ramen Noodle Soup | 9 | 94246 | 15 | 0 | 114082 | prior | 26 | 0 | 20 | 1.0 |
3 | 4 | instant foods | 47570 | Original Flavor Macaroni & Cheese Dinner | 9 | 94246 | 14 | 1 | 114082 | prior | 26 | 0 | 20 | 1.0 |
4 | 13 | prepared meals | 10089 | Dolmas | 20 | 94246 | 25 | 0 | 114082 | prior | 26 | 0 | 20 | 1.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
32434484 | 134 | specialty wines champagnes | 47713 | Sparkling Rose | 5 | 3014872 | 1 | 0 | 63218 | prior | 1 | 1 | 14 | NaN |
32434485 | 134 | specialty wines champagnes | 49562 | Blanc De Noirs Sparkling Wine | 5 | 34570 | 1 | 1 | 37901 | prior | 13 | 3 | 9 | 13.0 |
32434486 | 134 | specialty wines champagnes | 49562 | Blanc De Noirs Sparkling Wine | 5 | 250923 | 1 | 1 | 26431 | prior | 27 | 5 | 11 | 10.0 |
32434487 | 134 | specialty wines champagnes | 49562 | Blanc De Noirs Sparkling Wine | 5 | 1319402 | 1 | 1 | 26431 | prior | 34 | 2 | 14 | 7.0 |
32434488 | 134 | specialty wines champagnes | 49562 | Blanc De Noirs Sparkling Wine | 5 | 2298986 | 1 | 0 | 37901 | prior | 9 | 2 | 11 | 13.0 |
32434489 rows × 14 columns
# 3. Relate user_id to aisle: one row per user, one column per aisle, each
# cell counting how many merged rows pair that user with that aisle.
table = pd.crosstab(index=tab3["user_id"], columns=tab3["aisle"])

# Work on the first 1000 users only (positional slice of the rows).
data = table.iloc[:1000]
# PCA dimensionality reduction of the user x aisle count table.
from sklearn.decomposition import PCA
# 1) Instantiate the transformer. A float n_components in (0, 1) asks PCA to
#    keep as many components as needed to explain that fraction of the
#    variance (here 95%), rather than a fixed number of components.
transfer=PCA(n_components=0.95)
# 2) Call fit_transform: fit the PCA on `data` and project it in one step.
data_new=transfer.fit_transform(data)
# Bare expression: shows (n_rows, n_kept_components) when run as a notebook cell.
data_new.shape
(1000, 35)