import pandas as pd
import numpy as np
# Do not truncate the column list when a DataFrame is printed.
# The full option name is used instead of relying on pandas' pattern matching
# of the abbreviated key "display.max_column".
pd.set_option("display.max_columns", None)
# Raw string: the Windows path contains backslashes (\V, \p, \z, \c) that are
# invalid escape sequences in a normal string literal and trigger a
# SyntaxWarning on recent Python versions. The resulting value is unchanged.
path = r'D:\Visual_studio\project\zzzzzzzz\chipotle.tsv'
# The data file is tab-separated.
df = pd.read_csv(path, sep='\t')
##**1**
#print(df.shape)##1_1 dimensions of the dataset
#print(df.dtypes)##1_2 dtype of each column
#print(df.index)##1_3 print the index
#print(df.columns)##1_4 name of each column
#print(type(df))
#print(df)
##df1 = df[['item_name','item_price']]
#print(df1.head(10))
#print(df1.shape)
##
#**2_1** how many distinct items there are
#print(df['item_name'].unique())
#print(len(df['item_name'].unique()))
#**2_2**
#rst = df.groupby('item_name',as_index=False)['quantity'].agg({'quantity':np.count_nonzero})
#print(type(rst))
#print(rst.shape)
#print(rst)
#the three most-purchased items
#rst.sort_values(['quantity'],inplace=True,ascending=False)
#print(rst.head(3))#the three items that were ordered the most times
#quan = df.groupby('item_name',as_index=False)['quantity'].agg({'quantity':np.count_nonzero})#dataset grouped by item_name, with the aggregated quantity as the value column
#print(quan)
#print(quan['quantity'].values)
##**3**
##price = df.groupby('item_name',as_index=False)['item_price'].agg({'item_price':np.count_nonzero})#dataset grouped by item_name, with the aggregated item_price as the value column
##print(type(df['item_price']))
##def fun(x):
##return eval(x[1:])
#equivalent to
##fun = lambda x:eval(x[1:])
def dollarrizer(x):
    """Convert a price string to a float by dropping its first character.

    Assumes the first character is a currency symbol (e.g. "$2.39") --
    TODO confirm against the data file. ``float`` tolerates surrounding
    whitespace, so a trailing space in the raw value is harmless.

    Raises:
        ValueError: if the remainder of the string is not a valid number.
    """
    # The conversion to float is essential: leaving the price as a string
    # makes ``item_price * quantity`` repeat the string instead of multiplying.
    return float(x[1:])


## Convert the price column from string to float.
df['item_price'] = df['item_price'].apply(dollarrizer)
#print(df['item_price'])
##**3_2**
# Per-row revenue: unit price times the number of units on that row.
df['total'] = df['item_price']*df['quantity']
# Total revenue, printed as a one-element Series labelled "sum".
print(df['total'].agg(['sum']))
print(type(df['total'].values))
# Same total computed directly on the underlying NumPy array.
a = np.sum(df['total'].values)
print(a)
##**4**
##
#count = df['order_id'].values*df['quantity'].values
#print(count.sum())##compute the order total
#count1 = df['quantity'].values.sum()##compute the total quantity
#print(count1)
#print(count.sum()/count1)