import pandas as pd
# Load the tab-separated Chipotle order file into a DataFrame.
chipo = pd.read_csv("D:/东华研/数据分析/pandas_exercise/exercise_data/chipotle.tsv", sep="\t")
# Preview the first rows. head must be *called*: the bare attribute
# `chipo.head` only echoes the bound method object instead of the rows.
chipo.head()
<bound method NDFrame.head of order_id quantity item_name \
0 1 1 Chips and Fresh Tomato Salsa
1 1 1 Izze
2 1 1 Nantucket Nectar
3 1 1 Chips and Tomatillo-Green Chili Salsa
4 2 2 Chicken Bowl
... ... ... ...
4617 1833 1 Steak Burrito
4618 1833 1 Steak Burrito
4619 1834 1 Chicken Salad Bowl
4620 1834 1 Chicken Salad Bowl
4621 1834 1 Chicken Salad Bowl
choice_description item_price
0 NaN $2.39
1 [Clementine] $3.39
2 [Apple] $3.39
3 NaN $2.39
4 [Tomatillo-Red Chili Salsa (Hot), [Black Beans... $16.98
... ... ...
4617 [Fresh Tomato Salsa, [Rice, Black Beans, Sour ... $11.75
4618 [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese... $11.75
4619 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $11.25
4620 [Fresh Tomato Salsa, [Fajita Vegetables, Lettu... $8.75
4621 [Fresh Tomato Salsa, [Fajita Vegetables, Pinto... $8.75
[4622 rows x 5 columns]>
# Show the column labels of the dataset.
chipo. columns
Index(['order_id', 'quantity', 'item_name', 'choice_description',
'item_price'],
dtype='object')
# Number of columns in the dataset.
len(chipo.columns)
5
# Show how the rows are indexed.
chipo. index
RangeIndex(start=0, stop=4622, step=1)
# Total quantity ordered per item, largest first.
# The aggregation is named by the string "sum" rather than the Python builtin:
# recent pandas releases emit a FutureWarning when a builtin callable is passed
# to agg, and the string form matches the other groupby in this notebook.
# The sort is chained instead of using inplace=True, so `c` is built in one
# expression and keeps the original row labels from the grouped frame.
c = (
    chipo[["quantity", "item_name"]]
    .groupby("item_name", as_index=False)
    .agg({"quantity": "sum"})
    .sort_values("quantity", ascending=False)
)
# The five most-ordered items.
c.head(5)
              item_name  quantity
17         Chicken Bowl       761
18      Chicken Burrito       591
25  Chips and Guacamole       506
39        Steak Burrito       386
10    Canned Soft Drink       351
# How many distinct items appear in the orders.
chipo.loc[:, "item_name"].nunique()
50
# The five most frequent choice descriptions and how often each occurs.
chipo.loc[:, "choice_description"].value_counts().head(5)
[Diet Coke] 134
[Coke] 123
[Sprite] 77
[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Lettuce]] 42
[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce]] 40
Name: choice_description, dtype: int64
# Total number of items ordered: the sum of the quantity column.
total_items_orders = chipo.loc[:, "quantity"].sum()
total_items_orders
4972
def dollarizer(x):
    """Convert a price such as ``"$2.39 "`` to a float.

    Surrounding whitespace and a leading ``$`` are removed before parsing,
    rather than blindly slicing off the first and last characters, so a
    string without trailing whitespace (``"$2.39"``) no longer loses its
    final digit. Values that are not strings (for example, a price that has
    already been converted) are passed straight to ``float``, which makes it
    safe to apply the function to the same column more than once.

    Args:
        x: A price string with an optional leading ``$`` and optional
            surrounding whitespace, or an already-numeric value.

    Returns:
        The price as a float.

    Raises:
        ValueError: If the cleaned string is not a valid number.
    """
    if not isinstance(x, str):
        return float(x)
    return float(x.strip().lstrip("$"))
# Replace the raw price strings with numeric values, element by element,
# then total the converted column.
chipo["item_price"] = chipo["item_price"].map(dollarizer)
chipo["item_price"].sum()
34500.16
# Per-row total: item price times quantity, rounded to two decimals,
# followed by the sum of that column over the whole dataset.
chipo["sub_total"] = chipo["item_price"].mul(chipo["quantity"]).round(2)
chipo["sub_total"].sum()
39237.02
# Display the DataFrame, which now includes the numeric item_price and the
# derived sub_total column.
chipo
      order_id  quantity  item_name                              choice_description                                 item_price  sub_total
0            1         1  Chips and Fresh Tomato Salsa           NaN                                                      2.39       2.39
1            1         1  Izze                                   [Clementine]                                             3.39       3.39
2            1         1  Nantucket Nectar                       [Apple]                                                  3.39       3.39
3            1         1  Chips and Tomatillo-Green Chili Salsa  NaN                                                      2.39       2.39
4            2         2  Chicken Bowl                           [Tomatillo-Red Chili Salsa (Hot), [Black Beans...       16.98      33.96
...        ...       ...  ...                                    ...                                                       ...        ...
4617      1833         1  Steak Burrito                          [Fresh Tomato Salsa, [Rice, Black Beans, Sour ...       11.75      11.75
4618      1833         1  Steak Burrito                          [Fresh Tomato Salsa, [Rice, Sour Cream, Cheese...       11.75      11.75
4619      1834         1  Chicken Salad Bowl                     [Fresh Tomato Salsa, [Fajita Vegetables, Pinto...       11.25      11.25
4620      1834         1  Chicken Salad Bowl                     [Fresh Tomato Salsa, [Fajita Vegetables, Lettu...        8.75       8.75
4621      1834         1  Chicken Salad Bowl                     [Fresh Tomato Salsa, [Fajita Vegetables, Pinto...        8.75       8.75
4622 rows × 6 columns
# Number of distinct orders in the dataset.
chipo.loc[:, "order_id"].nunique()
1834
# Average revenue per order: total of all row sub-totals divided by the
# number of distinct orders.
chipo.loc[:, "sub_total"].sum() / chipo.loc[:, "order_id"].nunique()
21.39423118865867
# Same average computed another way: sum the sub-totals within each order,
# then take the mean of those per-order totals.
chipo.groupby("order_id")["sub_total"].sum().mean()
21.394231188658654
# Count of distinct item names sold (missing values are not counted).
chipo["item_name"].nunique(dropna=True)
50