import pandas as pd
import matplotlib. pyplot as plt
import seaborn as sns
% matplotlib inline
# Read the scraped Meituan restaurant listings into a DataFrame and print
# the per-column non-null counts and dtypes.
meituan = pd.read_csv(filepath_or_buffer='meituan.csv')
meituan.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1680 entries, 0 to 1679
Data columns (total 8 columns):
web-scraper-order 1680 non-null object
web-scraper-start-url 1680 non-null object
name 1680 non-null object
address 1680 non-null object
rating 1680 non-null float64
comment 1680 non-null float64
price 1667 non-null float64
cat 1680 non-null object
dtypes: float64(3), object(5)
memory usage: 105.1+ KB
# Preview the first ten rows of the listings table.
meituan.head(n=10)
web-scraper-order web-scraper-start-url name address rating comment price cat 0 1563028765-1694 http://sh.meituan.com/meishi/c35/pn8/ 斯比特花园意大利餐厅(新世界大丸百货店) 黄浦区南京东路228号新世界大丸百货6层人均 4.2 2169.0 98.0 foreign 1 1563028775-1753 http://sh.meituan.com/meishi/c35/pn3/ 好伦哥自助餐厅(南京东路店) 黄浦区南京东路600号亚太广场6楼1室人均 3.0 6005.0 73.0 foreign 2 1563028769-1720 http://sh.meituan.com/meishi/c35/pn6/ 蘭赫咖啡(周浦店) 浦东新区年家浜路327号三楼(全季酒店三楼)人均 4.2 326.0 55.0 foreign 3 1563028721-1396 http://sh.meituan.com/meishi/c35/pn29/ 地中海的月亮(虹桥·食尚天地店) 青浦区沪青平公路1899号虹桥·食尚天地1栋1层106-3人均 3.5 0.0 107.0 foreign 4 1563028767-1705 http://sh.meituan.com/meishi/c35/pn7/ 拉蒂娜巴西烤肉自助餐Latina(长泰店) 浦东新区祖冲之路1239弄长泰广场1E09东庭院(地铁2号线4号出口左侧50米,星巴克后面)人均 3.4 657.0 148.0 foreign 5 1563028727-1432 http://sh.meituan.com/meishi/c35/pn26/ R Bar & Restaurant 徐汇区乌鲁木齐中路247号一层-10人均 4.5 0.0 140.0 foreign 6 1563028735-1488 http://sh.meituan.com/meishi/c35/pn22/ 芝根芝底(西渡店) 奉贤区沪杭公路215号(邮政储蓄银行旁)人均 5.0 370.0 33.0 foreign 7 1563028729-1445 http://sh.meituan.com/meishi/c35/pn25/ Barolo steakhouse巴洛洛意大利牛排餐厅 黄浦区建国中路155弄7号人均 5.0 1.0 238.0 foreign 8 1563028777-1778 http://sh.meituan.com/meishi/c35/pn2/ 斗牛士牛排(南京东路悦荟店) 黄浦区南京东路353号悦荟广场(原353店)7F(置地广场旁边)人均 4.4 1285.0 120.0 foreign 9 1563028723-1412 http://sh.meituan.com/meishi/c35/pn28/ 佩德罗巴西烤肉餐厅(禹州商业广场店) 浦东新区沪南公路9936弄(禹州商业广场2层)人均 3.0 83.0 103.0 foreign
# Discard listings that have no price, then backfill any missing rating
# with 3 and any missing comment count with 0 via one column-wise fillna.
meituan = meituan.dropna(subset=["price"]).fillna({'rating': 3, 'comment': 0})

# Summary statistics of the numeric columns after cleaning.
meituan.describe()
rating comment price count 1667.000000 1667.000000 1667.000000 mean 4.204079 512.169166 109.988002 std 0.520557 1415.567751 93.524620 min 3.000000 0.000000 9.000000 25% 3.900000 6.000000 60.000000 50% 4.100000 49.000000 89.000000 75% 4.500000 369.500000 123.000000 max 5.000000 24585.000000 1670.000000
# Pairwise Pearson correlation between the three numeric columns.
cols = ['rating', 'comment', 'price']
meituan.loc[:, cols].corr(method='pearson')
rating comment price rating 1.000000 0.076882 0.189531 comment 0.076882 1.000000 -0.128461 price 0.189531 -0.128461 1.000000
# Scatter of rating against price on a dedicated 10x8 figure.
plt.figure(figsize=(10, 8))
sns.scatterplot(data=meituan, x="price", y="rating", ax=plt.gca())
<matplotlib.axes._subplots.AxesSubplot at 0x1a22829d68>
# Bucket ratings into half-point bands and summarise the rating per band.
#
# The bin edges are right-closed, so without include_lowest=True the first
# band would be the open-left interval (3, 3.5]: every restaurant rated
# exactly 3.0 would be assigned NaN and silently vanish from the groupby
# (and from any plot that uses `rating2` as a hue).
bins = [3, 3.5, 4, 4.5, 5]
labels = ['<=3.5', '<=4', "<=4.5", "<=5"]
meituan['rating2'] = pd.cut(
    meituan.rating,
    bins,
    right=True,
    labels=labels,
    include_lowest=True,
)
meituan.groupby(['rating2'])['rating'].describe()
count mean std min 25% 50% 75% max rating2 <=3.5 209.0 3.475598 0.056566 3.2 3.5 3.5 3.5 3.5 <=4 495.0 3.892727 0.127843 3.6 3.8 3.9 4.0 4.0 <=4.5 537.0 4.336872 0.154714 4.1 4.2 4.4 4.5 4.5 <=5 395.0 4.917722 0.131141 4.6 4.8 5.0 5.0 5.0
# Horizontal box plot of the rating distribution for each cuisine category.
plt.figure(figsize=(10, 5))
pastel_palette = sns.color_palette('pastel')
ax = sns.boxplot(data=meituan, x='rating', y='cat', palette=pastel_palette)
# Enlarge tick labels so the category names stay readable.
ax.tick_params(labelsize=20)
# Horizontal box plot of the price distribution for each cuisine category.
plt.figure(figsize=(10, 5))
pastel_palette = sns.color_palette('pastel')
ax = sns.boxplot(data=meituan, x='price', y='cat', palette=pastel_palette)
# Enlarge tick labels so the category names stay readable.
ax.tick_params(labelsize=20)
# Number of restaurants per category, split by rating band.
# Start a fresh figure first: seaborn draws on the current axes, so without
# this the bars would be painted over whatever plot was created last.
plt.figure()
sns.countplot(x="cat", hue='rating2', data=meituan)
<matplotlib.axes._subplots.AxesSubplot at 0x1a23b7c2b0>
# Comment count per category, split by rating band (seaborn's barplot
# aggregates `comment` within each bar).
# Start a fresh figure first: seaborn draws on the current axes, so without
# this the bars would be painted over whatever plot was created last.
plt.figure()
sns.barplot(x="cat", y="comment", hue="rating2", data=meituan)
<matplotlib.axes._subplots.AxesSubplot at 0x1a23c49a20>