import pandas as pd
import os
# Directory that holds the course data; relative paths used afterwards
# (e.g. 'lianjia.csv') are resolved against it.
# The path is kept as a native str. Encoding it to UTF-8 bytes is unnecessary,
# and on Windows with Python < 3.6 bytes paths are decoded with the ANSI code
# page, so the non-ASCII directory names would fail to resolve.
root_path = r'D:\00data\desktop\上课\《数据分析工程资料》课件\数据分析工程实践'
os.chdir(root_path)
os.getcwd()  # echo the working directory to confirm the change took effect
'D:\\00data\\desktop\\上课\\《数据分析工程资料》课件\\数据分析工程实践'
# Load the dataset from lianjia.csv in the current working directory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. This avoids the DtypeWarning about mixed types
# (columns 1, 11, 12, 13, 14, 27) and gives those columns one consistent dtype.
data = pd.read_csv('lianjia.csv', low_memory=False)
D:\00installer\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2785: DtypeWarning: Columns (1,11,12,13,14,27) have mixed types. Specify dtype option on import or set low_memory=False.
interactivity=interactivity, compiler=compiler, result=result)
# (rows, columns) of the loaded frame.
data.shape
(318851, 29)
# Display settings for wide frames.
# Fully qualified option names are used throughout: abbreviated names only
# work while the pattern matches exactly one registered option, so they can
# break when pandas adds an option with a similar name.
pd.set_option('display.max_columns', None)      # never truncate the column list
pd.set_option('display.max_colwidth', 100)      # show up to 100 characters per cell
pd.set_option('display.max_info_columns', 100)  # per-column detail in info() up to 100 columns
# Preview the first two rows with every column visible.
data.head(n=2)
URL
ID
Lng
Lat
CommunityID
TradeTime
DOM
Followers
Total price
Price
Square
Bed Room
Living room
Kitchen
Bathroom
楼
Building Type
Construction time
renovation condition
building structure
Ladder ratio(梯户比)
Elevator
Property rights for five years(房屋满五年)
Subway
District
Community average
Unnamed: 26
Unnamed: 27
Unnamed: 28
0
https://bj.lianjia.com/chengjiao/101084782030.html
1.01085e+11
116.475489
40.019520
1111027376244
2016/8/9
1464.0
106
415.0
31680
131.00
2
1
1
1
高 26
塔楼
2005
简装
钢混
0.217
1.0
0.0
1.0
朝阳
56021.0
NaN
NaN
NaN
1
https://bj.lianjia.com/chengjiao/101086012217.html
1.01086e+11
116.453917
39.881534
1111027381879
2016/7/28
903.0
126
575.0
43436
132.38
2
2
1
2
高 22
塔楼
2004
精装
钢混
0.667
1.0
1.0
0.0
朝阳
71539.0
NaN
NaN
NaN
# Print per-column non-null counts and dtypes, plus the memory footprint.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 318851 entries, 0 to 318850
Data columns (total 29 columns):
URL 318851 non-null object
ID 318851 non-null object
Lng 318851 non-null float64
Lat 318851 non-null float64
CommunityID 318851 non-null int64
TradeTime 318851 non-null object
DOM 160874 non-null float64
Followers 318851 non-null int64
Total price 318851 non-null float64
Price 318851 non-null int64
Square 318851 non-null float64
Bed Room 318851 non-null object
Living room 318851 non-null object
Kitchen 318851 non-null object
Bathroom 318851 non-null object
楼 318851 non-null object
Building Type 316830 non-null object
Construction time 318851 non-null object
renovation condition 318851 non-null object
building structure 318851 non-null object
Ladder ratio(梯户比) 318851 non-null float64
Elevator 318819 non-null float64
Property rights for five years(房屋满五年) 318819 non-null float64
Subway 318819 non-null float64
District 318851 non-null object
Community average 318388 non-null float64
Unnamed: 26 0 non-null float64
Unnamed: 27 184584 non-null object
Unnamed: 28 184389 non-null float64
dtypes: float64(12), int64(3), object(14)
memory usage: 70.5+ MB
# Number of records per district, largest first.
# The explicit descending sort is kept exactly as in the original pipeline.
district_sum = (
    data['District']
    .value_counts()
    .sort_values(ascending=False)
)
district_sum
朝阳 107244
昌平 38634
海淀 38200
西城 31293
丰台 29338
东城 17086
大兴 15313
通州 13974
石景山 11371
顺义 9202
房山 2955
亦庄开发区 2537
门头沟 1704
Name: District, dtype: int64
# Summary statistics for the longitude / latitude columns.
data.loc[:, ['Lng', 'Lat']].describe()
Lng
Lat
count
318851.000000
318851.