import numpy as np
import pandas as pd
# NOTE(review): this binds the top-level `scipy` package to the name `stats`,
# not the `scipy.stats` submodule. `from scipy import stats` may have been
# intended — confirm against how `stats` is used later in the file.
import scipy as stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Install a global 'ignore' filter so that warnings are not displayed.
import warnings;warnings.simplefilter('ignore')
# 1. 提取数据（数据来自 Kaggle 下载的 csv 文件；下方代码读取的是 xlsx 文件）
# Load the housing dataset into a DataFrame from a local Excel workbook.
# The path is a hard-coded absolute Windows path, so it must be adjusted
# when running on another machine.
data = pd.read_excel('D:/Download/housing.xlsx')
# Bare expression: presumably a notebook cell echo that displays the DataFrame
# (the lines that follow look like its rendered output). It has no effect
# when run as a plain script.
data
0
https://bj.lianjia.com/chengjiao/101084782030....
101084782030
116.475489
40.019520
1111027376244
2016-08-09
1464.0
106
415.0
31680
...
1.0
2005
3
6
0.217
1.0
0.0
1.0
7
56021.0
1
https://bj.lianjia.com/chengjiao/101086012217....
101086012217
116.453917
39.881534
1111027381879
2016-07-28
903.0
126
575.0
43436
...
1.0
2004
4
6
0.667
1.0
1.0
0.0
7
71539.0
2
https://bj.lianjia.com/chengjiao/101086041636....
101086041636
116.561978
39.877145
1111040862969
2016-12-11
1271.0
48
1030.0
52021
...
4.0
2005
3
6
0.500
1.0
0.0
0.0
7
48160.0
3
https://bj.lianjia.com/chengjiao/101086406841....
101086406841
116.438010
40.076114
1111043185817
2016-09-30
965.0
138
297.5
22202
...
1.0
2008
1
6
0.273
1.0
0.0
0.0
6
51238.0
4
https://bj.lianjia.com/chengjiao/101086920653....
101086920653
116.428392
39.886229
1111027381174
2016-08-28
927.0
286
392.0
48396
...
4.0
1960
2
2
0.333
0.0
1.0
1.0
1
62588.0
# 2. 使用 OLS（普通最小二乘法）进行回归分析
X = data[['followers','square','buildingStructure']