# coding: utf-8
# In[1]:
# Launch GraphLab Create
import graphlab
# Load the house sales data from the on-disk SFrame.
sales = graphlab.SFrame("data/home_data.gl")
# Bare expression: shows the SFrame when this runs as a notebook cell.
sales
# In[3]:
# Select the notebook as the Canvas target *before* the first show() call;
# otherwise that first call uses Canvas's default target (a browser window,
# per the GraphLab Create docs) instead of rendering inline.
graphlab.canvas.set_target('ipynb')
# In[2]:
sales.show()
# In[5]:
# Scatter plot of living area versus price
sales.show(view='Scatter Plot', x='sqft_living', y='price')
# # Split the data into a training set and a test set
# In[6]:
# A fixed seed keeps the split the same from run to run.
train_data, test_data = sales.random_split(.8, seed=0)
# ## Build the regression model
# In[13]:
# Linear regression of price on a single feature, the living area.
sqft_model = graphlab.linear_regression.create(
    train_data,
    target='price',
    features=['sqft_living'],
)
# # Evaluate the linear regression model
# In[14]:
# Mean price over the test set, printed next to the evaluation metrics below.
print(test_data['price'].mean())
# In[15]:
print(sqft_model.evaluate(test_data))
# # Plot predicted prices against actual prices
# In[17]:
import matplotlib.pyplot as plt
# Turn on inline figures. run_line_magic() is the supported replacement for
# the deprecated InteractiveShell.magic() call.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[18]:
# First series: actual test prices as dots.
# Second series: prices predicted by sqft_model, drawn as a line.
plt.plot(
    test_data['sqft_living'], test_data['price'], '.',
    test_data['sqft_living'], sqft_model.predict(test_data), '-',
)
# In[20]:
# Coefficients of the fitted model
sqft_model.get('coefficients')
# # Explore other features in the data
# In[27]:
# Columns used as inputs for the multi-feature model.
my_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'zipcode',
]
# In[28]:
# Visualize only the selected feature columns.
sales[my_features].show()
# In[29]:
# Box-and-whisker view of price grouped by zipcode.
sales.show(view='BoxWhisker Plot', x='zipcode', y='price')
# # Build a regression model with more features
# In[33]:
# Same target as the single-feature model, trained on every column in my_features.
my_features_model = graphlab.linear_regression.create(train_data, target='price', features=my_features)
# In[35]:
print(my_features_model.evaluate(test_data))
# In[36]:
# These predictions depend on all of my_features, not only sqft_living, so
# they do not form a single curve over sqft_living. Draw them as points:
# a '-' line would join the rows in data order and produce a zigzag.
plt.plot(test_data['sqft_living'], test_data['price'], '.',
         test_data['sqft_living'], my_features_model.predict(test_data), '.')
# In[37]:
# Coefficients of the fitted model
my_features_model.get('coefficients')
# # Apply the learned models to predict a house's sale price
# In[39]:
# Keep only the rows whose 'id' equals the chosen house id.
house1 = sales[
    sales['id'] == '5309101200'
]
# In[40]:
# Bare expression: shows the selected rows when this runs as a notebook cell.
house1
# In[41]:
# Prediction from the single-feature model.
print(sqft_model.predict(house1))
# In[42]:
# Prediction from the multi-feature model.
print(my_features_model.predict(house1))
# In[ ]: