import pandas as pd
import numpy as np
sales = pd.read_csv("home_data.csv")
from sklearn.cross_validation import train_test_split
train_data,test_data = train_test_split(sales,train_size=0.8)
from sklearn.linear_model import LinearRegression
X = train_data[['sqft_living']]
y=train_data.price
#build the linear regression object
lm=LinearRegression()
# Train the model using the training sets
lm.fit(X,y)
#print the y intercept
print(lm.intercept_)
#print the coefficents
print(lm.coef_)
lm.predict(300)
from math import sqrt
from sklearn.metrics import mean_squared_error
y_true=train_data.price.loc[0:5,]
test_data=test_data[['price']].reset_index()
y_pred=test_data.price.loc[0:5,]
predicted =y_pred.