使用 sklearn 线性回归模型预测房价的示例
from sklearn import linear_model
import pandas as pd
import numpy as np
def get_data(path="train_data.csv"):
    """Load the training set and split it into features and target.

    Args:
        path: CSV file to read. It must contain the columns
            ``square_feet``, ``comfortable`` and ``price``.
            Defaults to ``"train_data.csv"`` in the working directory.

    Returns:
        A tuple ``(x, y)``: ``x`` is an ``(n, 2)`` array whose rows are
        ``[square_feet, comfortable]`` and ``y`` is an ``(n, 1)`` array of
        the matching ``price`` values.
    """
    train_data = pd.read_csv(path)
    features = np.array(train_data[["square_feet", "comfortable"]]).reshape(-1, 2)
    # Keep the target as a column vector, matching the original return shape.
    target = np.array(train_data["price"]).reshape(-1, 1)
    return features, target
if __name__ == "__main__":
    feature_columns = ["square_feet", "comfortable"]

    # Load the training features and targets, then fit a linear
    # regression model to them.
    train_x, train_y = get_data()
    model = linear_model.LinearRegression()
    model.fit(train_x, train_y)

    # Read the rows to score and pull out the same two feature columns.
    df = pd.read_csv("predict_data.csv")
    predict_frame = df[feature_columns]
    predict_x = np.array(predict_frame).reshape(-1, 2)

    predictions = model.predict(predict_x)

    # intercept_ and coef_ are the fitted model parameters:
    #   y = coef_[0] * x1 + coef_[1] * x2 + intercept_
    fitted_params = {'intercept': model.intercept_, 'coefficient': model.coef_}
    print(fitted_params)

    # Attach the predictions to the input rows and show the result.
    df['pred'] = predictions
    print(df)
训练数据 train_data.csv
square_feet,comfortable,price
1,1,100
2,1,200
3,1,300
4,1,400
5,1,500
6,1,600
7,1,700
8,1,800
9,1,900
1,2,200
2,2,400
3,2,600
4,2,800
5,2,1000
6,2,1200
7,2,1400
8,2,1600
9,2,1800
1,3,300
2,3,600
3,3,900
4,3,1200
5,3,1500
6,3,1800
7,3,2100
8,3,2400
9,3,2700
1,4,400
2,4,600
3,4,1000
4,4,1300
5,4,1400
6,4,1500
7,4,2600
8,4,2700
9,4,2800
预测数据 predict_data.csv
square_feet,comfortable
5,0.1
4,1
结果输出
{'intercept': array([-946.52777778]), 'coefficient': array([[230.41666667, 376.66666667]])}
square_feet comfortable pred
0 5 0.1 243.222222
1 4 1.0 351.805556