Kaggle入门:House Prices - Advanced Regression Techniques

不定时更新Kaggle代码

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import *

# Load the Kaggle training and test tables from the current working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
print('-----------------------DATA_PROCESSING-----------------------')
# Stack train on top of test so that feature engineering and one-hot encoding
# produce the same set of columns for both.
data = pd.concat((train_data, test_data), axis=0)
# Replace the absolute year columns with ages relative to the year of sale.
sold_year = data['YrSold']
data['Built2Sold'] = sold_year - data['YearBuilt']
data['Add2Sold'] = sold_year - data['YearRemodAdd']
data['GarageBlt'] = sold_year - data['GarageYrBlt']
# The raw year columns are superseded by the ages above; SalePrice is the
# target (read again from train_data later) and must not remain as a feature.
data = data.drop(
    columns=['YrSold', 'YearBuilt', 'YearRemodAdd', 'GarageYrBlt', 'SalePrice']
)

# get_dummies only one-hot encodes object/string/category columns by default,
# so these integer-coded categorical fields are cast to str first.
for categorical_col in ('OverallQual', 'OverallCond', 'MSSubClass'):
    data[categorical_col] = data[categorical_col].astype(str)

dummied_data = pd.get_dummies(data)

# Fill every remaining missing value with the mean of its column.
dummied_data = dummied_data.fillna(dummied_data.mean())

print('-----------------------TRAIN_TEST_SPLIT-----------------------')
# train_data was stacked on top of test_data, so the first len(train_data)
# rows of the encoded table are the training rows. Deriving the boundary from
# the data (instead of a hard-coded 1460) keeps the split correct when
# train.csv has a different number of rows.
n_train = len(train_data)
X_train = dummied_data.iloc[:n_train, :]
X_test = dummied_data.iloc[n_train:, :]
print(X_test.iloc[0, :])
# Train on log1p(SalePrice); predictions are mapped back with expm1 later.
y_train = np.log1p(train_data.loc[:, 'SalePrice'])

print('-------------------------MODEL_GENERATING---------------------')

from sklearn.linear_model import LinearRegression, Lasso, Ridge, BayesianRidge

# 'Id' is the row identifier written to the submission file; it carries no
# information about the house, so it is excluded from the feature matrix
# rather than being fitted as if it were a predictor.
feature_cols = [col for col in X_train.columns if col != 'Id']

lin_reg = BayesianRidge()
lin_reg.fit(X_train[feature_cols], y_train)
y_pred = lin_reg.predict(X_test[feature_cols])
# The target was log1p-transformed before fitting; expm1 maps the
# predictions back to the original price scale.
y_pred = np.expm1(y_pred)
# Keep the identifiers of the test rows for the submission file.
X_id = X_test.loc[:, 'Id']
print('----------------------OUTPUT_FILE_GENERATING------------------')
import csv

# Write the submission as a two-column CSV (Id, SalePrice).
# The context manager guarantees the file is flushed and closed even if a
# write raises; newline='' lets the csv module control line endings itself.
with open(r'C:\Users\Lenovo\Desktop\ju\test_y.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row.
    csv_writer.writerow(["Id", "SalePrice"])
    # One row per test sample: its identifier and the predicted price.
    csv_writer.writerows(zip(X_id, y_pred))
# Reported only after the file has been closed.
print('-------------------------WORD_DONE---------------------')
   
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值