# Kaggle code, updated from time to time.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import *
# Load the raw training and test tables from the working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')
print('-----------------------DATA_PROCESSING-----------------------')

# Stack train on top of test so both go through the same feature engineering.
data = pd.concat([train_data, test_data], axis=0)

# Express each year column as an offset from the year of sale.
data['Built2Sold'] = data['YrSold'] - data['YearBuilt']
data['Add2Sold'] = data['YrSold'] - data['YearRemodAdd']
data['GarageBlt'] = data['YrSold'] - data['GarageYrBlt']

# The raw year columns are now redundant, and the target must not be a feature.
data = data.drop(
    columns=['YrSold', 'YearBuilt', 'YearRemodAdd', 'GarageYrBlt', 'SalePrice']
)

# get_dummies does not one-hot encode integer columns, so cast the
# integer-coded categorical columns to str first.
for categorical_col in ('OverallQual', 'OverallCond', 'MSSubClass'):
    data[categorical_col] = data[categorical_col].astype(str)

dummied_data = pd.get_dummies(data)

# Fill any remaining missing values with the per-column mean.
mean_cols = dummied_data.mean()
dummied_data = dummied_data.fillna(mean_cols)
print('-----------------------TRAIN_TEST_SPLIT-----------------------')
# dummied_data was built from train rows stacked on top of test rows, so the
# first len(train_data) rows are the training set. Deriving the boundary from
# the data (instead of a hard-coded 1460) keeps the split correct if the
# training file changes size.
n_train = len(train_data)
X_train = dummied_data.iloc[:n_train, :]
X_test = dummied_data.iloc[n_train:, :]
print(X_test.iloc[0, :])

# Train on log1p(SalePrice); predictions must be mapped back with expm1.
y_train = train_data.loc[:, 'SalePrice']
y_train = np.log1p(y_train)
print('-------------------------MODEL_GENERATING---------------------')
from sklearn.linear_model import LinearRegression, Lasso, Ridge, BayesianRidge

# 'Id' is a row identifier, not a property of the house. Leaving it in the
# feature matrix lets the regressor fit a coefficient on it and then
# extrapolate over the test Ids, so it is excluded from the features and kept
# only for labelling the submission rows.
feature_cols = [col for col in X_train.columns if col != 'Id']

lin_reg = BayesianRidge()
lin_reg.fit(X_train[feature_cols], y_train)

# y_train is log1p-transformed, so expm1 maps predictions back to prices.
y_pred = lin_reg.predict(X_test[feature_cols])
y_pred = np.expm1(y_pred)

# Identifiers written next to each prediction in the output file.
X_id = X_test.loc[:, 'Id']
print('----------------------OUTPUT_FILE_GENERATING------------------')
import csv

# The context manager closes the file even if a write fails part-way.
# newline='' lets the csv module control line endings itself.
with open(r'C:\Users\Lenovo\Desktop\ju\test_y.csv', 'w', newline='') as f:
    csv_writer = csv.writer(f)
    # Header row, then one (Id, SalePrice) row per prediction.
    csv_writer.writerow(["Id", "SalePrice"])
    csv_writer.writerows(zip(X_id, y_pred))
print('-------------------------WORD_DONE---------------------')