实用机器学习(hw1/hw4)
1. 环境安装
安装 autogluon:pip install autogluon
2. baseline 代码分析
import pandas as pd
import numpy as np
import scipy
from autogluon.tabular import TabularPredictor
# Baseline data preparation: load the listings, build a log-price target,
# and carve out time-based train/test windows.

# Load the raw table and keep only the target plus a handful of features.
data = pd.read_feather('house_sales.ftr')
df = data[['Sold Price', 'Sold On', 'Type', 'Year built', 'Bedrooms', 'Bathrooms']].copy()

# When the price column is stored as text, strip the '$', ',' and '-'
# characters, parse it as a number and use log10(price + 1) as the target.
c = 'Sold Price'
if c in df.select_dtypes('object').columns:
    # Assign with df[c] instead of df.loc[:, c]: since pandas 2.0 the .loc
    # form writes into the existing object-dtype column in place, so the
    # result would stay `object` rather than becoming float64.
    df[c] = np.log10(
        pd.to_numeric(df[c].replace(r'[$,-]', '', regex=True)) + 1
    )

# Keep rows whose log10 price lies in [4, 8]. The explicit .copy() makes the
# filtered frame independent, so the column assignment below does not raise
# SettingWithCopyWarning.
df = df[(df['Sold Price'] >= 4) & (df['Sold Price'] <= 8)].copy()

# Time-based split: train on [train_start, test_start), test on
# [test_start, test_end).
test_start, test_end = pd.Timestamp(2021, 2, 15), pd.Timestamp(2021, 3, 1)
train_start = pd.Timestamp(2021, 1, 1)

# Unparseable dates become NaT and therefore fall outside both windows.
df['Sold On'] = pd.to_datetime(df['Sold On'], errors='coerce')
train = df[(df['Sold On'] >= train_start) & (df['Sold On'] < test_start)]
test = df[(df['Sold On'] >= test_start) & (df['Sold On'] < test_end)]
print</