q6第一版

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the two CSV inputs and join them on a shared 'Date' column.
# NOTE(review): presumably Q4_backup.csv is the weekly feature table (keyed by
# 'Date_of_Monday') and Q2_backup.csv is the weekly BTC price table (keyed by
# 'Date') -- confirm against whatever wrote these files.
X_ord = pd.read_csv('./Q4_backup.csv')
btc_weekly = pd.read_csv('./Q2_backup.csv')

# Inclusive date window applied to both tables.
# The date columns are compared as they were read from CSV (no date parsing),
# so the comparison is lexicographic; that matches chronological order only
# for ISO 'YYYY-MM-DD' text -- TODO confirm the stored date format.
WINDOW_START = '2020-05-01'
WINDOW_END = '2021-05-31'

# Series.between is inclusive on both ends by default, i.e. the same
# `>= start & <= end` test spelled once.
filtered_btc = btc_weekly[btc_weekly['Date'].between(WINDOW_START, WINDOW_END)]

# Take an explicit copy: adding a column to a boolean-mask slice of X_ord is
# chained assignment and raises SettingWithCopyWarning; the copy makes the
# new 'Date' column land on an independent frame.
filtered_X_ord = X_ord[X_ord['Date_of_Monday'].between(WINDOW_START, WINDOW_END)].copy()
filtered_X_ord['Date'] = filtered_X_ord['Date_of_Monday']

# Inner join keeps only the dates present in both tables.
merged_df = pd.merge(filtered_X_ord, filtered_btc, on='Date', how='inner')

# Choose how many leading predictors (p) to use by 5-fold cross-validated RMSE
# of an ordinary least-squares fit on 'Adj Close'.
#
# Candidate predictors are every X_ord column except the first.
# NOTE(review): presumably the first column is 'Date_of_Monday' -- confirm.
# The misspelled name `featrue_cols` is kept because later code reads it.
featrue_cols = X_ord.columns[1:]
kf = KFold(n_splits=5, shuffle=True, random_state=5206)

# The target does not depend on p, so build it once instead of per iteration.
y = np.array(merged_df['Adj Close'])

# Try at most 20 predictors, but never more than actually exist: slicing past
# the end would only refit the same full model, which cannot beat itself
# under the strict `<` comparison below.
max_p = min(20, len(featrue_cols))

# res holds (best p so far, its mean fold RMSE); p == 0 means "nothing tried".
res = (0, float('inf'))
for p in range(1, max_p + 1):
    cols = featrue_cols[:p]
    X = np.array(merged_df[cols])
    fold_rmse = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model = LinearRegression()
        model.fit(X_train, y_train)
        # scikit-learn's documented argument order is (y_true, y_pred).
        fold_mse = mean_squared_error(y_test, model.predict(X_test))
        fold_rmse.append(np.sqrt(fold_mse))
    # Average over the folds actually produced rather than a hard-coded 5.
    mean_err = sum(fold_rmse) / len(fold_rmse)
    # Strict comparison: on ties the smaller p found first is kept.
    if mean_err < res[-1]:
        res = (p, mean_err)

# Refit on every merged row using the winning number of leading predictors,
# then report the cross-validation result and the fitted coefficients.
best_p, best_5_fold_rmse = res  # res is the (p, mean RMSE) pair from the search
selected_cols = featrue_cols[:best_p]
X = merged_df[selected_cols].to_numpy()
y = merged_df['Adj Close'].to_numpy()
model = LinearRegression().fit(X, y)  # fit() returns the estimator itself
print(f'best p: {best_p}, best 5 fold rmse: {best_5_fold_rmse}')
print(f'model coef: {model.coef_}')

dsadasdsadsa

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值