import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
# Assumes the race data has already been loaded into a DataFrame named `df`.
# df = pd.read_csv('data.csv')  # replace with the actual data-loading code
# Encode gender as a number: '男' -> 0, '女' -> 1.
# Series.map leaves NaN for any value that is not a key of the mapping.
gender_codes = {'男': 0, '女': 1}
df['性别'] = df['性别'].map(gender_codes)
# Timing helper used when building the per-segment features.
def calculate_pace(time1, time2):
    """Return the elapsed time in seconds between two timestamps.

    Despite the name, no distance is involved: the result is the raw
    duration ``time2 - time1``, not a pace per kilometre.

    Args:
        time1: Start timestamp, in any form ``pd.to_datetime`` accepts.
        time2: End timestamp, in any form ``pd.to_datetime`` accepts.

    Returns:
        Seconds from ``time1`` to ``time2`` as a float; negative when
        ``time2`` is earlier than ``time1``.
    """
    start = pd.to_datetime(time1)
    end = pd.to_datetime(time2)
    return (end - start).total_seconds()
# Checkpoint timestamp columns, one entry per timing point. The list order
# matters: neighbouring entries are treated as the two ends of a segment.
segments = [
    '起点:庆典广场过线时间',
    'U1:二道营过线时间',
    'U2:转枝莲过线时间',
    'U3:东坪过线时间',
    'U4:桦林子过线时间',
    'U5:多乐美地过线时间',
    'U6:太舞滑雪小镇进站过线时间',
    'U6:太舞滑雪小镇出站过线时间',
    'U7:雪如意过线时间',
    'U8:和平驿站过线时间',
    'U9:翠云山顶餐厅过线时间',
    'U10:云顶滑雪公园进站过线时间',
    'U10:云顶滑雪公园出站过线时间',
    'U11:万龙滑雪场过线时间',
    'U12:诗柏•云酒店过线时间',
    'U13:东梁底过线时间',
    'U14:阿那亚崇礼过线时间',
    'W1翠云山银河滑雪场星芒餐厅',
    '终点:庆典广场过线时间',
]
# Elapsed time (in seconds) between every pair of consecutive checkpoints,
# stored as columns '配速_1' .. '配速_N'.
# Each checkpoint column is parsed once and whole columns are subtracted,
# instead of re-parsing two timestamps per row through DataFrame.apply.
checkpoint_times = {name: pd.to_datetime(df[name]) for name in segments}
for i, (start, end) in enumerate(zip(segments, segments[1:])):
    df[f'配速_{i+1}'] = (
        checkpoint_times[end] - checkpoint_times[start]
    ).dt.total_seconds()
# Total distance feature: one unit per segment (every segment is assumed to
# be 1 km long), so this is the same constant for every row.
df['总里程'] = len(segments) - 1

# Row-wise summary statistics over the per-segment columns. The column list
# and the sub-frame are built once and shared by all five features.
pace_columns = [f'配速_{k}' for k in range(1, len(segments))]
segment_values = df[pace_columns]

df['总平均配速'] = segment_values.mean(axis=1)  # mean of the segment values
df['总配速均差'] = segment_values.std(axis=1)   # standard deviation (pandas default)
df['最大配速'] = segment_values.max(axis=1)     # largest segment value
df['最小配速'] = segment_values.min(axis=1)     # smallest segment value
# Variance as computed by np.var on each row.
df['配速均方差'] = segment_values.apply(np.var, axis=1)
# Target variable: net finish time converted to seconds.
# The value is parsed as a duration rather than a clock time, because
# pd.to_datetime(..., format='%H:%M:%S') rejects hour fields of 24 or more
# and therefore fails on finish times of a day or longer.
# NOTE(review): assumes the column holds 'H:MM:SS' strings — confirm.
# The result is float seconds.
df['净计时成绩'] = pd.to_timedelta(df['净计时成绩']).dt.total_seconds()
# Model inputs (age, gender and the engineered columns) and the target.
features = [
    '年龄',
    '性别',
    '总里程',
    '总平均配速',
    '总配速均差',
    '最大配速',
    '最小配速',
    '配速均方差',
]
X, y = df[features], df['净计时成绩']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Fit a gradient-boosted regressor on the training split.
gbr_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'random_state': 42,
}
model = GradientBoostingRegressor(**gbr_params)
model.fit(X_train, y_train)

# Predict on the held-out split and report the mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
# (Line-number residue from the original code listing was removed here.)