NBA结果预测

数据处理

import csv

# Merge the per-month 2018-19 result files into a single winner/loser table.
# Each monthly file is read with its first row skipped as a header; columns
# 2/3 are taken as one team and its score and columns 4/5 as the other team
# and its score (presumably visitor and home, judging by the 'V'/'H' flag).
month = [1, 2, 3, 4, 5, 6, 10, 11, 12]
with open("2018-2019_result.csv", "w", newline='') as merged_file:
    merged_writer = csv.writer(merged_file)
    header = ['WTeam', 'LTeam', 'WLoc']
    merged_writer.writerow(header)
    for month_no in month:
        with open(r'2018-2019_{}_result.csv'.format(month_no), 'r') as monthly_file:
            monthly_rows = csv.reader(monthly_file)
            next(monthly_rows, None)  # skip the header row
            for game in monthly_rows:
                visitor_pts, home_pts = int(game[3]), int(game[5])
                visitor, home = game[2], game[4]
                # A strictly higher visitor score is a visitor win; every
                # other outcome is recorded as a home win.
                if visitor_pts > home_pts:
                    outcome = [visitor, home, 'V']
                else:
                    outcome = [home, visitor, 'H']
                merged_writer.writerow(outcome)


# Extract the 2019-20 fixture list: copy columns 2 and 4 of every data row
# (written out under the headers 'Vteam' and 'Hteam') into a new CSV file.
with open("2019-2020_schedule_result.csv", 'r') as source_file, \
        open('2019-2020_schedule.csv', 'w', newline='') as target_file:
    schedule_rows = csv.reader(source_file)
    schedule_writer = csv.writer(target_file)
    header = ['Vteam', 'Hteam']
    schedule_writer.writerow(header)
    next(schedule_rows, None)  # skip the header row
    for fixture in schedule_rows:
        schedule_writer.writerow([fixture[2], fixture[4]])

预测结果

import pandas as pd
import math
import matplotlib.pyplot as plt
import csv
import random
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import learning_curve
from sklearn.model_selection import cross_val_score

# Remove the '*' that follows some team names in the csv files.
def del_star(data):
    """Strip '*' characters from both ends of every value in ``data['Team']``.

    The 'Team' column of ``data`` is overwritten in place; nothing is returned.
    """
    def _without_star(name):
        return name.strip('*')

    data['Team'] = data['Team'].map(_without_star)

# Drop the columns that are not wanted as features (per the original note:
# games played, arena, playing time) and combine the three stat tables.
def init_data(m_stats,o_stats,p_stats):
    """Build a single per-team table indexed by team name.

    ``m_stats`` loses its 'Rk' and 'Arena' columns; ``o_stats`` and
    ``p_stats`` lose 'Rk', 'G' and 'MP'. The 'Team' column of each reduced
    table is cleaned with ``del_star``, then the tables are left-joined on
    'Team' starting from the ``m_stats`` table.

    Returns the merged DataFrame with 'Team' set as its index.
    """
    misc = m_stats.drop(columns=['Rk', 'Arena'])
    opponent = o_stats.drop(columns=['Rk', 'G', 'MP'])
    per_game = p_stats.drop(columns=['Rk', 'G', 'MP'])
    for frame in (misc, opponent, per_game):
        del_star(frame)
    merged = misc.merge(opponent, how='left', on='Team')
    merged = merged.merge(per_game, how='left', on='Team')
    return merged.set_index('Team')

# Team strength is tracked with an Elo rating.
start_elo = 1600   # rating assigned to a team the first time it is looked up
team_elo = {}      # team name -> current Elo rating
team_stats = {}    # not referenced by the code visible in this section
def get_elo(team):
    """Return the Elo rating stored for ``team``.

    A team that has no entry yet is registered in ``team_elo`` with
    ``start_elo`` and that value is returned.
    """
    return team_elo.setdefault(team, start_elo)

def elo(w_team,l_team):
    """Compute the post-game Elo ratings for a winner/loser pair.

    Both current ratings are read through ``get_elo``; this function does not
    write the new ratings back itself.

    Returns ``(new_winner_rank, new_loser_rank)``, each rounded with
    ``round()``.
    """
    winner_rank = get_elo(w_team)
    loser_rank = get_elo(l_team)

    # Winner's expected score on the logistic Elo curve (400-point scale).
    exponent = -(winner_rank - loser_rank) / 400
    expected_win = 1 / (1 + math.pow(10, exponent))

    # The update step shrinks as the winner's rating grows.
    if winner_rank < 2100:
        k = 32
    elif winner_rank < 2400:
        k = 24
    else:
        k = 16

    new_winner_rank = round(winner_rank + k * (1 - expected_win))
    new_loser_rank = round(loser_rank - k * expected_win)
    return new_winner_rank, new_loser_rank

# Assemble the training data from all game results; the side playing at home
# gets 100 Elo points added to its feature value.
def process_all_data(result_data,team_data):
    """Turn the game results into a feature matrix and a label list.

    For every row of ``result_data`` (columns 'WTeam', 'LTeam', 'WLoc') a
    feature vector is built per team: the team's current Elo (plus 100 for
    the home side) followed by its row from ``team_data``. The two vectors
    are concatenated in a random order: label 0 means the winner's vector
    comes first, label 1 means the loser's vector comes first. After each
    game the ratings in ``team_elo`` are updated with the result of ``elo``.

    Returns ``(x, y)`` where ``x`` is ``np.nan_to_num`` of the collected
    feature rows and ``y`` is the list of labels.
    """
    x = []    # feature rows
    y = []    # labels
    for _, game in result_data.iterrows():
        winner, loser = game['WTeam'], game['LTeam']
        winner_elo, loser_elo = get_elo(winner), get_elo(loser)

        # 'H' means the winner played at home; otherwise the loser did.
        if game['WLoc'] == 'H':
            winner_elo += 100
        else:
            loser_elo += 100

        winner_vec = [winner_elo] + list(team_data.loc[winner])
        loser_vec = [loser_elo] + list(team_data.loc[loser])

        # Randomise which side comes first so both labels occur in the data.
        if random.random() > 0.5:
            x.append(winner_vec + loser_vec)
            y.append(0)
        else:
            x.append(loser_vec + winner_vec)
            y.append(1)

        # Ratings are updated only after the features used the pre-game values.
        team_elo[winner], team_elo[loser] = elo(winner, loser)
    return np.nan_to_num(x), y

# Build the feature vector for one fixture and ask the model for probabilities.
def predict(v_team,h_team,model,team_data):
    """Return ``model.predict_proba`` for a single ``v_team`` / ``h_team`` fixture.

    Feature layout: Elo of ``v_team``, its ``team_data`` row, Elo of
    ``h_team`` plus 100, its ``team_data`` row. The vector is passed through
    ``np.nan_to_num`` before being handed to the model as a one-sample batch.
    """
    visitor_part = [get_elo(v_team)] + list(team_data.loc[v_team])
    home_part = [get_elo(h_team) + 100] + list(team_data.loc[h_team])
    sample = np.nan_to_num(visitor_part + home_part)
    return model.predict_proba([sample])

# Plot the learning curve of an estimator.
def plot_learning_curve(estimator, title, x, y, ylim=None, cv=None,
                        train_sizes=np.linspace(.1, 1.0, 5)):
    """Draw training and cross-validation scores against training-set size.

    Parameters:
        estimator: model passed to ``learning_curve``.
        title: figure title.
        x, y: feature matrix and labels.
        ylim: optional ``(low, high)`` limits for the y axis.
        cv: cross-validation setting forwarded to ``learning_curve``; when
            left as ``None`` the previous hard-coded value of 10 folds is used.
        train_sizes: training-set sizes forwarded to ``learning_curve``.

    The figure is titled and shown regardless of whether ``ylim`` is given.
    Returns nothing.
    """
    # Keep the historical 10-fold behaviour when the caller gives no cv.
    folds = 10 if cv is None else cv

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(ylim)

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, x, y, cv=folds, n_jobs=1, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    # Each +/- one-std band uses the same colour as the line it belongs to.
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1,
                     color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.xlabel("Training examples")
    plt.ylabel("Score")
    plt.legend(loc="best")
    plt.grid(True)
    plt.show()

def main():
    """Train on the 2018-19 season and write predictions for the 2019-20 schedule.

    Steps: load the three team stat tables and the merged result file, build
    the training set, fit a logistic regression, print the mean 10-fold
    cross-validation accuracy, predict every fixture in
    '2019-2020_schedule.csv', write the predictions to
    '2019-2020_pre_result.csv' and finally plot the learning curve.
    """
    # Load the data.
    m_stats = pd.read_csv('2018-2019Miscellaneous Stats.csv').dropna(axis=1, how='all')
    o_stats = pd.read_csv('2018-2019Opponent Per Game Stats.csv')
    p_stats = pd.read_csv('2018-2019Team Per Game Stats.csv')
    team_data = init_data(m_stats, o_stats, p_stats)
    result_data = pd.read_csv('2018-2019_result.csv')
    x, y = process_all_data(result_data, team_data)

    # Fit a logistic regression model on the full training set.
    model = linear_model.LogisticRegression()
    model.fit(x, y)

    schedule = pd.read_csv('2019-2020_schedule.csv')
    accuracy = cross_val_score(model, x, y, cv=10, scoring='accuracy', n_jobs=-1).mean()
    print("准确率为:", accuracy)

    # Class 0 probability above 0.5 is reported as a win for the visiting
    # team; anything else as a win for the home team.
    predictions = []
    for _, fixture in schedule.iterrows():
        visitor, home = fixture['Vteam'], fixture['Hteam']
        proba = predict(visitor, home, model, team_data)[0]
        if proba[0] > 0.5:
            predictions.append([visitor, home, round(proba[0], 3)])
        else:
            predictions.append([home, visitor, round(proba[1], 3)])

    with open('2019-2020_pre_result.csv', 'w', newline='') as out_file:
        out_writer = csv.writer(out_file)
        out_writer.writerow(['WTeam', 'LTeam', 'Possibility'])
        out_writer.writerows(predictions)

    plot_learning_curve(model, 'learning curve', x, y, (0.5, 1), 10)


if __name__ == "__main__":
    main()

  • 7
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值