python大赛队名_数据挖掘 消除队名的影响

#import pandas as pd
#import numpy as np
#
##加载数据集
#data_filename = "NBA15_16_dataset/basketball.csv"
#dataset = pd.read_csv(data_filename,encoding="utf-8")
##清洗数据
##1
#dataset = pd.read_csv(data_filename,parse_dates=["Date"])
##2
#dataset.columns = ["Date", "Start(ET)","Visitor Team", "VisitorPts", "Home Team", "HomePts", "OT?", "Score Type","Attend.", "Notes"]
##抽取新的特征
#dataset["HomeWin"] = dataset["VisitorPts"] < dataset["HomePts"]
## dataset.head()
#y_true = dataset["HomeWin"].values
#dataset["HomeWin"].mean()
##构造新属性 需要预测的两支球队在各自的上场比赛中胜负情况
#from collections import defaultdict

#won_last = defaultdict(int)
#dataset["HomeLastWin"] = 0
#dataset["VisitorLastWin"] = 0
#for index, row in dataset.iterrows():

#    home_team = row["Home Team"]
#    visitor_team = row["Visitor Team"]
#    row["HomeLastWin"] = won_last[home_team]

#    dataset.set_value(index, "HomeLastWin", won_last[home_team])
#    dataset.set_value(index, "VisitorLastWin", won_last[visitor_team])
#    won_last[home_team] = int(row["HomeWin"])
#    won_last[visitor_team] = 1 - int(row["HomeWin"])

##决策树进行预测
#from sklearn.tree import DecisionTreeClassifier
#from sklearn.cross_validation import cross_val_score
#import numpy as np

#clf = DecisionTreeClassifier(random_state=14)
#x_previouswins = dataset[["HomeLastWin", "VisitorLastWin"]].values

#scores = cross_val_score(clf, x_previouswins, y_true, scoring="accuracy")
#print(scores)
#print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
##新建特征 排名
#standings_filename = "NBA15_16_dataset/standings.csv"
#standings = pd.read_csv(standings_filename, skiprows=0, encoding="utf-8")
#standings.head()
#dataset["HomeTeamRanksHigher"] = 0
#for index, row in dataset.iterrows():
#    home_team = row["Home Team"]
#    visitor_team = row["Visitor Team"]
#    home_rank = standings[standings["Team"] == home_team]["Rk"].values[0]
#    visitor_rank = standings[standings["Team"] == visitor_team]["Rk"].values[0]
#    dataset.set_value(index, "HomeTeamRanksHigher",int(home_rank < visitor_rank))
#X_homehigher = dataset[["HomeTeamRanksHigher","HomeLastWin", "VisitorLastWin",]].values

#clf = DecisionTreeClassifier(random_state=14)
#scores = cross_val_score(clf, X_homehigher, y_true, scoring="accuracy")
#print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
#dataset["HomeTeamRanksHigher"] = 0
#for index, row in dataset.iterrows():
#    home_team = row["Home Team"]
#    visitor_team = row["Visitor Team"]
#    home_rank = standings[standings["Team"] == home_team]["Rk"].values[0]
#    visitor_rank = standings[standings["Team"] == visitor_team]["Rk"].values[0]
#    dataset.set_value(index, "HomeTeamRanksHigher",int(home_rank < visitor_rank))
#X_homehigher = dataset[["HomeTeamRanksHigher","HomeLastWin", "VisitorLastWin",]].values

#clf = DecisionTreeClassifier(random_state=14)
#scores = cross_val_score(clf, X_homehigher, y_true, scoring="accuracy")
#print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值