# import pandas as pd
# import numpy as np
#
# # Load the dataset
# data_filename = "NBA15_16_dataset/basketball.csv"
# dataset = pd.read_csv(data_filename,encoding="utf-8")
#
# # Clean the data
# # 1. Re-read the CSV, parsing the "Date" column as dates
# dataset = pd.read_csv(data_filename,parse_dates=["Date"])
# # 2. Rename the columns
# dataset.columns = ["Date", "Start(ET)","Visitor Team", "VisitorPts", "Home Team", "HomePts", "OT?", "Score Type","Attend.", "Notes"]
#
# # Extract a new feature
# dataset["HomeWin"] = dataset["VisitorPts"] < dataset["HomePts"]
# # dataset.head()
# y_true = dataset["HomeWin"].values
# dataset["HomeWin"].mean()
#
# # Construct new attributes: whether each of the two teams being predicted
# # won or lost its own previous game
# from collections import defaultdict
# won_last = defaultdict(int)
# dataset["HomeLastWin"] = 0
# dataset["VisitorLastWin"] = 0
# for index, row in dataset.iterrows():
#     home_team = row["Home Team"]
#     visitor_team = row["Visitor Team"]
#     row["HomeLastWin"] = won_last[home_team]
#     dataset.set_value(index, "HomeLastWin", won_last[home_team])
#     dataset.set_value(index, "VisitorLastWin", won_last[visitor_team])
#     won_last[home_team] = int(row["HomeWin"])
#     won_last[visitor_team] = 1 - int(row["HomeWin"])
#
# # Predict with a decision tree
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.cross_validation import cross_val_score
# import numpy as np
# clf = DecisionTreeClassifier(random_state=14)
# x_previouswins = dataset[["HomeLastWin", "VisitorLastWin"]].values
# scores = cross_val_score(clf, x_previouswins, y_true, scoring="accuracy")
# print(scores)
# print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
#
# # New feature: standings rank
# standings_filename = "NBA15_16_dataset/standings.csv"
# standings = pd.read_csv(standings_filename, skiprows=0, encoding="utf-8")
# standings.head()
# dataset["HomeTeamRanksHigher"] = 0
# for index, row in dataset.iterrows():
#     home_team = row["Home Team"]
#     visitor_team = row["Visitor Team"]
#     home_rank = standings[standings["Team"] == home_team]["Rk"].values[0]
#     visitor_rank = standings[standings["Team"] == visitor_team]["Rk"].values[0]
#     dataset.set_value(index, "HomeTeamRanksHigher",int(home_rank < visitor_rank))
# X_homehigher = dataset[["HomeTeamRanksHigher","HomeLastWin", "VisitorLastWin",]].values
# clf = DecisionTreeClassifier(random_state=14)
# scores = cross_val_score(clf, X_homehigher, y_true, scoring="accuracy")
# print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))
#
# # NOTE(review): everything below repeats the preceding ranking-feature
# # block and its evaluation verbatim -- looks like an accidental duplicate;
# # confirm before removing.
# dataset["HomeTeamRanksHigher"] = 0
# for index, row in dataset.iterrows():
#     home_team = row["Home Team"]
#     visitor_team = row["Visitor Team"]
#     home_rank = standings[standings["Team"] == home_team]["Rk"].values[0]
#     visitor_rank = standings[standings["Team"] == visitor_team]["Rk"].values[0]
#     dataset.set_value(index, "HomeTeamRanksHigher",int(home_rank < visitor_rank))
# X_homehigher = dataset[["HomeTeamRanksHigher","HomeLastWin", "VisitorLastWin",]].values
# clf = DecisionTreeClassifier(random_state=14)
# scores = cross_val_score(clf, X_homehigher, y_true, scoring="accuracy")
# print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))