爱篮球,爱人工智能,爱生活。
对科比数据集进行探索性分析。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import KFold
# Read the whole data set from data.csv (looked up in the working directory),
# report its (rows, columns) shape and preview the first rows.
filename = "data.csv"
raw = pd.read_csv(filename)
print(raw.shape)
raw.head()
打印结果:
# Keep only the rows whose shot_made_flag is present. Per the original note,
# roughly 5000 rows have no flag and are set aside as the test portion.
kobe = raw.loc[raw['shot_made_flag'].notnull()]
print(kobe.shape)
打印结果:
(25697, 25)
# Compare the two coordinate pairs side by side.
# plt.subplot(121) reads as: 1 row, 2 columns, panel number 1.
alpha = 0.02  # very low opacity so overlapping points build up visible density
plt.figure(figsize=(10, 10))

# Left panel: loc_x against loc_y.
# Single-letter colour codes must be lowercase: uppercase 'R'/'B' were
# deprecated in Matplotlib 3.1 and raise ValueError in later releases.
plt.subplot(121)
plt.scatter(kobe.loc_x, kobe.loc_y, color='r', alpha=alpha)
plt.title('loc_x and loc_y')

# Right panel: lon against lat.
plt.subplot(122)
plt.scatter(kobe.lon, kobe.lat, color='b', alpha=alpha)
plt.title('lat and lon')
打印结果:
# Derived features on the full table.

# Euclidean distance of (loc_x, loc_y) from the origin.
raw['dist'] = np.sqrt(raw['loc_x']**2 + raw['loc_y']**2)

# Angle in radians: arctan(loc_y / loc_x), with pi/2 substituted wherever
# loc_x == 0 so the division by zero is never evaluated.
loc_x_zero = raw['loc_x'] == 0
# Start from a float column holding the loc_x == 0 value, then overwrite the
# remaining rows with one .loc assignment. The previous chained form
# raw['angle'][mask] = ... writes to a temporary object and does not reliably
# update raw on current pandas.
raw['angle'] = np.pi / 2
raw.loc[~loc_x_zero, 'angle'] = np.arctan(
    raw.loc[~loc_x_zero, 'loc_y'] / raw.loc[~loc_x_zero, 'loc_x']
)

# Minutes and seconds remaining combined into a single count of seconds.
raw['remaining_time'] = raw['minutes_remaining'] * 60 + raw['seconds_remaining']
# List the distinct values of each categorical shot descriptor, then show how
# many rows fall under each shot_type value.
print(kobe['action_type'].unique())
print(kobe['combined_shot_type'].unique())
print(kobe['shot_type'].unique())
print(kobe['shot_type'].value_counts())
打印结果:
['Jump Shot' 'Driving Dunk Shot' 'Layup Shot' 'Running Jump Shot' 'Reverse Dunk Shot' 'Slam Dunk Shot' 'Driving Layup Shot' 'Turnaround Jump Shot' 'Reverse Layup Shot' 'Tip Shot' 'Running Hook Shot' 'Alley Oop Dunk Shot' 'Dunk Shot' 'Alley Oop Layup shot' 'Running Dunk Shot' 'Driving Finger Roll Shot'