NBA20-21赛季数据分析 | Python数据分析及可视化实战

数据的一般处理

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Draw the minus sign as a plain ASCII hyphen so negative tick labels still
# render when a CJK font is active.
plt.rcParams['axes.unicode_minus'] = False
# Dark-grid seaborn theme; SimHei is listed first so Chinese titles and
# labels can be displayed.
sns.set_style('darkgrid', {'font.sans-serif':['SimHei', 'Arial']})
# Per-player stat lines ("detailed") and per-game results ("basic").
# NOTE(review): absolute paths on the author's machine; adjust before running elsewhere.
df_player = pd.read_csv(r"C:\Users\imqqdong\Downloads\NBAdata\season_2021_detailed.csv")
df_team = pd.read_csv(r"C:\Users\imqqdong\Downloads\NBAdata\season_2021_basic.csv")
# Preview the first rows of the game-results table.
df_team.head()
dateweekdayhome_teamhome_scoreaway_teamaway_scoreattendanceovertimeremarks
02020-12-22T19:00:00TuesdayBrooklyn Nets125Golden State Warriors990NaNNaN
12020-12-22T22:00:00TuesdayLos Angeles Lakers109Los Angeles Clippers1160NaNNaN
22020-12-23T19:00:00WednesdayCleveland Cavaliers121Charlotte Hornets1140NaNNaN
32020-12-23T19:00:00WednesdayIndiana Pacers121New York Knicks1070NaNNaN
42020-12-23T19:00:00WednesdayOrlando Magic113Miami Heat1070NaNNaN
# Keep only the columns needed to decide each game's result.
df_team_sim = df_team.drop(columns=["weekday", "attendance", "overtime", "remarks"])

# Label every game with its winner and loser in one vectorised pass.
# The previous row-by-row loop wrote through chained indexing
# (df["winner"][i] = ...), which pandas documents as unreliable: the write
# may land on a temporary copy, and it stops working under copy-on-write.
home_wins = df_team_sim["home_score"] > df_team_sim["away_score"]
away_wins = df_team_sim["home_score"] < df_team_sim["away_score"]
decided = home_wins | away_wins

# Cast to object so the integer placeholder below can live next to team names.
home_team = df_team_sim["home_team"].astype(object)
away_team = df_team_sim["away_team"].astype(object)

# A game with equal scores keeps the placeholder 0 in both columns.
df_team_sim["winner"] = home_team.where(home_wins, away_team).where(decided, 0)
df_team_sim["loser"] = away_team.where(home_wins, home_team).where(decided, 0)

# Number of wins per team.
sns.countplot(x=df_team_sim["winner"], color="orange")
plt.xticks(rotation=90)
plt.show()

在这里插入图片描述

# Wins per team. The column is named explicitly because value_counts()
# names its result "winner" on pandas 1.x but "count" on pandas >= 2.0, and
# the later rename of "winner" to "win_times" depends on the former.
win_lose_times = df_team_sim["winner"].value_counts().to_frame(name="winner")
# Column assignment aligns on the index (the team name), so every loss count
# lands on the matching team's row whatever order value_counts() returns.
win_lose_times["lose_times"] = df_team_sim["loser"].value_counts()
win_lose_times.head()
winnerlose_times
Phoenix Suns6325
Utah Jazz5825
Los Angeles Clippers5734
Milwaukee Bucks5731
Philadelphia 76ers5628
win_lose_times.rename(columns={"winner": "win_times"}, inplace=True)
# Create the axes first and hand them to pandas. Without ax=, DataFrame.plot
# opens a figure of its own and the sized figure stays empty.
fig, ax = plt.subplots(figsize=(20, 10), dpi=100)
win_lose_times.plot(
    y=["win_times", "lose_times"],
    kind="bar",
    use_index=True,
    title="2020-2021赛季NBA各队伍胜负场次",
    grid=True,
    ax=ax,
)
<AxesSubplot:title={'center':'2020-2021赛季NBA各队伍胜负场次'}>




<Figure size 2000x1000 with 0 Axes>

在这里插入图片描述

本赛季各个队伍的胜率情况

# Win percentage of each team: wins / (wins + losses).
win_lose_times["wp"] = win_lose_times["win_times"].div(
    win_lose_times["win_times"].add(win_lose_times["lose_times"])
)
# Display the table ordered from best to worst record (the result is not stored).
win_lose_times.sort_values("wp", ascending=False)
win_timeslose_timeswp
Phoenix Suns63250.715909
Utah Jazz58250.698795
Philadelphia 76ers56280.666667
Brooklyn Nets55290.654762
Milwaukee Bucks57310.647727
Los Angeles Clippers57340.626374
Denver Nuggets51310.621951
Atlanta Hawks51380.573034
Dallas Mavericks45340.569620
Los Angeles Lakers45340.569620
Portland Trail Blazers44340.564103
New York Knicks42350.545455
Golden State Warriors39350.527027
Miami Heat40360.526316
Memphis Grizzlies41380.518987
Boston Celtics38400.487179
Indiana Pacers35390.472973
Washington Wizards36430.455696
San Antonio Spurs33400.452055
Charlotte Hornets33400.452055
New Orleans Pelicans31410.430556
Chicago Bulls31410.430556
Sacramento Kings31410.430556
Toronto Raptors27450.375000
Minnesota Timberwolves23490.319444
Oklahoma City Thunder22500.305556
Cleveland Cavaliers22500.305556
Orlando Magic21510.291667
Detroit Pistons20520.277778
Houston Rockets17550.236111
# Grouped bar chart: wins and losses side by side for every team.
fig = plt.figure(figsize=(30, 20), dpi=100)
width = 0.4
x = np.arange(len(win_lose_times.index.values))
# Each series is shifted half a bar width away from the tick so the pair
# sits centred on it.
for offset, column, label in (
    (-width / 2, "win_times", "胜利场次"),
    (width / 2, "lose_times", "失败场次"),
):
    plt.bar(x=x + offset, height=win_lose_times[column], label=label, align="center", width=width)
plt.xticks(ticks=x, rotation=90, fontsize=25, labels=win_lose_times.index.values)
plt.yticks(fontsize=25)
plt.legend(fontsize=25)
plt.show()

在这里插入图片描述

# Preview the first rows of the per-player table.
df_player.head()
dateteamplayerroleMPFGFGAFG_PCTFG3FG3A...ORBDRBTRBASTSTLBLKTOVPFPTSPLUS_MINUS
02020-12-22T19:00:00Brooklyn NetsTyler JohnsonReserve5.083333010.00000...101010000-12.0
12020-12-22T19:00:00Brooklyn NetsKyrie IrvingStarter25.41666710160.62547...134400132632.0
22020-12-22T19:00:00Brooklyn NetsKevin DurantStarter24.4000007160.43812...145331132226.0
32020-12-22T19:00:00Brooklyn NetsJoe HarrisStarter20.333333480.50025...257201221021.0
42020-12-22T19:00:00Brooklyn NetsSpencer DinwiddieStarter19.316667260.33313...05540031924.0

5 rows × 24 columns

# LeBron James' rows for this season. .copy() makes df_james an independent
# frame, so the in-place edits applied to it further down are guaranteed to
# take effect and cannot be mistaken for writes into df_player.
df_james = df_player[df_player["player"] == "LeBron James"].copy()
df_james.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 50 to 24083
Data columns (total 24 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        52 non-null     object 
 1   team        52 non-null     object 
 2   player      52 non-null     object 
 3   role        52 non-null     object 
 4   MP          52 non-null     float64
 5   FG          52 non-null     int64  
 6   FGA         52 non-null     int64  
 7   FG_PCT      52 non-null     float64
 8   FG3         52 non-null     int64  
 9   FG3A        52 non-null     int64  
 10  FG3_PCT     52 non-null     float64
 11  FT          52 non-null     int64  
 12  FTA         52 non-null     int64  
 13  FT_PCT      51 non-null     float64
 14  ORB         52 non-null     int64  
 15  DRB         52 non-null     int64  
 16  TRB         52 non-null     int64  
 17  AST         52 non-null     int64  
 18  STL         52 non-null     int64  
 19  BLK         52 non-null     int64  
 20  TOV         52 non-null     int64  
 21  PF          52 non-null     int64  
 22  PTS         52 non-null     int64  
 23  PLUS_MINUS  52 non-null     float64
dtypes: float64(5), int64(15), object(4)
memory usage: 10.2+ KB
# Handle the missing values: show every column, then list the rows whose
# free-throw percentage is missing.
pd.set_option("display.max_columns", None)
# isna() already yields a boolean mask; comparing it with True is redundant.
df_james[df_james["FT_PCT"].isna()]
dateteamplayerroleMPFGFGAFG_PCTFG3FG3AFG3_PCTFTFTAFT_PCTORBDRBTRBASTSTLBLKTOVPFPTSPLUS_MINUS
239392021-06-01T22:00:00Los Angeles LakersLeBron JamesStarter31.5166679190.4746100.600NaN1457003124-24.0
# Replace the missing values with 0. Reassigning the result (rather than
# inplace=True) behaves the same whether or not df_james is a slice of
# df_player, and never writes back into df_player.
df_james = df_james.fillna(value=0)
df_james.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 50 to 24083
Data columns (total 24 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   date        52 non-null     object 
 1   team        52 non-null     object 
 2   player      52 non-null     object 
 3   role        52 non-null     object 
 4   MP          52 non-null     float64
 5   FG          52 non-null     int64  
 6   FGA         52 non-null     int64  
 7   FG_PCT      52 non-null     float64
 8   FG3         52 non-null     int64  
 9   FG3A        52 non-null     int64  
 10  FG3_PCT     52 non-null     float64
 11  FT          52 non-null     int64  
 12  FTA         52 non-null     int64  
 13  FT_PCT      52 non-null     float64
 14  ORB         52 non-null     int64  
 15  DRB         52 non-null     int64  
 16  TRB         52 non-null     int64  
 17  AST         52 non-null     int64  
 18  STL         52 non-null     int64  
 19  BLK         52 non-null     int64  
 20  TOV         52 non-null     int64  
 21  PF          52 non-null     int64  
 22  PTS         52 non-null     int64  
 23  PLUS_MINUS  52 non-null     float64
dtypes: float64(5), int64(15), object(4)
memory usage: 10.2+ KB

詹姆斯本赛季的平均出场时间

# Minutes played per game with the season average drawn as a horizontal
# line (the author reports an average of 33.94 minutes).
fig = plt.figure(figsize=(20, 10), dpi=100)
mp_mean = df_james["MP"].mean()
x = np.arange(len(df_james))
df_james["MP"].plot.bar()
plt.axhline(y=mp_mean)
# Label each bar with the game's date.
plt.xticks(ticks=x, labels=df_james["date"])
plt.yticks(fontsize=15)
# Print the average slightly above the line, at the left edge of the chart.
plt.text(0, mp_mean + 2, "Mean:%.2f" % mp_mean, fontsize=20)
plt.show()

在这里插入图片描述

现在看一下詹姆斯在场上的时候,湖人的胜率

# Lakers' win rate in the games LeBron James played.
#
# The earlier version of this function was known to be wrong (it reported
# 23 wins and 29 losses). It selected games by tip-off time only and then
# compared `winner.all()` with the team name. Several games can share one
# tip-off time, so the value being compared was not necessarily the winner
# of the Lakers' own game. This version restricts the lookup to games the
# team actually took part in.
def LA_WP(dates, games=None, team="Los Angeles Lakers"):
    """Summarise a team's record over the games played at the given times.

    Args:
        dates: Iterable of tip-off timestamps, in the same representation as
            the ``date`` column of ``games``.
        games: DataFrame with ``date``, ``winner`` and ``loser`` columns.
            Defaults to the module-level ``df_team_sim``.
        team: Team whose record is computed.

    Returns:
        A string of the form "胜利场次:W,失败场次:L,胜率:R", where R is
        W / (W + L) with two decimals, or "nan" when no game matched.
        A game is counted once even if its timestamp appears in ``dates``
        more than once.
    """
    if games is None:
        games = df_team_sim

    # Only games in which `team` was one of the two sides are relevant.
    involved = (games["winner"] == team) | (games["loser"] == team)
    played = games[involved & games["date"].isin(list(dates))]

    win_times = int((played["winner"] == team).sum())
    lose_times = int((played["loser"] == team).sum())
    total = win_times + lose_times
    # Avoid ZeroDivisionError when none of the dates matches a game.
    rate = win_times / total if total else float("nan")
    return "胜利场次:%d,失败场次:%d,胜率:%.2f" % (win_times, lose_times, rate)

LA_WP(df_james.date.values)
'胜利场次:23,失败场次:29,胜率:0.44'

计算詹姆斯不在场时,湖人的胜率

先不管上面的错误,计算詹姆斯不在场时,湖人的胜率

# Lakers games without James: first drop every tip-off time at which he
# played (negated isin), then keep the games that have the Lakers as the
# home or the away team.
df_nojames = df_team_sim.loc[~df_team_sim["date"].isin(df_james["date"])]
df_nojames = df_nojames.loc[
    df_nojames[["home_team", "away_team"]].eq("Los Angeles Lakers").any(axis=1)
]
len(df_nojames)
27
# Split the games James missed by result, then compute the Lakers' win rate
# over them.
win_games = df_nojames.loc[df_nojames["winner"].eq("Los Angeles Lakers")]
lose_games = df_nojames.loc[df_nojames["loser"].eq("Los Angeles Lakers")]
wp_nojames = win_games.shape[0] / (win_games.shape[0] + lose_games.shape[0])
wp_nojames
0.4444444444444444
lose_games
datehome_teamhome_scoreaway_teamaway_scorewinnerloser
5232021-03-03T22:00:00Sacramento Kings123Los Angeles Lakers120Sacramento KingsLos Angeles Lakers
6252021-03-21T22:00:00Phoenix Suns111Los Angeles Lakers94Phoenix SunsLos Angeles Lakers
6352021-03-23T19:30:00New Orleans Pelicans128Los Angeles Lakers111New Orleans PelicansLos Angeles Lakers
6552021-03-25T22:00:00Los Angeles Lakers101Philadelphia 76ers109Philadelphia 76ersLos Angeles Lakers
7042021-03-31T22:00:00Los Angeles Lakers97Milwaukee Bucks112Milwaukee BucksLos Angeles Lakers
7312021-04-04T15:30:00Los Angeles Clippers104Los Angeles Lakers86Los Angeles ClippersLos Angeles Lakers
7612021-04-08T19:30:00Miami Heat110Los Angeles Lakers104Miami HeatLos Angeles Lakers
7952021-04-12T19:30:00New York Knicks111Los Angeles Lakers96New York KnicksLos Angeles Lakers
8242021-04-15T22:00:00Los Angeles Lakers113Boston Celtics121Boston CelticsLos Angeles Lakers
8582021-04-19T22:00:00Los Angeles Lakers97Utah Jazz111Utah JazzLos Angeles Lakers
8812021-04-22T21:30:00Dallas Mavericks115Los Angeles Lakers110Dallas MavericksLos Angeles Lakers
8942021-04-24T20:30:00Dallas Mavericks108Los Angeles Lakers93Dallas MavericksLos Angeles Lakers
9232021-04-28T19:30:00Washington Wizards116Los Angeles Lakers107Washington WizardsLos Angeles Lakers
9932021-05-06T22:00:00Los Angeles Clippers118Los Angeles Lakers94Los Angeles ClippersLos Angeles Lakers
10022021-05-07T22:00:00Portland Trail Blazers106Los Angeles Lakers101Portland Trail BlazersLos Angeles Lakers
# Collect every game James played in by matching on tip-off time AND
# requiring the Lakers to be one of the two sides. A single boolean mask
# replaces the per-row loop that grew a DataFrame with pd.concat, which
# re-copied the accumulated rows on every iteration.
lakers_game = (df_team_sim["winner"] == "Los Angeles Lakers") | (
    df_team_sim["loser"] == "Los Angeles Lakers"
)
data = df_team_sim[lakers_game & df_team_sim["date"].isin(df_james["date"])]

# Lakers' win rate with James on the floor.
james_win = len(data[data["winner"] == "Los Angeles Lakers"])
james_lose = len(data[data["loser"] == "Los Angeles Lakers"])
wp_james = james_win / (james_win + james_lose)
wp_james
0.6346153846153846

不在场时胜率约44%,在场时胜率约63%,可见詹姆斯对球队胜率的影响还是很大的。后续将继续关注其与戴维斯合作时的相关数据,并更注重数据的可视化展现…
后续更新…

  • 7
    点赞
  • 30
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值