首先恭喜中国战队LGD夺下本次利雅得大师赛冠军!
目录
数据(因为原数据的英雄都是id,这里进行连接将英雄id替换为英雄名 )
数据的爬取
数据来源于opendota,对opendota的api进行调用获取数据,想要找到利雅得的所有比赛id,那就要先找到这个比赛,首先咱们来看看opendota的api页面,网址为OpenDota API
打开到match页面会看到请求数据的api介绍以及对应的url里面的json数据
我们找一场利雅得大师赛的比赛,比如这场,比赛编号为6676393091
利用api提供的网站打开看看
一大串json数据,这样看不方便,按F12打开抓包工具在里面看
看得出这场比赛里面的数据有很多分组,对应的分组是什么意思都可以在api文档查看,其中league中的leagueid就是这次利雅得大师赛的大赛id为14391,这就是dota2不同的大赛之间的区别,然后通过leagueid获取到利雅得大师赛的所有比赛id,再进行数据爬取就行,下面我直接上代码
首先是导入需要的模块,爬取利雅得的所有比赛id,参数id就是利雅得大师赛id 14391
import os.path
import time
import pandas as pd
import requests
import threading
def pro_game(id):
    """Fetch the ids of all matches that belong to one OpenDota league.

    Args:
        id: League id that is interpolated into the
            ``/api/leagues/{id}/matches`` endpoint.

    Returns:
        A list with the ``match_id`` of every entry in the JSON response.

    The request is sent with the module-level ``headers`` object, and the
    collected ids are printed before they are returned.
    """
    league_matches_url = f'https://api.opendota.com/api/leagues/{id}/matches'
    league_matches = requests.get(url=league_matches_url, headers=headers).json()
    match_ids = [entry['match_id'] for entry in league_matches]
    print(match_ids)
    return match_ids
获取到比赛id之后,我们需要对每场比赛的数据进行爬取。在爬取之前需要先对比赛id发起一次post请求:post之后,opendota的内置功能会把录像进一步解析,这样我们才能获得更多数据,比如推塔时间、团战爆发的次数,这些都是直接get得不到的,必须先post请求一下
def jiexi(match_id):
    """Request a parse of one match and return its match data.

    A POST is sent to ``/api/request/{match_id}`` first and its response is
    discarded; the match JSON is then read with a GET on
    ``/api/matches/{match_id}``. Both calls use the module-level ``headers``.

    Args:
        match_id: Id of the match to request and download.

    Returns:
        The decoded JSON body of the GET response.
    """
    api_root = 'https://api.opendota.com/api'
    # The POST is fired only for its server-side effect; its result is unused.
    requests.post(url=f'{api_root}/request/{match_id}', headers=headers)
    match_response = requests.get(url=f'{api_root}/matches/{match_id}', headers=headers)
    return match_response.json()
get得到每场比赛数据之后就可以选择你想要的数据进行爬取,选手数据爬取与存储如下:
本来我是想看看能不能定位选手走的是哪条路,但是因为dota2打法很多样,选手每场的分路也不一样,导致分路这个数据不是很准确,所以我在后面的数据分析中也没用上它
def role(lr):
    """Map a numeric lane code to its Chinese lane label.

    Args:
        lr: Lane code; the caller passes a player's ``lane_role`` value.

    Returns:
        '优势路' for 1, '中路' for 2, '劣势路' for 3, and ``None`` for any
        other value.
    """
    lane_labels = ((1, '优势路'), (2, '中路'), (3, '劣势路'))
    for code, label in lane_labels:
        if lr == code:
            return label
    return None
根据选手所处的哪一方以及哪一方胜利来判断选手是否赢得这场比赛
选手数据以及战队数据爬取
def xuanshou(match_text):
    """Collect one statistics row per player of a match into ``row_list``.

    Args:
        match_text: Decoded JSON of one match (the value returned by
            ``jiexi``). The fields read here are ``players``,
            ``radiant_team``, ``dire_team``, ``radiant_score`` and
            ``dire_score``.

    Side effects:
        Appends one dict per player to the module-level ``row_list``. When a
        field is missing, the row is still appended with the columns that
        were filled in before the failure (best-effort).
    """
    print('开始')
    for item in match_text['players']:
        row_dic = {}
        try:
            row_dic['match_id'] = item['match_id']
            row_dic['name'] = item['name']
            row_dic['ID'] = item['account_id']
            row_dic['英雄id'] = item['hero_id']
            row_dic['位置'] = role(item['lane_role'])

            benchmarks = item['benchmarks']
            row_dic['10分钟补刀'] = benchmarks['lhten']['raw']
            row_dic['杀人数'] = item['kills']
            row_dic['死亡数'] = item['deaths']
            row_dic['助攻'] = item['assists']
            row_dic['反眼数'] = item['observer_kills']
            row_dic['总金钱'] = item['total_gold']
            row_dic['治疗量'] = item['hero_healing']
            row_dic['GPM'] = benchmarks['gold_per_min']['raw']
            row_dic['XPM'] = benchmarks['xp_per_min']['raw']
            row_dic['每分钟补刀数'] = benchmarks['last_hits_per_min']['raw']
            row_dic['每分钟伤害'] = benchmarks['hero_damage_per_min']['raw']
            row_dic['总伤害'] = item['hero_damage']

            radiant_name = match_text['radiant_team']['name']
            dire_name = match_text['dire_team']['name']
            row_dic['天辉方战队'] = radiant_name
            row_dic['天辉杀人数'] = match_text['radiant_score']
            row_dic['夜宴方战队'] = dire_name
            row_dic['夜宴杀人数'] = match_text['dire_score']

            on_radiant = item['isRadiant']
            row_dic['所在方'] = '天辉' if on_radiant else '夜宴'
            if item['win'] == 1:
                row_dic['输赢'] = '赢'
            elif item['win'] == 0:
                row_dic['输赢'] = '输'
            # The analysis code later in this file groups by a '战队' column,
            # so record the player's own team next to the two side columns.
            row_dic['战队'] = radiant_name if on_radiant else dire_name
        except (KeyError, TypeError) as err:
            # Missing key, or a field that is None instead of a dict: keep
            # the partial row, but say what went wrong instead of hiding it.
            print('出错', repr(err))
        row_list.append(row_dic)
        # NOTE(review): the source lost its indentation; this pause is kept
        # once per player row — confirm whether once per match was intended.
        time.sleep(0.5)
def paqu(match_text):
    """Build the match-level summary row and append it to ``quanju_list``.

    The row holds the match id, the duration in minutes, both team names and
    scores, the winning side, the number of team fights and, for every
    tier-1 tower with a ``building_kill`` objective, the time in minutes at
    which it was destroyed.

    Args:
        match_text: Decoded JSON of one match. Reads ``match_id``,
            ``duration``, ``radiant_team``, ``dire_team``, ``radiant_score``,
            ``dire_score``, ``radiant_win``, ``teamfights`` and
            ``objectives``.

    Side effects:
        Appends the row to the module-level ``quanju_list`` and prints a
        "<radiant>vs<dire>完成" progress line.
    """
    # Objective key of each tier-1 tower -> output column. The column names
    # are read back verbatim by the analysis code, so they must not change.
    tower_columns = {
        'npc_dota_goodguys_tower1_mid': '天辉中路一塔',
        'npc_dota_goodguys_tower1_top': '天辉劣势路一塔',
        'npc_dota_goodguys_tower1_bot': '天辉优势路一塔',
        'npc_dota_badguys_tower1_mid': '夜宴中路1塔',
        'npc_dota_badguys_tower1_top': '夜宴优势路一塔',
        'npc_dota_badguys_tower1_bot': '夜宴劣势路一塔',
    }
    summary = {
        '比赛id': match_text['match_id'],
        '持续时间': match_text['duration'] / 60,
        '天辉方战队': match_text['radiant_team']['name'],
        '天辉杀人数': match_text['radiant_score'],
        '夜宴方战队': match_text['dire_team']['name'],
        '夜宴杀人数': match_text['dire_score'],
        '胜利方': '天辉' if match_text['radiant_win'] == True else '夜宴',
        '团战次数': len(match_text['teamfights']),
    }
    for event in match_text['objectives']:
        if event['type'] != 'building_kill':
            continue
        column = tower_columns.get(event['key'])
        if column is not None:
            summary[column] = event['time'] / 60
    quanju_list.append(summary)
    print(f"{summary['天辉方战队']}vs{summary['夜宴方战队']}完成")
主函数里面使用双线程进行数据的爬取
if __name__ == '__main__':
    # Script entry point: download every match of league 14391, collect the
    # per-player and per-match rows, and write both tables to one workbook.
    #
    # NOTE(review): pro_game() and jiexi() read a module-level `headers`
    # that is not defined in the visible part of this file — confirm it is
    # defined before this point.
    where = input('请输入文件存储地址:')
    name = 'DOTA2利雅得.xlsx'
    dizhi = os.path.join(where, name)
    match_ids = pro_game(14391)
    print(match_ids)
    # xuanshou() and paqu() append to these two module-level lists.
    row_list = []
    quanju_list = []
    workers = []
    for mid in match_ids:
        shuju = jiexi(mid)
        thread1 = threading.Thread(target=xuanshou, args=(shuju,))
        thread2 = threading.Thread(target=paqu, args=(shuju,))
        thread1.start()
        thread2.start()
        workers.extend((thread1, thread2))
    # Wait for every worker before reading the lists; otherwise rows from
    # threads that are still running would be missing from the output.
    for worker in workers:
        worker.join()
    zd_d = pd.DataFrame(quanju_list)
    r_d = pd.DataFrame(row_list)
    # One writer creates the workbook and holds both sheets.
    with pd.ExcelWriter(dizhi, engine='openpyxl') as writer:
        r_d.to_excel(writer, sheet_name='选手数据')
        zd_d.to_excel(writer, sheet_name='战队数据')
运行程序后结果大概是这样
数据分析
import pandas as pd
import matplotlib.pyplot as plt
# Workbook written by the scraper, and a second workbook with the hero lookup.
lujing = r"E:/python文件/DOTA2利雅得.xlsx"
lujing1 = r"E:\python文件\DOTA2数据.xlsx"
shuju = pd.read_excel(lujing, sheet_name='选手数据')
shuju1 = pd.read_excel(lujing1, sheet_name='dota2英雄id')
# Join the player rows with the lookup sheet on the shared '英雄id' column
# (presumably this is what adds the hero-name column used further down).
shuju = shuju.merge(shuju1, on='英雄id')
shuju.head()
数据(因为原数据的英雄都是id,这里进行连接将英雄id替换为英雄名 )
选手KDA前十名
def kda(shuju):
    """Return the KDA value of one player row.

    Args:
        shuju: One row supporting key lookup for '杀人数' (kills),
            '助攻' (assists) and '死亡数' (deaths).

    Returns:
        ``kills + assists`` when deaths is 0, otherwise
        ``(kills + assists) / deaths``.
    """
    deaths = shuju['死亡数']
    takedowns = shuju['杀人数'] + shuju['助攻']
    # A deathless game keeps the raw takedown count instead of dividing by 0.
    return takedowns if deaths == 0 else takedowns / deaths
# Per-row KDA, then the mean per (player, team), highest first.
shuju['KDA'] = shuju.apply(kda, axis=1)
kda_m = (
    shuju.groupby(['name', '战队'])['KDA']
    .mean()
    .sort_values(ascending=False)
)
kda_m.head(10)
选手参战率,参葬率,战死率
def canzhan(shuju):
    """Return the kill participation of one player row.

    Participation is ``(kills + assists) / kills of the player's own side``,
    where the side is chosen by the '所在方' column ('天辉' uses
    '天辉杀人数', anything else uses '夜宴杀人数').

    Args:
        shuju: One row supporting key lookup for '所在方', '杀人数', '助攻',
            '天辉杀人数' and '夜宴杀人数'.

    Returns:
        The participation ratio, or NaN when the player's side has 0 kills
        (the ratio is undefined and would otherwise divide by zero).
    """
    if shuju['所在方'] == '天辉':
        team_kills = shuju['天辉杀人数']
    else:
        team_kills = shuju['夜宴杀人数']
    if team_kills == 0:
        return float('nan')
    return (shuju['杀人数'] + shuju['助攻']) / team_kills
def canzang(shuju):
    """Return the share of the opposing side's kills that were this player.

    The value is ``deaths / kills of the opposing side``: a '天辉' player is
    divided by '夜宴杀人数', any other player by '天辉杀人数'.

    Args:
        shuju: One row supporting key lookup for '所在方', '死亡数',
            '天辉杀人数' and '夜宴杀人数'.

    Returns:
        The death share, or NaN when the opposing side has 0 kills (the
        ratio is undefined and would otherwise divide by zero).
    """
    if shuju['所在方'] == '天辉':
        enemy_kills = shuju['夜宴杀人数']
    else:
        enemy_kills = shuju['天辉杀人数']
    if enemy_kills == 0:
        return float('nan')
    return shuju['死亡数'] / enemy_kills
# Per-row kill participation and death share.
shuju['参战率'] = shuju.apply(canzhan, axis=1)
shuju['参葬率'] = shuju.apply(canzang, axis=1)
# Mean of both rates per (player, team). An earlier PSG.LGD-losses filter
# assigned to `x` was overwritten immediately by this line, so it is dropped.
x = shuju.groupby(['name', '战队'])[['参葬率', '参战率']].mean()
# Death share relative to kill participation; lower sorts first below.
x['战死率'] = x['参葬率'] / x['参战率']
x.sort_values(by='战死率', ascending=True).head(10)
选手场均十分钟补刀数前十,以及他们的GPM,XPM
# Mean 10-minute last hits, GPM and XPM per (player, team), as flat columns.
budao = (
    shuju.groupby(['name', '战队'])[['10分钟补刀', 'GPM', 'XPM']]
    .mean()
    .reset_index()
)
budao.sort_values(by='10分钟补刀', ascending=False).head(10)
选手场均反眼数前十
# Mean '反眼数' per (player, team), highest first.
fanyan = shuju.groupby(['name', '战队'])['反眼数'].mean().reset_index()
fy = fanyan.sort_values(by='反眼数', ascending=False)
fy.head(10)
战队场均治疗量
# Mean hero healing per team, highest first, shown as a two-column table.
zhiliao = shuju.groupby(['战队'])['治疗量'].mean().sort_values(ascending=False)
x = zhiliao.reset_index()
x
本次比赛英雄的上场数前十以及胜率
# Appearances and wins per hero, computed directly from the '输赢' column.
# This avoids depending on the '_x'/'_y' suffixes that a merge of two
# same-named Series produces (the name of a value_counts() result differs
# between pandas versions), and it keeps heroes that never won (0 wins)
# instead of dropping them through an inner merge.
changshu = shuju.groupby(['英雄名'])['输赢'].count()
wl = (shuju['输赢'] == '赢').groupby(shuju['英雄名']).sum()
hero = pd.DataFrame({'场数': changshu, '胜场': wl})
hero['胜率'] = hero['胜场'] / hero['场数']
# Top ten by appearances, ties broken by number of wins.
y = hero.sort_values(['场数', '胜场'], ascending=False).head(10)
y
计算先摧毁对方优势路,中路,劣势路一塔的胜率
# The tower-time and '胜利方' columns queried below are written by the
# scraper to the '战队数据' sheet; before this point `x` still held the
# per-team healing table, so load the match-level sheet explicitly.
x = pd.read_excel(lujing, sheet_name='战队数据')
# A tower with no recorded destruction has no time; 100 (minutes) makes it
# compare as "fell later" than any recorded time.
x = x.fillna(100)
# The stored time is when that side's own tower was destroyed, so a larger
# radiant time means the dire mid tower fell first.
tiaojian1 = '天辉中路一塔>夜宴中路1塔 and 胜利方=="天辉"'
tiaojian2 = '天辉中路一塔<夜宴中路1塔 and 胜利方=="夜宴"'
y1 = x.query(tiaojian1)
y2 = x.query(tiaojian2)
(len(y1)+len(y2))/len(x)
0.722
# Share of matches won by the side whose '优势路一塔' column holds the later
# time, i.e. whose own tower in that column was destroyed second.
tiaojian1 = '天辉优势路一塔>夜宴优势路一塔 and 胜利方=="天辉"'
tiaojian2 = '天辉优势路一塔<夜宴优势路一塔 and 胜利方=="夜宴"'
y1, y2 = x.query(tiaojian1), x.query(tiaojian2)
matched = len(y1) + len(y2)
matched / len(x)
0.592
# Share of matches won by the side whose '劣势路一塔' column holds the later
# time, i.e. whose own tower in that column was destroyed second.
tiaojian1 = '天辉劣势路一塔>夜宴劣势路一塔 and 胜利方=="天辉"'
tiaojian2 = '天辉劣势路一塔<夜宴劣势路一塔 and 胜利方=="夜宴"'
y1, y2 = x.query(tiaojian1), x.query(tiaojian2)
matched = len(y1) + len(y2)
matched / len(x)
0.759
最后看看LGD的使用英雄以及胜率
# Restrict the player rows to PSG.LGD.
tiaojian = "战队=='PSG.LGD'"
sec = shuju.query(tiaojian)
# Appearances and wins per hero, computed directly from the '输赢' column.
# This avoids depending on the '_x'/'_y' suffixes that a merge of two
# same-named Series produces (the name of a value_counts() result differs
# between pandas versions), and it keeps heroes that never won (0 wins)
# instead of dropping them through an inner merge.
changshu = sec.groupby(['英雄名'])['输赢'].count()
wl = (sec['输赢'] == '赢').groupby(sec['英雄名']).sum()
hero = pd.DataFrame({'场数': changshu, '胜场': wl})
hero['胜率'] = hero['胜场'] / hero['场数']
# Top ten by appearances, ties broken by number of wins.
y = hero.sort_values(['场数', '胜场'], ascending=False).head(10)
y