任务
-
使用任意方法爬取王者荣耀赛程
-
爬取如下图所示数据
完整代码
import time

from selenium import webdriver
from selenium.webdriver.common.by import By


class match:
    """Plain record holding the fields scraped for a single match."""

    def __init__(self):
        self.time = ''    # match time
        self.status = ''  # match status
        self.place = ''   # match city (read from the CSS class of the location icon)
        self.team1 = ''   # name of team 1
        self.team2 = ''   # name of team 2
        self.score = ''   # score

    def print_match_info(self):
        """Print every field on one line, separated by spaces."""
        print(self.time, self.status, self.place, self.team1, self.team2, self.score)


def _js_click(driver, element):
    """Click *element* through JavaScript instead of a native click.

    Some links on the page are covered by an image, so a native click
    cannot reach them; dispatching the click from script avoids that.
    """
    driver.execute_script("arguments[0].click();", element)


def _parse_match(info):
    """Build a ``match`` record from one ``<li>`` element of the match list."""
    record = match()

    # Match location: stored as the class attribute of the first <i> element.
    record.place = info.find_element(
        By.XPATH, './div[@class="match-date"]/i[1]').get_attribute('class')

    # Match time: text of the <i> child and of the <span> child, space-joined.
    match_time = info.find_element(By.XPATH, './div[@class="match-date"]/p[1]')
    record.time = (match_time.find_element(By.XPATH, './i').text
                   + ' '
                   + match_time.find_element(By.XPATH, './span').text)

    # Match status.
    record.status = info.find_element(
        By.XPATH, './div[@class="match-date"]/p[2]').text

    # Team names.
    team = info.find_elements(
        By.XPATH, './div[@class="match-team pr"]//p[@class="team-info"]')
    record.team1, record.team2 = team[0].text, team[1].text

    # Score: concatenation of the three <p> elements of the score box.
    score = info.find_elements(
        By.XPATH, './div[@class="match-team pr"]//div[@class="pa match-score"]/p')
    record.score = score[0].text + score[1].text + score[2].text

    return record


def main():
    """Scrape the KPL schedule page and print every match found.

    Walks each schedule tab, then each week link inside it, then each
    entry of the match list, printing one line per match.

    Returns:
        list[match]: all scraped records, in the order they were visited.
    """
    match_info_list = []
    with webdriver.Chrome() as driver:
        driver.implicitly_wait(10)  # implicit wait
        driver.get("https://pvp.qq.com/match/kpl/index.shtml")

        # Schedule tabs (e.g. 2020 autumn regular season).
        match_type = driver.find_elements(
            By.XPATH, '//ul[@class="kpl_schedule_nav"]/li/a')
        for i, m in enumerate(match_type):
            m.click()
            time.sleep(1)

            # Week links belonging to the current tab.
            week = driver.find_elements(
                By.XPATH,
                '//div[@class="kpl_schedule_date clearfix"][%d]/a' % (i + 1))
            for j, w in enumerate(week):
                # From the 8th week on, advance the week strip first.
                # NOTE(review): presumably only 7 weeks are visible at once - confirm.
                if j >= 7:
                    _js_click(driver, driver.find_element(By.ID, 'dateNext'))
                print(w.text)

                # Enter this week.
                _js_click(driver, w)
                time.sleep(1)

                # First week of the second tab (2020 autumn playoffs):
                # page the match list to the left four times.
                if i == 1 and j == 0:
                    for _ in range(4):
                        _js_click(driver, driver.find_element(By.ID, 'matchPrev'))

                # Match entries of the current week.
                match_info = driver.find_elements(By.XPATH, '//*[@id="matchList"]/li')
                last_index = len(match_info) - 1
                for n, info in enumerate(match_info):
                    # From the third entry up to (not including) the last,
                    # page the match list to the right before reading.
                    if 2 <= n < last_index:
                        _js_click(driver, driver.find_element(By.ID, 'matchNext'))

                    record = _parse_match(info)
                    match_info_list.append(record)
                    record.print_match_info()

    return match_info_list


if __name__ == "__main__":
    main()
完成效果
PS:如有需要 Python 学习资料的小伙伴,可以点击下方链接自行获取。
常见错误
-
链接点击不了。原因:链接被图片挡住,原生的 click() 无法点到该元素。解决方法:使用 execute_script 通过 JavaScript 来点击,例如 driver.execute_script("arguments[0].click();", element)。