# coding=utf-8
"""
author: lei
function: scrape live-room listings from Huya (huya.com)
"""
import time
from selenium import webdriver
class HuYa(object):
    """Scrape the paginated live-room listing of huya.com with Chrome.

    The crawler opens ``url`` in a Selenium-driven Chrome, reads every
    room entry on the current page, prints it, and then clicks the
    "next page" link until that link can no longer be found or clicked.
    """

    # Locator strategy passed to find_element(s); this is the same string
    # value as selenium's ``By.XPATH`` constant.
    _XPATH = "xpath"

    # Seconds to pause before reading a page, so the listing has time to
    # render after navigation or a "next page" click.
    page_settle_seconds = 1

    def __init__(self,
                 url="https://www.huya.com/l",
                 chrome_binary=r"D:\文件2\Google\Chrome\Application\chrome.exe",
                 driver_path=r"D:\文件\软件\chromedriver_win32\chromedriver.exe"):
        """Start a Chrome session.

        Args:
            url: Listing page the crawl starts from.
            chrome_binary: Path to the Chrome executable to launch.
            driver_path: Path to the chromedriver executable.
        """
        self.url = url
        options = webdriver.ChromeOptions()
        options.binary_location = chrome_binary
        # The driver path is passed positionally, exactly as the original
        # code did.
        # NOTE(review): newer Selenium releases expect a Service object
        # here instead -- confirm against the installed version.
        self.driver = webdriver.Chrome(driver_path, options=options)

    def parse_data(self):
        """Extract every room entry from the page currently loaded.

        Prints the number of room elements found.

        Returns:
            A list of dicts with the keys ``title``, ``url``, ``owner``,
            ``type`` and ``num``, one dict per room element, in page
            order. The list is empty when no room element matches.
        """
        time.sleep(self.page_settle_seconds)
        room_list = self.driver.find_elements(
            self._XPATH, "/html/body/div[2]/div/div/div[4]/ul/li")
        print(len(room_list))

        temp_list = []
        for room in room_list:
            # The second anchor carries both the title text and the room
            # link, so it is looked up once and reused.
            link = room.find_element(self._XPATH, "./a[2]")
            temp_list.append({
                "title": link.text,
                "url": link.get_attribute("href"),
                "owner": room.find_element(
                    self._XPATH, "./span[1]/span[1]/i").text,
                "type": room.find_element(
                    self._XPATH, "./span[1]/span[2]/a").text,
                "num": room.find_element(
                    self._XPATH, "./span[1]/span[3]/i[2]").text,
            })
        return temp_list

    def save_data(self, temp_list):
        """Print each scraped room record on its own line.

        Args:
            temp_list: Iterable of room records as produced by
                ``parse_data``.
        """
        for temp in temp_list:
            print(temp)

    def run(self):
        """Crawl the listing page by page, then shut the browser down.

        The loop ends when locating, scrolling to, or clicking the
        "next page" link raises a Selenium error. The driver is quit in
        all cases, so the instance cannot be reused after ``run``
        returns or raises.
        """
        # Imported here so the file's top-level import block is unchanged.
        from selenium.common.exceptions import WebDriverException

        try:
            self.driver.get(self.url)
            while True:
                self.save_data(self.parse_data())
                try:
                    el_next = self.driver.find_element(
                        self._XPATH, "//a[@class='laypage_next']")
                    # Scroll far down the page before clicking the link.
                    self.driver.execute_script("scrollTo(0, 100000)")
                    el_next.click()
                except WebDriverException:
                    # Any Selenium failure here is treated as "no more
                    # pages"; KeyboardInterrupt and other non-Selenium
                    # errors are no longer swallowed.
                    print("结束!")
                    break
        finally:
            # Always release the browser and chromedriver processes.
            self.driver.quit()
if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script,
    # not when it is imported as a module.
    huya = HuYa()
    huya.run()