# coding=utf-8
"""
author: lei
function: crawl the Douyu live-streaming website
"""
import time
from selenium import webdriver
class DouYu(object):
    """Selenium-driven crawler for the Douyu "all rooms" directory page.

    The crawler opens the directory in Chrome, scrapes every room card on
    the current page, prints the scraped records, and then clicks the
    "next page" control until it can no longer do so.
    """

    # Seconds to sleep before a page is parsed. Override on a subclass or an
    # instance to tune it.
    # NOTE(review): presumably this gives the dynamically rendered room list
    # time to load -- confirm; an explicit wait may be more reliable.
    PAGE_LOAD_WAIT = 2

    def __init__(
        self,
        chrome_binary=r"D:\文件2\Google\Chrome\Application\chrome.exe",
        driver_path=r"D:\文件\软件\chromedriver_win32\chromedriver.exe",
    ):
        """Start a Chrome session that will be used for the whole crawl.

        Args:
            chrome_binary: Path to the Chrome executable to launch.
            driver_path: Path to the chromedriver executable.

        Both defaults are the paths that were previously hard-coded, so
        ``DouYu()`` behaves exactly as before.
        """
        self.url = "https://www.douyu.com/directory/all"
        options = webdriver.ChromeOptions()
        options.binary_location = chrome_binary
        # NOTE(review): newer Selenium releases expect a ``Service`` object
        # instead of a positional driver path -- confirm against the
        # installed Selenium version before upgrading.
        self.driver = webdriver.Chrome(driver_path, options=options)

    def parse_data(self):
        """Scrape every room card on the page currently loaded in the driver.

        Returns:
            list[dict]: One dict per room card, in page order, with the keys
            ``"title"``, ``"type"``, ``"owner"`` and ``"num"`` (element
            text) and ``"img"`` (the ``src`` attribute of the cover image).
        """
        time.sleep(self.PAGE_LOAD_WAIT)
        # ``find_elements("xpath", ...)`` is used instead of the
        # ``find_elements_by_xpath`` helper because the generic form is
        # available in both Selenium 3 and Selenium 4.
        room_list = self.driver.find_elements(
            "xpath", "//*[@id='listAll']/section[2]/div[2]/ul/li/div"
        )
        # Progress output: number of room cards found on this page.
        print(len(room_list))
        # Walk the room nodes and pull the fields out of each one.
        return [
            {
                "title": room.find_element("xpath", "./a[1]/div[2]/div[1]/h3").text,
                "type": room.find_element("xpath", "./a[1]/div[2]/div[1]/span").text,
                "owner": room.find_element("xpath", "./a[1]/div[2]/div[2]/h2").text,
                "num": room.find_element("xpath", "./a[1]/div[2]/div[2]/span").text,
                "img": room.find_element(
                    "xpath", "./a/div[1]/div[1]/img"
                ).get_attribute("src"),
            }
            for room in room_list
        ]

    def save_data(self, data_list):
        """Print each scraped record on its own line.

        Args:
            data_list: Iterable of records as returned by ``parse_data``.
        """
        for data in data_list:
            print(data)

    def run(self):
        """Crawl the directory page by page, then shut the browser down.

        Loads ``self.url`` and repeats parse -> save -> click "next" until
        the "next" control cannot be located or clicked. The driver is
        always quit on the way out, including when an error or Ctrl-C
        interrupts the crawl, so no Chrome/chromedriver process is left
        running. As a consequence an instance can be run only once.
        """
        try:
            self.driver.get(self.url)
            while True:
                self.save_data(self.parse_data())
                try:
                    # Locate the button first so that a missing button is
                    # detected before anything is scrolled or clicked.
                    el_next = self.driver.find_element(
                        "xpath", "//li[@class=' dy-Pagination-next']/span"
                    )
                    # Scroll far down the page before clicking (presumably
                    # to bring the pagination control into view).
                    self.driver.execute_script("scrollTo(0, 100000)")
                    el_next.click()
                except Exception:
                    # Best effort: any failure to reach the next page ends
                    # the crawl (presumably the button is absent on the
                    # last page). ``Exception`` rather than a bare
                    # ``except`` so KeyboardInterrupt and SystemExit still
                    # propagate.
                    break
        finally:
            self.driver.quit()
if __name__ == "__main__":
    # Start the crawl only when this file is executed as a script, not when
    # it is imported as a module.
    douyu = DouYu()
    douyu.run()