# 爬取某直播平台所有正在直播的房间信息
# (Scrape the information of every room that is currently live on a live-streaming platform.)
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
class Douyu(object):
    """Scrape the room cards listed on Douyu's "all rooms" directory page.

    The crawler loads ``self.url`` in a Selenium-driven browser, extracts
    four text fields from every room card on the page, prints them, and then
    clicks the "下一页" (next page) control until that step fails.

    The class never closes the browser itself; whoever owns the instance is
    responsible for calling ``driver.quit()`` when finished.

    Args:
        driver: Optional, already-constructed Selenium WebDriver to use.
            When omitted, a new ``webdriver.Chrome()`` is started.
        page_load_wait: Seconds to sleep before reading each page so that
            the dynamically rendered room list has time to appear.
    """

    # XPath matching every room card on the current listing page.
    ROOM_XPATH = '//*[@id="listAll"]/section[2]/div[2]/ul/li/div'

    # Output key -> XPath (relative to one room card) of the element whose
    # text is stored under that key.  Insertion order is the key order of
    # each emitted dict.
    # NOTE(review): the key names suggest title / category / streamer /
    # viewer count, but the page markup is not visible here -- confirm.
    FIELD_XPATHS = {
        'title': './a/div[2]/div[1]/h3',
        'type': './a/div[2]/div[1]/span',
        'owner': './a[1]/div[2]/div[2]/h2',
        'num': './a[1]/div[2]/div[2]/span',
    }

    # XPath of any element whose text contains "下一页" ("next page").
    NEXT_PAGE_XPATH = '//*[contains(text(),"下一页")]'

    def __init__(self, driver=None, page_load_wait=3):
        self.url = "https://www.douyu.com/directory/all"
        self.page_load_wait = page_load_wait
        # Only launch Chrome when the caller did not inject a driver.
        self.driver = webdriver.Chrome() if driver is None else driver

    def parse_data(self):
        """Extract one dict per room card from the page currently loaded.

        Sleeps ``self.page_load_wait`` seconds first, then prints the number
        of room cards found.

        Returns:
            A list of dicts with the keys of ``FIELD_XPATHS``, each mapped
            to the ``.text`` of the matching element inside the room card.

        Raises:
            selenium.common.exceptions.NoSuchElementException: if a room
                card lacks one of the expected child elements.
        """
        time.sleep(self.page_load_wait)
        room_list = self.driver.find_elements(By.XPATH, self.ROOM_XPATH)
        print(len(room_list))
        return [
            {
                field: room.find_element(By.XPATH, xpath).text
                for field, xpath in self.FIELD_XPATHS.items()
            }
            for room in room_list
        ]

    def save_data(self, data_list):
        """Print every record of *data_list*, one per line."""
        for data in data_list:
            print(data)

    def run(self):
        """Open the directory page and scrape it page by page.

        After each page is parsed and printed, the window is scrolled far
        down and the "next page" element is clicked.  The loop ends the
        first time scrolling, locating, or clicking raises a Selenium error.
        """
        self.driver.get(self.url)
        while True:
            self.save_data(self.parse_data())
            try:
                # Scroll to the bottom so the pagination control is on
                # screen before it is clicked.
                self.driver.execute_script('scrollTo(0,1000000)')
                self.driver.find_element(
                    By.XPATH, self.NEXT_PAGE_XPATH
                ).click()
            except WebDriverException:
                # Selenium's base error class: covers a missing or
                # unclickable "next page" element without also swallowing
                # KeyboardInterrupt/SystemExit or plain programming errors.
                # NOTE(review): if the last page still renders a (disabled)
                # "下一页" element whose click does not raise, this loop
                # will not terminate -- verify against the live page.
                break
if __name__ == '__main__':
    # Script entry point: start the crawler and scrape every listing page.
    dy = Douyu()
    try:
        dy.run()
    finally:
        # Always shut the browser (and its driver process) down, even when
        # the crawl is interrupted or fails, so no orphaned browser is left.
        dy.driver.quit()