学习 Python 爬虫:爬取斗鱼直播间的房间名和观众人数(Python 3.7,2021年7月28日 15:53:20)
# -*- coding=utf-8 -*-
import unittest
from selenium import webdriver
from bs4 import BeautifulSoup as bs
class Tencent(unittest.TestCase):
    """Selenium-driven scrape of the Douyu "all rooms" directory.

    The single test walks the paginated room list and prints each room
    title together with its viewer count.
    """

    def setUp(self):
        """Start a maximised Chrome session before each test."""
        # Raw string: the Windows path is full of backslashes that must not
        # be interpreted as escape sequences.
        self.driver = webdriver.Chrome(
            executable_path=r"D:\Python_module\chromdriver\chromedriver.exe"
        )
        self.driver.maximize_window()

    def testTencent(self):
        """Print room title and viewer count for every page of the listing.

        unittest only collects methods whose name starts with ``test``.
        """
        self.driver.get("https://www.douyu.com/directory/all")
        while True:
            # Parse one snapshot of the page and use it for both the data
            # extraction and the pagination check, so they cannot disagree.
            soup = bs(self.driver.page_source, "lxml")
            names = soup.find_all("h3", {"class": "DyListCover-intro"})
            numbers = soup.find_all("span", {"class": "DyListCover-hot"})

            # zip pairs the two result lists positionally:
            # [(name1, number1), (name2, number2), ...]
            for name, number in zip(names, numbers):
                print(f"房间名: {name.get_text()} , 观众人数: {number.get_text()}")

            # Stop only when the *next* control itself is disabled (or there
            # is no pager at all). A page-wide substring search would also
            # trip on a disabled "previous" button on the first page.
            # NOTE(review): assumes the disabled marker is a class on the
            # same element as "dy-Pagination-next" - confirm against the
            # live markup.
            next_button = soup.find(class_="dy-Pagination-next")
            if next_button is None:
                break
            if "dy-Pagination-disabled" in next_button.get("class", []):
                break

            # NOTE(review): there is no explicit wait after the click; if the
            # list re-renders asynchronously the next iteration may re-read
            # the previous page. Consider a WebDriverWait here.
            self.driver.find_element_by_class_name("dy-Pagination-next").click()

    def tearDown(self):
        """Close the browser and end the WebDriver session."""
        self.driver.quit()
# Script entry point: when the file is executed directly, hand control to
# unittest's command-line runner.
if __name__ == "__main__":
    unittest.main()
测试结果: