#!/usr/bin/env python
# coding=utf-8
import time
import unittest

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
class Douyu(unittest.TestCase):
    """Walk the Douyu "all rooms" directory and total the viewer counts.

    The single test pages through https://www.douyu.com/directory/all in a
    Chrome browser, prints each room's viewer count and title, and collects
    the counts in ``self.list``.  ``tearDown`` prints the number of rooms
    seen and the summed audience, then closes the browser.
    """

    # Seconds to pause after clicking "next page" before re-reading the DOM.
    PAGE_LOAD_DELAY = 4

    # Fixture hook: unittest requires this exact name.
    def setUp(self):
        """Start Chrome and reset the counters used by the test."""
        self.driver = webdriver.Chrome()
        self.num = 0      # number of rooms seen
        self.count = 0    # summed viewer count, filled in by tearDown
        self.list = []    # one integer viewer count per room

    @staticmethod
    def _parse_viewers(text):
        """Convert a viewer-count label to an integer.

        Labels are either plain digits ("8532") or a decimal followed by
        the unit "万" (ten thousand), e.g. "1.5万" -> 15000.  A blank label
        counts as 0 instead of raising IndexError.  Any other non-numeric
        text still raises ValueError from ``int``/``float``.
        """
        text = text.strip()
        if not text:
            return 0
        if text.endswith(u'万'):
            # round() rather than int(): the float product can land just
            # below the intended whole number and would otherwise truncate.
            return int(round(float(text.replace(u'万', '')) * 10000))
        return int(text)

    @staticmethod
    def _format_total(total):
        """Render *total* as "<n>万<n>千<rest>", omitting zero-valued parts.

        Examples: 123456 -> "12万3千456", 120456 -> "12万456",
        123000 -> "12万3千", 120000 -> "12万", 0 -> "0".
        """
        ten_thousands, below = divmod(total, 10000)
        thousands, rest = divmod(below, 1000)
        parts = []
        if ten_thousands:
            parts.append(str(ten_thousands) + '万')
        if thousands:
            parts.append(str(thousands) + '千')
        if rest:
            parts.append(str(rest))
        return ''.join(parts) or '0'

    # Test-case methods must start with "test" to be discovered.
    def testDouyu(self):
        """Page through the directory, printing and recording every room."""
        self.driver.get("https://www.douyu.com/directory/all")
        while True:
            # Take one snapshot per page so parsing and the last-page check
            # below look at the same DOM.
            page = self.driver.page_source
            soup = BeautifulSoup(page, 'lxml')
            # Room titles and viewer-count labels on the current page.
            titles = soup.find_all('h3', {'class': 'ellipsis'})
            nums = soup.find_all('span', {'class': 'dy-num fr'})
            # zip() pairs them positionally and stops at the shorter list.
            for title, num in zip(titles, nums):
                viewers = num.get_text().strip()
                print(u"观众人数:" + viewers, u"\t房间标题:" + title.get_text().strip())
                self.num += 1
                self.list.append(self._parse_viewers(viewers))
            # The "shark-pager-disable-next" class only appears once the
            # next-page button is disabled, i.e. on the last page.
            if "shark-pager-disable-next" in page:
                break
            # Simulate a click on "next page".
            self.driver.find_element(By.CLASS_NAME, "shark-pager-next").click()
            time.sleep(self.PAGE_LOAD_DELAY)

    def tearDown(self):
        """Print the room count and total audience, then quit the browser."""
        try:
            print("加载完成。。。")
            self.count += sum(self.list)
            print('总直播人数:' + str(self.num))
            print('合计%s人观看' % self._format_total(self.count))
        finally:
            # Always release the browser, even if the summary above fails.
            self.driver.quit()
# Run the test case through unittest's command-line runner when this file
# is executed as a script (not when it is imported).
if __name__ == "__main__":
    unittest.main()