# Scrape streamer names and online viewer counts from the Douyu (dy) platform.
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import time
# Walk the Douyu "all rooms" directory page by page and print, for every
# room card found, the viewer count followed by the streamer name.
driver = webdriver.PhantomJS()
try:
    driver.get("https://www.douyu.com/directory/all")

    # The initial load already shows the first page, so the first jump must
    # target page 2; starting at 1 would re-load and re-print page 1.
    next_page = 2

    while True:
        # Grab the rendered HTML once and reuse it for parsing and for the
        # last-page check below.
        html = driver.page_source
        soup = bs(html, "lxml")

        # Elements holding the streamer names.
        names = soup.find_all("span", {"class": "dy-name ellipsis fl"})
        # Elements holding the viewer counts.
        nums = soup.find_all("span", {"class": "dy-num fr"})

        # Names and counts are paired by position; zip stops at the shorter
        # of the two lists.
        for name, number in zip(names, nums):
            print("\t观众人数:", number.get_text().strip(), end="")
            print("\t主播名字:", name.get_text().strip())

        # Stop once the page contains this marker (presumably the CSS class
        # of a disabled "next page" button, i.e. the last page).
        if "shark-pager-disable-next" in html:
            break

        # Type the target page number into the pager's jump box and submit.
        # Clear the box first so digits from a previous jump cannot pile up.
        jump_box = driver.find_element_by_class_name("jumptxt")
        jump_box.clear()
        jump_box.send_keys(str(next_page))
        driver.find_element_by_class_name("shark-pager-submit").click()

        # Fixed delay to give the next page time to load before re-reading
        # page_source (there is no explicit wait condition).
        time.sleep(5)
        next_page += 1
finally:
    # Always shut down the browser process, even if scraping raised.
    driver.quit()