from time import sleep
import requests
from bs4 import BeautifulSoup
# 爬取分类页面数据
#获取斗鱼分类页面数据
def get_directory():
    """Fetch the Douyu directory page and extract its category entries.

    Downloads ``https://www.douyu.com/directory``, parses it with the
    ``lxml`` parser and collects one entry per ``<li class="unit">`` element.
    Entries that lack an ``<a href>``, an ``<img data-original>`` or a
    ``<p>`` are skipped instead of aborting the whole scrape.

    Returns:
        list[dict]: One dict per category with the keys ``'href'`` (the
        link's ``href`` prefixed with ``https://www.douyu.com``),
        ``'image'`` (the ``data-original`` attribute of the entry's first
        ``<img>``) and ``'title'`` (the text of the entry's first ``<p>``).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://www.douyu.com/directory'
    # Without a timeout, requests may wait forever on an unresponsive server.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into "no results".
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')

    results = []
    for unit in soup.find_all('li', class_='unit'):
        link = unit.find('a')
        image = unit.find('img')
        caption = unit.find('p')
        # find() returns None when the tag is absent; skip incomplete entries.
        if link is None or image is None or caption is None:
            continue

        path = link.get('href')
        image_url = image.get('data-original')
        if path is None or image_url is None:
            continue

        # NOTE(review): this pause runs once per parsed entry, not between
        # HTTP requests; kept from the original -- confirm it is still wanted.
        sleep(2)
        results.append({
            'href': 'https://www.douyu.com' + path,
            'image': image_url,
            'title': caption.text,
        })
    return results
def main():
    """Scrape every directory category and hand each one to ``get_zhibo``.

    For each entry returned by ``get_directory()`` the entry's link, title
    and image are passed to ``get_zhibo`` in that argument order. The value
    returned by ``get_zhibo`` is not used here.
    """
    for entry in get_directory():
        link, cover, name = entry['href'], entry['image'], entry['title']
        get_zhibo(link, name, cover)
if __name__ == '__main__':
m