利用BeautifulSoup爬取百度某贴吧首页所有的帖子主题和对应的作者名字(要求存到字典中)。
import requests
from bs4 import BeautifulSoup
# Fetch the front page of one Baidu Tieba forum and build a dictionary that
# maps each thread title to the "title" attribute of its author tag.
url = 'https://tieba.baidu.com/f?kw=%E5%8C%97%E4%BA%AC%E4%BA%A4%E9%80%9A%E5%A4%A7%E5%AD%A6%E6%B5%B7%E6%BB%A8%E5%AD%A6%E9%99%A2&fr=index'
head = {
    'user-agent': 'Mozilla/5.0',
    # Placeholder value: substitute a real cookie string before running.
    'cookie': '你的cookie',
}

# A timeout keeps the script from blocking forever on an unresponsive server;
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the thread list.
rw = requests.get(url=url, headers=head, timeout=10)
rw.raise_for_status()
rw.encoding = 'utf-8'

soup = BeautifulSoup(rw.text, 'lxml')
title_tags = soup.find_all('a', class_="j_th_tit")
author_tags = soup.find_all('span', class_="tb_icon_author")

# get_text() is used instead of .string because .string is None whenever the
# tag has more than one child node, which would merge several threads under a
# single None key.
# NOTE(review): zip() pairs tags purely by position and stops at the shorter
# list, so this assumes every title tag has exactly one matching author tag in
# the same order - confirm against the live page markup.
# Threads that share an identical title overwrite each other in the mapping.
title_to_author = {
    title_tag.get_text(strip=True): author_tag.get("title")
    for title_tag, author_tag in zip(title_tags, author_tags)
}
print(title_to_author)
运行结果
D:\PyCharmProject\venv\Scripts\python.exe D:/PyCharm