import requests
import re
# Fetch the page.
url = "https://movie.douban.com/chart"
# A browser-like User-Agent header is sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0'}
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page and
# silently producing an empty output file.
response.raise_for_status()
html_str = response.text

# Parse the data.
# Starting at each "<a", lazily skip ahead to "nbg" and then to the next
# title="..." attribute, capturing its value; re.S lets ".*?" cross newlines.
pattern = re.compile('<a.*?nbg.*?title="(.*?)">', re.S)
items = pattern.findall(html_str)

# Store the data: one captured title per line, echoed to stdout as written.
with open('douBan.txt', 'w', encoding='utf-8') as f:
    for item in items:
        f.write(item + '\n')
        print(item)
print('done!')
# Python: scraping the Douban movie chart
# (Source page footer: latest recommended article published 2024-09-17 23:15:58)