import requests
import re
import csv
# Fetch the Douban Top 250 page at `url`, pull the name / year / grade /
# remark fields out of every <li> entry with a regular expression, and write
# one CSV row per entry to data.csv.
url = "https://movie.douban.com/top250"
headers = {
    # A browser-like User-Agent is sent to get past a simple anti-scraping check.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
}

# The context manager releases the connection even if something below raises;
# the timeout keeps the script from hanging forever on an unresponsive server.
with requests.get(url, headers=headers, timeout=10) as resp:
    # Fail loudly on an HTTP error instead of silently producing an empty CSV.
    resp.raise_for_status()
    s = resp.text  # page source

# re.S lets "." match newlines, so a single pattern can span a whole <li> entry.
# NOTE(review): the literal space right after the `year` group may originally
# have been an `&nbsp` entity that was lost when this code was copied from a
# web page -- confirm against the real page source before relying on `year`.
obj = re.compile(r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?<br>(?P<year>.*?) .*?v:average">(?P<grade>.*?)</span>.*?<span>(?P<remark>.*?)人评价</span>', re.S)

# newline="" is required by the csv module so that it controls line endings
# itself (otherwise Windows output gets a blank line after every row).
# Mode "w" overwrites the file on each run; switch to "a" to append instead.
with open("data.csv", mode="w", encoding="utf-8", newline="") as f:
    csvwriter = csv.writer(f)
    for i in obj.finditer(s):
        dic = i.groupdict()
        # The captured year is surrounded by whitespace in the page markup.
        dic["year"] = dic["year"].strip()
        csvwriter.writerow(dic.values())

print("over!")
# Source article title: "The re module: Douban movie ranking"
# (Blog page footer: latest recommended article published 2024-07-12 16:16:27)