# 对代码有疑问的可评论留言
# -*- coding: utf-8 -*-
import json
import re
import requests
import csv
# Matches one movie entry of the Douban Top 250 list page and captures its
# title, release year, rating and number of ratings as named groups.
# re.S lets "." cross line breaks, since one entry spans many lines of HTML.
#
# The year group ends at the literal text "&nbsp;": requests' ``resp.text``
# does not decode HTML entities, so the separator after the year shows up in
# the page source as the entity itself. Ending the group at a plain space
# would stop at the indentation right after <br> and yield an empty year.
# NOTE(review): assumes the page still renders "<year>&nbsp;/&nbsp;<country>"
# after the <br> -- confirm against the live markup.
MOVIE_PATTERN = re.compile(
    r'<li>.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp;'
    r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
    r'.*?<span>(?P<num>.*?)人评价</span>',
    re.S,
)


def parse_movies(page_content: str) -> list:
    """Extract the movie entries from the HTML of a Top 250 list page.

    Args:
        page_content: HTML text of the page.

    Returns:
        A list with one dict per matched entry, in page order. Each dict has
        the keys ``name``, ``year``, ``score`` and ``num`` (in that order);
        all values are strings and ``year`` has surrounding whitespace
        removed. The list is empty when nothing matches.
    """
    movies = []
    for match in MOVIE_PATTERN.finditer(page_content):
        movie = match.groupdict()
        movie["year"] = movie["year"].strip()
        movies.append(movie)
    return movies


def main():
    """Download the first Top 250 page, print each movie and save data.csv.

    Raises:
        requests.RequestException: if the request times out, fails, or the
            server answers with an error status.
    """
    # Send a browser User-Agent with the request instead of the library default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
    }
    url = 'https://movie.douban.com/top250?start=0'

    # A timeout keeps the script from hanging forever on a stalled connection;
    # raise_for_status() turns an error response into an exception instead of
    # silently producing an empty CSV file.
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    movies = parse_movies(resp.text)

    # newline="" is required by the csv module to avoid blank rows on Windows;
    # an explicit encoding keeps non-ASCII titles independent of the locale.
    # The with-block closes the file even if writing a row fails.
    with open("data.csv", mode="w", newline="", encoding="utf-8") as f:
        csvwriter = csv.writer(f)
        for movie in movies:
            print("电影名称:--------")
            print(movie["name"])
            print("电影出产年份:--------")
            print(movie["year"])
            print("电影评分是:--------")
            print(movie["score"])
            print("该电影有" + movie["num"] + "人评价")
            # One CSV row per movie: name, year, score, number of ratings.
            csvwriter.writerow(movie.values())
    print("over")


if __name__ == '__main__':
    main()