#拿到页面源代码 requests
#通过re来提取想要的有效信息
import requests
import re
# Page to scrape and the browser-like User-Agent header sent with the request.
url = "https://movie.douban.com/top250"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}

# Pattern used to parse the page: for each <li> ... <div class="item"> entry,
# capture the text of the first <span class="title"> as the named group "name".
# re.S lets "." match newlines so one entry may span several lines.
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>', re.S)


def fetch_page(page_url=url, request_headers=headers, timeout=10):
    """Download a page and return its body as text.

    Args:
        page_url: Address to request; defaults to the module-level ``url``.
        request_headers: HTTP headers to send; defaults to the module-level
            ``headers``.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script forever.

    Returns:
        The decoded response body (``resp.text``).

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    # The context manager releases the connection even if an error is raised.
    with requests.get(page_url, headers=request_headers, timeout=timeout) as resp:
        # Fail loudly on an error status instead of parsing an error page
        # into zero matches.
        resp.raise_for_status()
        return resp.text


def extract_names(page_content):
    """Return every "name" group matched by ``obj`` in ``page_content``.

    Args:
        page_content: HTML source to search.

    Returns:
        A list of the captured title strings in document order; empty when
        nothing matches.
    """
    return [match.group("name") for match in obj.finditer(page_content)]


def main():
    """Fetch the page and print one matched title per line."""
    for name in extract_names(fetch_page()):
        print(name)
    # NOTE: the pattern defines only the "name" group. Groups for score,
    # vote count and year must be added to ``obj`` before they can be
    # read with it.group(...); asking for an undefined group raises IndexError.


if __name__ == "__main__":
    main()