没错,这里用的就是之前学过的、用来匹配字符串的正则表达式。使用前先导入 re 模块;这里只简单演示用法,re 模块的详细概念和应用可以翻阅之前关于字符串正则表达式的博客:《从入门到入狱——re模块》。
import requests
import re
def get_data():
    """Fetch the HTML of the Douban Top 250 movie page.

    Returns:
        The response body as text when the server answers with HTTP 200.
        On any other status code, or when the request itself fails
        (connection error, timeout, ...), prints a failure message and
        returns ``None``.
    """
    url = 'https://movie.douban.com/top250'
    # A browser-like User-Agent is sent with the request
    # (presumably the site rejects the default client UA -- TODO confirm).
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=header, timeout=10)
    except requests.RequestException:
        # Route network-level failures through the same failure path as a
        # non-200 status instead of crashing the script.
        print('请求失败')
        return None
    if response.status_code == 200:
        return response.text
    print('请求失败')
    return None
def jiexi_data(data):
    """Extract per-movie fields from Douban Top 250 HTML using a regex.

    Args:
        data: HTML text of the page, or ``None`` when the download failed.

    Returns:
        A list of 3-tuples of strings, one per matched ``<li>`` element:
        (text of the first ``<span class="title">``, text of the
        ``rating_num`` span, text of the next plain ``<span>`` -- presumably
        the vote count; verify against the page markup).
        Returns an empty list when ``data`` is ``None``.
        The list is also printed when parsing was attempted.
    """
    # A failed download yields None; re.findall would raise TypeError on it.
    if data is None:
        return []
    # (?s) is the DOTALL flag: '.' also matches newlines, so a single match
    # can span the several lines that make up one <li> element.
    re_ = r'(?s)<li>.+?<span class="title">(.+?)</span>.+?<span class="rating_num" property="v:average">(.+?)</span>.+?<span>(.+?)</span>.+?</li>'
    result = re.findall(re_, data)
    print(result)
    return result
# Script entry: download the page, then parse and print the extracted fields.
page_html = get_data()
jiexi_data(page_html)