import re
import requests
# Board page to scrape and the request headers sent with it.
url = 'https://www.maoyan.com/board/4'
headers = {
    'User-Agent': 'Mozilla/4.0(compatible;MSIE8.0;WindowsNT6.0;Trident/4.0)'
}

# Alternative patterns for other fields of the same page, kept for reference:
#   re.compile(r'<a\shref="(.*?)">(.*?)</a>')           # href and text of <a> tags
#   re.compile(r'<i\sclass="board-index.*?">(.*?)</i>')  # ranking index
#   re.compile(r'<img.*?class.*?src="(.*?)">')           # poster image URL
#   re.compile(r'<a.*?boarditem-click.*?>(.*?)</a>')     # movie name
#   re.compile(r'<p.*?releasetime.*?>(.*?)</p>')         # release time
#   re.compile(r'.*?integer.*?>(.*?)</i>')               # integer part of the score
#   re.compile(r'.*?fraction.*?>(.*?)</i>')              # fractional part of the score

# Leading-actors paragraph: a <p> tag whose attributes mention "star".
# - [^>]* keeps the "star" lookup inside the opening tag, so text such as
#   "<p>star wars</p>" is not mistaken for the cast paragraph.
# - re.S lets the captured text span line breaks inside the paragraph;
#   without it a paragraph laid out over several lines never matches.
pattern = re.compile(r'<p\b[^>]*\bstar\b[^>]*>(.*?)</p>', re.S)


def fetch_html(timeout=10):
    """Download the board page and return its HTML text.

    Args:
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The response body as text when the status code is 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    resp = requests.get(url, headers=headers, timeout=timeout)
    if resp.status_code != 200:
        return None
    return resp.text


def extract_stars(html):
    """Return the text of every leading-actors paragraph found in *html*.

    Surrounding whitespace (including the newlines and indentation of a
    multi-line paragraph) is stripped from each entry. An input without
    any matching paragraph yields an empty list.
    """
    return [text.strip() for text in pattern.findall(html)]


def main():
    """Fetch the board page and print the list of leading-actors entries."""
    html = fetch_html()
    if html is None:
        # Local import: only this failure path needs sys.
        import sys
        print('request failed: non-200 response from ' + url, file=sys.stderr)
        return
    items = extract_stars(html)
    print(items)


if __name__ == '__main__':
    main()