# -*- coding: utf-8 -*-
# mojun
"""Scrape the Maoyan movie board and print the details of every listed movie.

For each ``<dd>`` entry found on the board page the script prints, one value
per line: title, starring actors, release time, rank, score and thumbnail URL.
A field that cannot be found in an entry is printed as an empty line instead
of aborting the whole run.
"""
import re

url = 'https://maoyan.com/board'
# NOTE(review): hard-coded proxy endpoints; presumably public proxies that may
# be offline -- confirm before relying on them.
prox_list = {
    'http': 'http://171.38.37.239:8123',
    'https': 'https://221.224.136.211:35101',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}

# All patterns are compiled once here rather than on every loop iteration.
# One board entry.
base_list = re.compile(r'<dd>([\w\W]*?)</dd>')
# Movie title.
movie_title = re.compile(r'<a.*?>(.*?)</a>')
# Starring actors.
movie_start = re.compile(r'<p class="star">([\w\W]*?)</p>')
# Release time.
movie_time = re.compile(r'<p class="releasetime">([\w\W]*?)</p>')
# Rank on the board.
movie_mingci = re.compile(r'<i class="board-index board-index-\d+">([\w\W]*?)</i>')
# Score, split by the page into an integer part and a fraction part.
movie_integer = re.compile(r'<i class="integer">([\w\W]*?)</i>')
movie_fraction = re.compile(r'<i class="fraction">([\w\W]*?)</i>')
# Thumbnail image URL.
movie_tu = re.compile(r'<img data-src="(.*?)"')

# Order in which the fields of one movie are printed.
FIELD_ORDER = ('title', 'star', 'releasetime', 'rank', 'score', 'image')


def first_match(pattern, text, default=''):
    """Return the stripped first capture of *pattern* in *text*.

    Args:
        pattern: compiled regex with exactly one capturing group.
        text: string to search.
        default: value returned when the pattern does not match.

    Returns:
        The first group of the leftmost match with surrounding whitespace
        removed, or *default* when there is no match (the original code
        indexed ``findall(...)[0]`` and raised ``IndexError`` in that case).
    """
    found = pattern.search(text)
    if found is None:
        return default
    return found.group(1).strip()


def parse_movie(dd):
    """Extract the fields of one board entry.

    Args:
        dd: inner HTML of a single ``<dd>`` element.

    Returns:
        A dict with the keys listed in ``FIELD_ORDER``; a field that is
        absent from *dd* maps to an empty string.
    """
    return {
        'title': first_match(movie_title, dd),
        'star': first_match(movie_start, dd),
        'releasetime': first_match(movie_time, dd),
        'rank': first_match(movie_mingci, dd),
        # The two score parts are concatenated as text, e.g. "9." + "5".
        'score': first_match(movie_integer, dd) + first_match(movie_fraction, dd),
        'image': first_match(movie_tu, dd),
    }


def parse_board(html):
    """Return one ``parse_movie`` dict per ``<dd>`` entry found in *html*."""
    return [parse_movie(dd) for dd in base_list.findall(html)]


def fetch_board(timeout=10):
    """Download the board page and return its HTML text.

    Args:
        timeout: seconds to wait for the server before giving up; without
            it a dead proxy could block the script indefinitely.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    # Imported here so the parsing helpers stay importable without the
    # third-party dependency.
    import requests

    response = requests.get(url, proxies=prox_list, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.text


def main():
    """Fetch the board and print every field of every movie, one per line."""
    for movie in parse_board(fetch_board()):
        for field in FIELD_ORDER:
            print(movie[field])


if __name__ == '__main__':
    main()