# -*- coding:utf-8 -*-
import json
import requests
from requests.exceptions import RequestException
import re
import time
def get_one_page(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The response text when the server answers with HTTP 200.
        ``None`` for any other status code, or when the request raises a
        ``RequestException`` (connection failure, timeout, ...).
    """
    # Send a desktop-browser User-Agent instead of the library default.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36'
    }
    try:
        # Without an explicit timeout requests waits indefinitely on a
        # stalled connection; a timeout surfaces as a RequestException.
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    # Hand back the fetched HTML text.
    return response.text
def parse_one_page(html):
#定义正则表达式,获取需要的信息
pattern = re.compile(’
+ '.?>(. ?).?star">(. ?)