思路和上文大同小异。
import requests
from requests.exceptions import RequestException
import re
import json
# HTTP request headers; the User-Agent value is a Chrome-on-macOS browser string.
headers = {
    'User-Agent': 'Mozilla/5.0(Macintosh;Intel Mac OS X 10_11_4)AppleWebKit/537.36(KHTML,like Gecko)Chrome/52.0.2743.116 Safari/537.36',
}
#提取单页内容,用try,except方便找bug
def get_one_page(url):
try:
response = requests.get(url, headers=headers)#传入headers参数
if response.status_code