老规矩,先上代码:
# coding=utf-8
import os
import requests
import time
from PIL import Image
from io import BytesIO
from lxml import etree
# 先定义一个opener函数:
def open_mn_web(url, timeout=10):
    """Download ``url`` and return the raw response body.

    Args:
        url: Address of the page to fetch. The same value is also sent as
            the ``Referer`` header.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``.

    Returns:
        The undecoded response body (``bytes``) when the server answers
        with HTTP 200. ``None`` for any other status code, or when the
        request itself fails (connection error, timeout, invalid URL, ...).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
        'Cookie': 'adClass0803 = 2;Hm_lvt_91a405e20ecacb7d51b80c9e4804c045 = 1557565685, 1557565698, 1557593627, 1557738534;Hm_lpvt_91a405e20ecacb7d51b80c9e4804c045 = 1557758918',
        'Referer': url,
    }
    try:
        # Without a timeout a stalled server would block this call forever.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Base class of every error requests raises for a failed request;
        # report the failure to the caller as "no page".
        return None

    # The encoding only influences response.text (the debug dump below);
    # the value handed back to the caller is the raw byte content.
    response.encoding = 'gb2312'
    print(response.text)

    if response.status_code == 200:
        return response.content
    return None
def parse_mn_web(html):
htmls = etree.