导入
import requests
from lxml import etree
具体使用
import requests
from lxml import etree
import os,time
def mz_spider(base_url, headers):
    """Fetch a listing page and pass every gallery link on it to ``img_parse``.

    Args:
        base_url: URL of the listing page to download.
        headers: Mapping of HTTP request headers to send with the request.
    """
    # ``headers`` must be passed by keyword: the second positional parameter
    # of ``requests.get`` is ``params``, so a positional dict would be encoded
    # into the query string instead of being sent as request headers.
    res = requests.get(base_url, headers=headers)
    html = etree.HTML(res.text)
    # Collect the href of each <a> under the list items of div.TypeList.
    img_src = html.xpath('//div[@class="TypeList"]/ul/li/a/@href')
    for img_url in img_src:
        img_parse(img_url)
def img_parse(img_url):
headers = {
'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
'Referer': 'http://www.umei.cc/tags/qingchun_1.htm'
}
res=requests.get(img_url,headers)
res.encoding=res.apparent_encoding
html=etree.HTML(res.text)
#获取标题
title=html.xpath("//div[@class='Ar