# XPath version
import requests
from lxml import etree
import os
from hashlib import md5
def get_html(url, headers):
    """Fetch ``url`` with an HTTP GET request and return the body as text.

    Args:
        url: Address of the page to download.
        headers: Mapping of HTTP request headers to send with the request.

    Returns:
        The response body as a ``str`` (the ``text`` attribute of the
        ``requests`` response).
    """
    # The second positional parameter of ``requests.get`` is ``params`` (the
    # URL query string), so the headers must be passed by keyword; otherwise
    # they are appended to the URL instead of being sent as request headers.
    response = requests.get(url, headers=headers)
    return response.text
def parse_html(html):
    """Collect thread link targets from a listing page.

    Args:
        html: HTML source text of the listing page.

    Returns:
        The list of ``href`` attribute values taken from ``<a>`` elements that
        are direct children of a ``<div>`` whose class contains
        ``threadlist_title``, nested anywhere inside an ``<li>`` whose class
        contains ``j_thread_list``.
    """
    thread_link_query = '//li[contains(@class,"j_thread_list")]//div[contains(@class,"threadlist_title")]/a/@href'
    return etree.HTML(html).xpath(thread_link_query)
def parse_image(img_list):
    """Extract image source URLs from a page's HTML.

    Args:
        img_list: HTML source text to parse. Despite the name, the value is
            handed straight to the HTML parser rather than iterated.

    Returns:
        The list of ``src`` attribute values of every ``<img>`` element whose
        ``class`` attribute is exactly ``BDE_Image``.
    """
    image_query = '//img[@class="BDE_Image"]/@src'
    return etree.HTML(img_list).xpath(image_query)
def download_image(url, headers):
image_content = requests.get(url, headers).content
if not os.path.exists("yangmi"):
os.mkdir("yangmi")
file = md5(str(image_content).encode('utf-8')).hexdigest()
filename = "yangmi" + "//" + file + ".jpg"
if not os.path.exists(filename):
with