# -*- coding: utf-8 -*-
import feedparser
import requests
from lxml import etree
import threading
import random
import os


def get_url2():
    """Fetch the site-wide gallery index and return every gallery URL.

    Returns:
        list[str]: the ``href`` of each gallery link found on the index page.
    """
    index_url = 'https://www.mzitu.com/all/'
    r = requests.get(index_url)
    page = etree.HTML(r.text)
    # NOTE(review): absolute xpath is brittle against site redesigns — confirm
    # it still matches the current page layout.
    result = page.xpath('/html/body/div[2]/div[1]/div[2]/ul/li/p[2]/a')
    print('%d galleries found' % len(result))
    return [a.get('href') for a in result]


def download(dirname, imgurl):
    """Stream one image into ``dirname``, creating the directory if needed.

    Args:
        dirname: target directory (the gallery title).
        imgurl: direct URL of the image; its last path segment is the filename.
    """
    # The referer header is mandatory: the site rejects hot-linked requests.
    headers = {
        'referer': 'https://www.mzitu.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    }
    filename = imgurl.split('/')[-1]
    # Original duplicated the whole write path across an exists/mkdir branch
    # (race-prone between check and create); makedirs collapses both cases.
    os.makedirs(dirname, exist_ok=True)
    r = requests.get(imgurl, headers=headers, stream=True)
    with open(os.path.join(dirname, filename), 'wb') as f:
        # 8 KiB chunks; the original 32-byte chunks forced thousands of
        # tiny writes per image.
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
    print('downloaded: %s' % filename)
def get_img(url):
    """Scrape one gallery: read its title and page count, download every image.

    Args:
        url: the gallery's base URL; page ``i`` lives at ``url + '/' + str(i)``.
    """
    r = requests.get(url)
    page = etree.HTML(r.text)
    # Last pagination link holds the total page count; h2 holds the title.
    span = page.xpath('/html/body/div[2]/div[1]/div[4]/a[5]/span')
    hs = page.xpath('//h2[@class="main-title"]')
    title = None
    for h in hs:
        title = h.text
    pages = None
    for a in span:
        pages = a.text
    if title is None or pages is None:
        # Layout changed or empty page: the original raised NameError here and
        # silently ate it in a bare except.
        print('could not parse gallery %s; skipping' % url)
        return
    try:
        # Page 1 is the bare gallery URL and its image is skipped, matching the
        # original's `if i == 1: pass`. The original also started at 0 and
        # fetched the nonexistent url/0 page — start at 2 instead.
        for i in range(2, int(pages) + 1):
            imgpage = url + '/' + str(i)
            r1 = requests.get(imgpage)
            page1 = etree.HTML(r1.text)
            x_href = page1.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img')
            for href in x_href:
                download(title, href.get('src'))
    except KeyboardInterrupt:
        # Let Ctrl-C propagate instead of swallowing it like the original did.
        raise
    except Exception as e:
        # Best-effort per gallery, but log instead of a silent bare except.
        print('gallery %s failed: %s' % (url, e))
def main():
    """Spawn one scraper thread per gallery URL and wait for all to finish.

    NOTE(review): this starts one thread per gallery with no cap — presumably
    fine for small indexes, but a thread pool would bound concurrency.
    """
    urls = get_url2()
    # Iterate the URLs directly instead of range(len(urls)) + urls[0+i].
    threads = [threading.Thread(target=get_img, args=(u,)) for u in urls]
    for t in threads:
        t.start()
    for t in threads:
        t.join()


if __name__ == '__main__':
    main()