Useless code
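The script below crawls the album list at https://www.mzitu.com/mm/, reads the page count from the pagination bar, and for every album creates a folder under F:\meizituu and downloads each image into it. Image downloads use a second header set that carries a Referer, which the site checks as hotlink protection.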

import os
import time

import requests
from bs4 import BeautifulSoup  # the lxml package must also be installed for the 'lxml' parser below

url = 'https://www.mzitu.com/mm/'
# Headers for HTML page requests; the hardcoded cookie values came from a browser session.
headers = {'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3','cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1572826495,1573125543; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1573212979','user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'}
# Headers for image downloads; the Referer is what gets past the site's hotlink protection.
headers2 = {'Referer': 'https://www.mzitu.com', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36'}
start_html = requests.get(url, headers=headers)
soup = BeautifulSoup(start_html.text, 'lxml')
# The second-to-last link in the pagination bar holds the highest page number.
max_page = soup.find('div', class_='nav-links').find_all('a')[-2].get_text()
for page in range(1, int(max_page) + 1):
    page_name = url + 'page/' + str(page)
    page_html = requests.get(page_name, headers=headers)
    time.sleep(0.01)  # brief pause between requests
    page_soup = BeautifulSoup(page_html.text, 'lxml')
    all_li = page_soup.find('div', class_='postlist').find_all('li')
    for li in all_li:
        # Swap the half-width '?' for its full-width form so the album title is a valid Windows filename.
        picname = li.find('img')['alt'].replace('?', '？')
        page_html_url = li.find('a')['href']
        path = str(picname).strip()  # strip leading/trailing whitespace
        os.makedirs(os.path.join(r"F:\meizituu", path), exist_ok=True)  # create a folder for this album
        os.chdir(os.path.join(r"F:\meizituu", path))  # switch into the folder just created
        image_html = requests.get(page_html_url, headers=headers)
        time.sleep(0.01)
        image_soup = BeautifulSoup(image_html.text, 'lxml')
        # The second-to-last <span> in the pager holds the number of images in the album.
        max_image_page = image_soup.find('div', class_='pagenavi').find_all('span')[-2].get_text()
        for image_page in range(1, int(max_image_page) + 1):
            image_page_url = page_html_url + '/' + str(image_page)
            every_image_html = requests.get(image_page_url, headers=headers)
            time.sleep(0.01)
            every_image_soup = BeautifulSoup(every_image_html.text, 'lxml')
            every_img_url = every_image_soup.find('div', class_='main-image').find('img')['src']
            name = every_img_url[-6:-4]  # the two characters before '.jpg' become the filename
            if os.path.exists(os.path.join(r"F:\meizituu", path, name + '.jpg')):
                print(name + '.jpg already exists in ' + path + '!')
            else:
                # Only fetch the image when it is not already on disk.
                every_img = requests.get(every_img_url, headers=headers2)
                with open(name + '.jpg', 'wb') as f:
                    f.write(every_img.content)
                print('Saved: ' + every_img_url)
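The download step could also be pulled into a small helper with a timeout, a status check, and retries. A minimal sketch, assuming the same Referer-based hotlink protection; the name download_image and the retry/timeout values are my own additions, not from the original script:

# A minimal sketch, not part of the original script: download_image and
# its retry/timeout parameters are assumptions for illustration.
import os
import time

import requests

def download_image(img_url, dest_path, referer_headers, retries=3, backoff=1.0):
    """Download img_url to dest_path, skipping files that already exist."""
    if os.path.exists(dest_path):
        return False  # already downloaded
    for attempt in range(retries):
        try:
            resp = requests.get(img_url, headers=referer_headers, timeout=10)
            resp.raise_for_status()  # surface 4xx/5xx instead of writing an error page to disk
            with open(dest_path, 'wb') as f:
                f.write(resp.content)
            return True
        except requests.RequestException:
            time.sleep(backoff * (attempt + 1))  # simple linear backoff before retrying
    return False

The inner loop could then call download_image(every_img_url, name + '.jpg', headers2) instead of the inline requests.get and file write.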

 
