爬取整个网站图片

最新推荐文章于 2024-09-12 18:31:52 发布

佐倉

最新推荐文章于 2024-09-12 18:31:52 发布

阅读量641

点赞数

分类专栏： python 爬虫

本文链接：https://blog.csdn.net/qq_38641985/article/details/105881389

版权

python 同时被 2 个专栏收录

194 篇文章 5 订阅

订阅专栏

爬虫

20 篇文章 0 订阅

订阅专栏

目标网站：http://pic.netbian.com（以下脚本为 Python 2 代码，依赖 requests 库）
在这里插入图片描述

#-*- coding:utf-8 -*-
import urllib2
import re,sys,os

# Python 2-only workaround: sys.setdefaultencoding is not reachable on the
# already-imported sys module, so the module is reloaded first and the
# process-wide default codec is then switched to UTF-8.
# NOTE(review): the prompts near the end of this script call .encode() on
# byte-string literals, which presumably relies on this UTF-8 default for the
# implicit decode step -- confirm before removing. Neither call exists on
# Python 3.
reload(sys)
sys.setdefaultencoding("utf-8")
#http://pic.netbian.com

import requests

# Running index used by down_img() to name saved files (1.jpg, 2.jpg, ...).
# It is module-level so the numbering continues across successive calls.
num = 1

# Request headers passed to every HTTP call made by down_img(), both the
# requests.get() page fetches and the urllib2 image download.
headers = {
    'Referer': 'http://pic.netbian.com/e/search/result/?searchid=1224',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
}
def down_img(url,root):
    """Download every wallpaper linked from one listing page into Pic/<root>/.

    The listing page at ``url`` is scanned for ``/tupian/<id>.html`` detail
    links. Each detail page is fetched in turn and the first
    ``/uploads....jpg`` path found in it is saved as ``Pic/<root>/<num>.jpg``,
    where ``num`` is the module-level counter shared across calls.

    Args:
        url: Address of the listing page to scan.
        root: Sub-directory name under ``Pic`` that receives the files.

    Side effects:
        Creates ``Pic/<root>`` when missing, writes image files, prints each
        target file name, and advances the global ``num`` once per detail
        page on which an image address was found.
    """
    global num

    # Both captured paths start with "/", so the base carries no trailing
    # slash; this avoids requesting "http://host//tupian/...".
    site = "http://pic.netbian.com"

    # Compiled once per call instead of once per image; the dots are escaped
    # so only literal ".html" / ".jpg" suffixes match.
    detail_reg = re.compile(r'a href="(/tupian/\w+\.html)"')
    image_reg = re.compile(r'/uploads.*?\.jpg')

    response = requests.get(url, headers=headers)
    file_pic = detail_reg.findall(response.text)

    # os.makedirs creates the intermediate "Pic" directory as well.
    target_dir = "Pic/" + root
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for i in file_pic:
        response = requests.get(site + i, headers=headers)
        found = image_reg.search(response.text)
        if found is None:
            # A detail page without a recognisable image path must not abort
            # the whole run; skip it and keep the numbering contiguous.
            continue

        new_name = "Pic/" + root + "/" + "%d" % num + ".jpg"
        print(new_name)

        # Only hit the network for files that are not on disk yet.
        if not os.path.exists(new_name):
            request = urllib2.Request(url=site + found.group(0), headers=headers)
            response = urllib2.urlopen(request)
            try:
                data = response.read()
            finally:
                response.close()
            # The file is opened only after the body was read, so a failed
            # download cannot leave an empty file that a later run would
            # mistake for a finished one.
            with open(new_name, "wb") as f:
                f.write(data)
        num += 1
 
# Category slugs as they appear in the listing URL path; the order matches
# the numbered menu shown in the prompt below.
type_pic=["4kfengjing","4kyouxi","4kmeinv","4kdongman","4kyingshi","4kmingxing",
          "4kqiche","4kdongwu","4krenwu","4kmeishi","4kzongjiao","4kbeijing"]
type_img=int(raw_input("请输入选择下载的类型：\n1.风景\n2.游戏\n3.美女\n4.动漫\n5.影视\n6.明星\n7.汽车\n8.动物\n9.人物\n10.美食\n11.宗教\n12.背景\n".encode(sys.getfilesystemencoding())))
# Reject out-of-range choices explicitly: 0 or a negative number would
# otherwise wrap around through negative indexing and silently select the
# wrong category, and a number above the menu size would raise IndexError.
if not 1 <= type_img <= len(type_pic):
    sys.exit("invalid category: expected a number between 1 and %d" % len(type_pic))
page=int(raw_input("请输入下载页数:页数>=1\n".encode(sys.getfilesystemencoding())))
# The prompt asks for at least one page; fail loudly instead of doing nothing.
if page < 1:
    sys.exit("invalid page count: expected a number >= 1")

category = type_pic[type_img-1]
for i in range(1,page+1):  # number of listing pages to walk; adjust as needed
    # The first listing page is index.html; later ones are index_<n>.html.
    if i == 1:
        url = "http://pic.netbian.com/%s/index.html" % category
    else:
        url = "http://pic.netbian.com/%s/index_%d.html" % (category, i)
    print(url)
    down_img(url,category)

print("finish!\n")

在这里插入图片描述