# 爬取多页图片 (crawl images across multiple listing pages)

import requests
import re
import os
# Browser-like request headers shared by every request in this script.
# BUG FIX: in the original, "X-Requested-With: XMLHttpRequest" was fused onto
# the end of the User-Agent string; it is now its own header field.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}
# requests.Response accessors, for reference:
#   .content -> raw bytes   (use this for image / video downloads)
#   .json()  -> parsed JSON object
#   .text    -> decoded string
def get_page_response(url):
    """Perform an HTTP GET for *url* using the module-wide browser headers.

    Returns the raw ``requests.Response`` so callers choose how to decode it.
    """
    return requests.get(url=url, headers=headers)
def get_page_text(response):
    """Return the response body as raw bytes (suitable for binary media)."""
    return response.content
def write_file(page_text, filename):
    """Write binary image data to ``./<filename>.jpg``.

    BUG FIX: the original ignored *filename* and always wrote to the
    hard-coded path ``./(unknown).jpg`` (a scrape-mangled f-string), so
    every download overwrote the previous one.

    :param page_text: raw image bytes to persist
    :param filename: base name (without extension) for the output file
    :returns: None
    """
    with open(f"./{filename}.jpg", "wb") as fp:
        fp.write(page_text)
    return None
def main():
    """Download one hard-coded sample image and save it as ``1.jpg``."""
    image_url = "http://img.itlun.cn/uploads/allimg/180506/1-1P5061TS6-lp.jpg"
    response = get_page_response(image_url)
    image_bytes = get_page_text(response)
    write_file(image_bytes, "1")
def main1():
    """Scrape the first page of the 4K listing and save every thumbnail.

    Downloads each ``<img src>`` found on the page into ``./女汉子/``,
    named after the last path segment of the image URL.
    """
    listing_url = "http://pic.netbian.com/4kmeinv/"
    html = requests.get(url=listing_url, headers=headers).text
    pattern = '<img src="(.*?)" alt.*?>'
    for relative_src in re.findall(pattern, html, re.S):
        image_url = 'http://pic.netbian.com' + relative_src
        image_bytes = requests.get(url=image_url, headers=headers).content
        image_name = image_url.split("/")[-1]
        with open("./女汉子/" + image_name, "wb") as fp:
            fp.write(image_bytes)
            print(image_name + "下载成功!!!!!")
def main2():
    """Scrape the pic.netbian.com home page plus listing pages 2-49.

    IMPROVEMENT: the original duplicated the fetch/parse/save logic twice
    (once inline for page 1, once inside the pagination loop); it is now
    factored into ``_download_listing_page`` and called from both places.
    Behavior and URLs are unchanged.
    """
    url = "http://pic.netbian.com/"
    # Page 1 lives at the bare site root.
    _download_listing_page(url)
    for pageNum in range(2, 50):
        # Subsequent pages follow the pattern index_<n>.html,
        # e.g. http://pic.netbian.com/index_3.html
        _download_listing_page(url + "index_" + str(pageNum) + ".html")

def _download_listing_page(page_url):
    """Fetch one listing page, extract every <img src>, and save each image.

    Images are written into ``./女汉子/`` (assumed to already exist),
    named after the final path segment of the image URL.

    :param page_url: full URL of the listing page to scrape
    """
    page_text = requests.get(url=page_url, headers=headers).text
    ex = '<img src="(.*?)" alt.*?>'
    img_src_list = re.findall(ex, page_text, re.S)
    for src in img_src_list:
        # src in the HTML is site-relative; prefix the host.
        src = 'http://pic.netbian.com' + src
        src_data = requests.get(url=src, headers=headers).content
        img_name = src.split("/")[-1]
        img_path = "./女汉子/" + img_name
        with open(img_path, "wb") as fp:
            fp.write(src_data)
            print(img_name + "下载成功!!!!!")
if __name__ == "__main__":
    # main()
    # Make sure the download directory exists before any page is scraped.
    os.makedirs("./女汉子", exist_ok=True)
    # main1()
    main2()
    # Pagination pattern: http://pic.netbian.com/index_3.html
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

神来回复

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值