爬取百度图片450张猫图片

import requests
import json

# Search endpoint template; the single "{}" placeholder receives the "pn"
# query value when the template is formatted.
base_url = (
    "http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp="
    "result&queryWord=%E7%8C%AB&cl=2&lm=&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&"
    "copyright=&word=%E7%8C%AB&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&"
    "force=&pn={}&rn=30&gsm=3c&1586780971519="
)

# URL sent as the "Referer" request header.
referer = (
    "http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=&st=-1&fm=result&"
    "fr=&sf=1&fmq=1586774219869_R&pv=&ic=&nc=1&z=&hd=&latest=&copyright=&se=1&showtab=0&fb=0&width=&"
    "height=&face=0&istype=2&ie=utf-8&sid=&word=%E7%8C%AB"
)

# Request headers: a desktop Chrome User-Agent string plus the Referer above.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/80.0.3987.106 Safari/537.36"
    ),
    "Referer": referer,
}

# Running file-name counter. It is incremented only after an image has been
# written, so saved files are numbered consecutively (cat_1.jpg, cat_2.jpg, ...).
index = 1

# Request 15 result pages, formatting the template with 30, 60, ..., 450.
for i in range(1, 16):
    url = base_url.format(str(i * 30))
    # Send the same headers used for the image downloads, and bound the wait
    # so a stalled connection raises instead of hanging the script.
    response_1 = requests.get(url, headers=header, timeout=10)
    js_data = json.loads(response_1.text)
    # Walk the entries that were actually returned (at most 30, as before)
    # rather than indexing a fixed 0..29 range, which raised IndexError on a
    # short page.
    for item in (js_data.get("data") or [])[:30]:
        img_url = item.get("thumbURL")
        if not img_url:
            # Entry carries no thumbnail URL; nothing to download.
            continue
        print(img_url, index, sep=" ")
        response_2 = requests.get(img_url, headers=header, timeout=10)
        if response_2.status_code == 200:
            # The with-statement closes the file; no explicit close() needed.
            with open("/home/yan/jupyter_notebook_file/Neural_Network/baidu_image_cat_dataset/cat_" + str(index) + ".jpg",
                      'wb') as f:
                f.write(response_2.content)
            index += 1
        else:
            # Report the real status code instead of always claiming 403.
            print("Warning:" + str(response_2.status_code))
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值