[爬个图]从某度图片获取数据

最新推荐文章于 2022-03-02 22:37:58 发布

小新GSUNG0222

最新推荐文章于 2022-03-02 22:37:58 发布

阅读量187

点赞数 1

分类专栏：爬虫　文章标签：其他

本文链接：https://blog.csdn.net/qq_36848732/article/details/106617111

版权

爬虫专栏收录该内容

1 篇文章 0 订阅

订阅专栏

爬取某度图片搜索结果里的缩略图，没什么好多说的，代码如下，直接用即可：

import requests
import re
import time
import os
import urllib.parse
import json

# Originally described as "number of pages to crawl". In the request URLs
# built in this file the value is sent as the `rn` query parameter and is
# multiplied by the loop counter to form `pn` (presumably results-per-request
# and result offset -- confirm against the endpoint's behaviour).
page_num = 1
# Root directory under which one sub-directory per search keyword is created.
photo_dir = "C:\\Users\\hp\\Desktop\\getBaiduImg\\"

def getThumbImage(word, save_dir=None, timeout=10):
    """Download thumbnails for *word* from Baidu's ``search/flip`` result pages.

    Requests 50 result pages. For each page every ``"thumbURL"`` value found
    in the response text is fetched (with the page URL as ``Referer``) and
    written to ``save_dir`` under the last path segment of the image URL.

    Args:
        word: Search keyword; it is URL-quoted before being put in the query.
        save_dir: Directory that receives the images. Defaults to
            ``os.path.join(photo_dir, word)`` and is created when missing, so
            the function no longer depends on a global set by the script
            entry point.
        timeout: Seconds to wait for each HTTP request before giving up.

    Failed page or image requests are reported with ``print`` and skipped so
    that one bad response does not abort the whole crawl.
    """
    if save_dir is None:
        save_dir = os.path.join(photo_dir, word)
    os.makedirs(save_dir, exist_ok=True)

    url = "http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={0}&pn={1}"
    quoted_word = urllib.parse.quote(word)
    # `pn` only advances by `page_num` per request, so consecutive pages may
    # list the same thumbnails again; remember what was already fetched.
    seen = set()

    for num in range(50):
        page_url = url.format(quoted_word, num * page_num)
        print(page_url)
        try:
            response = requests.get(page_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print("skipped page {0}: {1}".format(page_url, exc))
            continue

        pic_urls = re.findall('"thumbURL":"(.*?)",', response.text, re.S)
        headers = {
            "Referer": page_url,
        }
        for pic_url in pic_urls:
            if pic_url in seen:
                continue
            seen.add(pic_url)
            print(pic_url)

            # The last URL segment may carry a query string or other
            # characters that Windows rejects in file names.
            name = re.sub(r'[<>:"/\\|?*]', "_", pic_url.split('/')[-1])
            if not name:
                continue

            try:
                image = requests.get(pic_url, headers=headers, timeout=timeout)
                # Do not store an HTTP error body as if it were an image.
                image.raise_for_status()
            except requests.RequestException as exc:
                print("skipped image {0}: {1}".format(pic_url, exc))
                continue

            with open(os.path.join(save_dir, name), 'wb') as f:
                f.write(image.content)

def getThumb2Image(word, save_dir=None, timeout=10):
    """Download thumbnails for *word* from Baidu's ``search/acjson`` endpoint.

    Issues 50 requests. Each request sends ``page_num`` as ``rn`` and
    ``num * page_num`` as ``pn``. Every ``"thumbURL"`` value found in the
    response text is fetched (with the request URL as ``Referer``) and
    written to ``save_dir`` under the last path segment of the image URL.
    The 1-based count of processed requests is printed after each one.

    Args:
        word: Search keyword; it is URL-quoted before being put in the query.
        save_dir: Directory that receives the images. Defaults to
            ``os.path.join(photo_dir, word)`` and is created when missing, so
            the function no longer depends on a global set by the script
            entry point.
        timeout: Seconds to wait for each HTTP request before giving up.

    Failed page or image requests are reported with ``print`` and skipped so
    that one bad response does not abort the whole crawl.
    """
    if save_dir is None:
        save_dir = os.path.join(photo_dir, word)
    os.makedirs(save_dir, exist_ok=True)

    url = "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={0}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word={0}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn={1}&rn="+str(page_num)+"&gsm=1e&1552975216767="
    quoted_word = urllib.parse.quote(word)

    for num in range(50):
        page_url = url.format(quoted_word, num * page_num)
        print(page_url)
        try:
            response = requests.get(page_url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            print("skipped page {0}: {1}".format(page_url, exc))
            pic_urls = []
        else:
            pic_urls = re.findall('"thumbURL":"(.*?)",', response.text, re.S)

        headers = {
            "Referer": page_url,
        }
        for pic_url in pic_urls:
            print(pic_url)

            # The last URL segment may carry a query string or other
            # characters that Windows rejects in file names.
            name = re.sub(r'[<>:"/\\|?*]', "_", pic_url.split('/')[-1])
            if not name:
                continue

            try:
                image = requests.get(pic_url, headers=headers, timeout=timeout)
                # Do not store an HTTP error body as if it were an image.
                image.raise_for_status()
            except requests.RequestException as exc:
                print("skipped image {0}: {1}".format(pic_url, exc))
                continue

            with open(os.path.join(save_dir, name), 'wb') as f:
                f.write(image.content)

        # Progress indicator: number of requests handled so far.
        print(num + 1)
        

if __name__ == "__main__":
    # Search keyword; it also names the download sub-directory.
    word = "人脸"
    # Assigned at module level: any function in this file that refers to the
    # global `word_dir` depends on this assignment.
    word_dir = os.path.join(photo_dir, word)
    # makedirs also creates `photo_dir` itself when it does not exist yet
    # (os.mkdir would raise FileNotFoundError) and exist_ok avoids the
    # check-then-create race.
    os.makedirs(word_dir, exist_ok=True)
    getThumb2Image(word)

小新GSUNG0222

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
[爬个图]从某度图片获取数据

爬个某度的图片，没啥说的，直接用吧import requestsimport reimport timeimport osimport urllib.parseimport json# 要爬的页数page_num=1 photo_dir="C:\\Users\\hp\\Desktop\\getBaiduImg\\"def getThumbImage(word): num=0 url = "http://image.baidu.com/search/flip?tn=
复制链接

扫一扫