爬取百度图片

最新推荐文章于 2024-05-27 13:31:48 发布

回眸忆梦

最新推荐文章于 2024-05-27 13:31:48 发布

阅读量200

点赞数 1

分类专栏：爬虫

本文链接：https://blog.csdn.net/qq_46689983/article/details/106841898

版权

爬虫专栏收录该内容

13 篇文章 0 订阅

订阅专栏

import requests
from day02_spider.useragents import  ua_list
import random
import os
import re
from urllib import parse

class BaiduImageSpider(object):
    """Download the thumbnail images listed on a Baidu image search page.

    The spider requests the search page for a keyword, pulls every
    ``"thumbURL"`` value out of the returned HTML with a regular expression
    and saves each image as ``<save_root>/<word>/<word>_<n>.jpg``.
    """

    # Seconds to wait for any single HTTP request, so that a stalled
    # connection cannot hang the whole run.
    REQUEST_TIMEOUT = 10

    # Characters that are not allowed in Windows file names. The set includes
    # both path separators, so a keyword cannot point outside its own folder.
    _UNSAFE_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, save_root="D:/Python_code/python_train/images/"):
        """Create a spider.

        Args:
            save_root: Directory under which one sub-directory per keyword is
                created. Defaults to the location the script always used.
        """
        self.url = 'https://image.baidu.com/search/index?tn=baiduimage&word={}'
        self.save_root = save_root
        # Running counter used to number the saved files.
        self.i = 1

    @classmethod
    def _safe_name(cls, word):
        """Return ``word`` made safe for use as a file or directory name."""
        cleaned = cls._UNSAFE_CHARS.sub("_", word)
        # Names made only of dots ("..", ".") would address a parent/current
        # directory, and Windows rejects trailing dots and spaces.
        return cleaned.rstrip(" .") or "_"

    def get_image(self, url, word):
        """Fetch the search page at ``url`` and download every thumbnail.

        Args:
            url: Fully formatted (already URL-encoded) search page address.
            word: The raw keyword; used to name the target directory/files.

        Raises:
            requests.RequestException: If the search page itself cannot be
                fetched (network error, timeout or HTTP error status).
        """
        headers = {"User-Agent": random.choice(ua_list)}
        response = requests.get(url=url, headers=headers,
                                timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of silently scraping an error page.
        response.raise_for_status()

        # Extract the image links embedded in the page.
        pattern = re.compile('"thumbURL":"(.*?)"', re.S)
        img_link_list = pattern.findall(response.text)

        directory = os.path.join(self.save_root, self._safe_name(word)) + "/"
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(directory, exist_ok=True)

        # Download the images one by one.
        for img_link in img_link_list:
            self.save_image(img_link, directory, word)

    def save_image(self, img_link, directory, word):
        """Download one image and write it into ``directory``.

        A link that is empty or cannot be downloaded is skipped, so one bad
        link does not abort the rest of the batch. The file counter only
        advances after a successful save.

        Args:
            img_link: URL of the image to download.
            directory: Existing target directory, ending with a separator.
            word: The keyword, used as the file name prefix.
        """
        # The link regex can capture an empty string; there is nothing to
        # request in that case.
        if not img_link:
            return

        headers = {"User-Agent": random.choice(ua_list)}
        try:
            response = requests.get(url=img_link, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT)
            # Do not store an HTTP error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as err:
            print(img_link, "下载失败：", err)
            return

        filename = directory + '{}_{}.jpg'.format(self._safe_name(word), self.i)
        # response.content is the raw bytes of the image.
        with open(filename, "wb") as f:
            f.write(response.content)
        self.i += 1
        print(filename, "下载成功！")

    def run(self):
        """Entry point: ask for a keyword and download its search results."""
        word = input("请输入你要保存的图片：")
        # The keyword must be percent-encoded before it goes into the URL.
        encoded_word = parse.quote(word)
        url = self.url.format(encoded_word)
        self.get_image(url, word)

if __name__ == "__main__":
    # Run the interactive spider only when executed as a script.
    BaiduImageSpider().run()

回眸忆梦

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
爬取百度图片

import requests from day02_spider.useragents import ua_list import random import os import re from urllib import parse class BaiduImageSpider(object): def __init__(self): self.url='https://image.baidu.com/search/index?tn=baiduimage&word={…
复制链接

扫一扫