360图片搜索爬虫

最新推荐文章于 2021-12-01 22:22:32 发布

PWL&WQ

最新推荐文章于 2021-12-01 22:22:32 发布

阅读量1k

点赞数 1

分类专栏： python爬虫 文章标签： python爬取360图片 json

本文链接：https://blog.csdn.net/weixin_42618912/article/details/86547141

版权

python爬虫专栏收录该内容

1 篇文章

订阅专栏

360图片搜索爬虫

下面是一个360的图片搜索爬虫。使用前请先配置好下载目录，以及搜索关键词和搜索组数（一组1500左右）。

import re
import requests
import json
import os

# Root directory that downloads are saved under. Written as a raw string so
# the backslash is taken literally instead of starting the invalid escape "\p".
BASE_URL = r'F:\python2'
# Keyword to search for; also used as the name of the download sub-directory.
NAME = '哈欠'

class PictureDownload(object):
    """Download thumbnails from 360 image search for a single keyword.

    Result pages are requested from the mobile JSON endpoint stored in
    ``self.url``; the ``thumb`` image of every returned item is written to
    ``BASE_URL/<q>/<index>.jpg``.
    """

    # Seconds to wait on a single HTTP request. ``requests`` does not time
    # out by default, so a stalled connection would otherwise block forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, q=None, sn=100):
        """Store the query and reset the paging state.

        Args:
            q: Search keyword; also used as the download sub-directory name.
            sn: Kept on the instance as ``self.sn``. No method of this class
                reads it.
        """
        # First placeholder takes the keyword, second the start offset.
        self.url = 'https://m.image.so.com/j?q={}&src=srp&pn=100&sn={}&kn=0&gn=0&cn=0'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }
        self.q = q
        self.sn = sn
        # Start offset of the next result page to request.
        self.num = 0
        # Placeholder larger than ``num`` so the loop in run() is entered;
        # replaced by the total reported in the first response.
        self.total = 2

    def makedir(self):
        """Create the download directory ``BASE_URL/<q>`` if it is missing."""
        # exist_ok avoids the race between an exists() check and makedirs().
        os.makedirs(os.path.join(BASE_URL, self.q), exist_ok=True)

    def parse_url(self):
        """Fetch the result page at offset ``self.num``.

        Returns:
            The response body decoded to ``str``.

        Raises:
            requests.RequestException: on a network failure, a timeout or an
                HTTP error status.
        """
        response = requests.get(
            self.url.format(self.q, self.num),
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here with a clear HTTP error rather than later with a
        # confusing JSON decoding error on an error page.
        response.raise_for_status()
        return response.content.decode()

    def parse_image_list(self, html_json_str):
        """Extract the image items and the reported total from a JSON page.

        Args:
            html_json_str: JSON text of one result page.

        Returns:
            A ``(image_list, total)`` tuple. A missing or null ``list``
            yields ``[]`` and a missing or null ``total`` yields ``0``, so
            the caller can stop cleanly instead of failing with ``KeyError``.
        """
        # Parse once; the document carries both fields.
        data = json.loads(html_json_str)
        image_list = data.get('list') or []
        total = data.get('total') or 0
        return image_list, total

    def save_image(self, image_list):
        """Download the ``thumb`` URL of every item into the download directory.

        Each file is named after the item's ``index`` value. An image whose
        download fails is reported and skipped so that one bad URL does not
        abort the rest of the page.

        Args:
            image_list: Items as returned by ``parse_image_list``; each must
                have ``thumb`` and ``index`` keys.
        """
        for item in image_list:
            try:
                response = requests.get(
                    item['thumb'],
                    headers=self.headers,
                    timeout=self.REQUEST_TIMEOUT,
                )
                response.raise_for_status()
            except requests.RequestException as exc:
                print('skip %s: %s' % (item['thumb'], exc))
                continue
            # Let os.path.join insert the separator instead of a hard-coded
            # backslash inside the format string.
            target = os.path.join(BASE_URL, self.q, '%s.jpg' % item['index'])
            with open(target, 'wb') as f:
                f.write(response.content)

    def run(self):
        """Crawl result pages until the reported total is reached or a page is empty."""
        self.makedir()
        while self.num < self.total:
            html_json_str = self.parse_url()
            image_list, self.total = self.parse_image_list(html_json_str)
            if not image_list:
                # Nothing came back for this offset; stop rather than keep
                # requesting pages up to the reported total.
                break
            self.save_image(image_list)
            # Advance by the page size hard-coded as ``pn=100`` in the URL.
            self.num += 100
            print(self.num)




if __name__ == '__main__':
    # Script entry point: crawl the images for the configured keyword.
    PictureDownload(NAME).run()