任务需求:输入关键字下载100个图片保存到本地,每个关键字单独存放一个文件夹(GUI版)
任务描述:当输入关键字时,会爬取100张与该关键词有关的图片保存到本地,每个关键词单独保存到一个文件夹中。比如输入"黑客",就下载100张关于黑客的图片,这些图片都保存到"黑客"文件夹;再输入"python",就会爬取100张与python有关的图片,保存到本地的"python"文件夹中。
pip install 模块名 [-i 镜像源地址,例如 https://pypi.tuna.tsinghua.edu.cn/simple]
pip install pyinstaller 打包程序
pip install PyQt5
"""Batch-download Baidu image-search results, one folder per keyword."""
import os
from pdb import set_trace
from pprint import pprint
from urllib.parse import quote

import requests


class 批量爬取百度图片:
    """Download up to ``total`` images for one keyword from Baidu image search.

    Images are saved as ``<keyword>/1.jpg``, ``<keyword>/2.jpg``, ... so each
    keyword gets its own folder, as the task description at the top of this
    file requires.
    """

    # Search endpoint that returns result pages as JSON.
    ACJSON_URL = 'https://image.baidu.com/search/acjson'

    headers = {
        'Accept': 'text/plain, */*; q=0.01',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&dyTabStr=MCwxLDMsMiw2LDQsNSw4LDcsOQ%3D%3D&word=%E9%BB%91%E5%AE%A2',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    def __init__(self, word='黑客', total=100):
        """Crawl ``total`` images for ``word`` into the folder ``word``.

        Backward compatible with the original no-argument constructor:
        defaults reproduce the original keyword ('黑客').
        """
        # Sequential file-name counter; 存储数据 increments it per saved image.
        self.cont = 1
        self.word = word
        self.total = total
        # One folder per keyword (task requirement); exist_ok makes re-runs safe.
        os.makedirs(word, exist_ok=True)
        # Baidu serves 30 results per page ("rn": "30"); walk pages until we
        # have saved ``total`` images or a page comes back empty/failed.
        pn = 0
        while self.cont <= total:
            links = self.发送请求(self.ACJSON_URL, word, pn)
            if not links:
                break  # request failed or no more results
            for link in links:
                if self.cont > total:
                    break
                self.下载图片(link)
            pn += 30

    def 发送请求(self, url, word='黑客', pn=30):
        """Fetch one page of search results and return its image links.

        Returns a (possibly empty) list of image URLs, or None when the
        request or JSON decoding fails, or when ``url`` is an image link
        (kept for compatibility with the original 'JPEG'-in-URL dispatch).
        """
        # Legacy dispatch preserved from the original API: a URL containing
        # 'JPEG' was treated as a direct image link, everything else as the
        # acjson search endpoint.
        if 'JPEG' in url:
            self.下载图片(url)
            return None
        params = {
            "tn": "resultjson_com",
            "logid": "7871683271133482576",
            "ipn": "rj",
            "ct": "201326592",
            "is": "",
            "fp": "result",
            "fr": "",
            # Fix: pass the raw keyword — requests URL-encodes params itself.
            # The original pre-quoted the word, which double-encoded it.
            "word": word,
            "queryWord": word,
            "cl": "2",
            "lm": "-1",
            "ie": "utf-8",
            "oe": "utf-8",
            "adpicid": "",
            "st": "",
            "z": "",
            "ic": "",
            "hd": "",
            "latest": "",
            "copyright": "",
            "s": "",
            "se": "",
            "tab": "",
            "width": "",
            "height": "",
            "face": "",
            "istype": "",
            "qc": "",
            "nc": "1",
            "expermode": "",
            "nojc": "",
            "isAsync": "",
            "pn": pn,
            "rn": "30",
        }
        try:
            res = requests.get(url, params=params, headers=self.headers,
                               timeout=10).json()
        except (requests.RequestException, ValueError):
            # Narrowed from the original bare `except: pass`; a failed page
            # is skipped rather than silently swallowing every exception.
            return None
        return self.解析源代码(res)

    def 下载图片(self, url):
        """Download one image and store its bytes; skip it on network error."""
        try:
            resp = requests.get(url, headers=self.headers, timeout=10)
            resp.raise_for_status()
        except requests.RequestException:
            return  # one unreachable image should not abort the whole run
        self.存储数据(resp.content)

    def 解析源代码(self, res):
        """Extract hoverURL image links from the acjson JSON payload.

        Returns an empty list (instead of the original ``False``) when the
        response is empty, so callers can always iterate the result — the
        original crashed with ``for i in False`` on an empty page.
        """
        if not res:
            return []
        return [item['hoverURL'] for item in res.get('data', [])
                if item.get('hoverURL')]

    def 存储数据(self, res):
        """Write raw image bytes to ``<keyword>/<cont>.jpg`` and advance cont."""
        folder = getattr(self, 'word', '.')  # tolerate pre-__init__ use
        path = os.path.join(folder, f'{self.cont}.jpg')
        with open(path, 'wb') as f:
            f.write(res)
        # Fix: the original never incremented cont, so every download
        # overwrote 1.jpg and at most one image survived.
        self.cont += 1


if __name__ == '__main__':
    # Guarded so importing this module does not trigger a network crawl.
    批量爬取百度图片()