python爬取图片教程-推荐|Python 爬虫系列教程一爬取批量百度图片

最新推荐文章于 2022-04-29 21:03:32 发布

weixin_39956558

最新推荐文章于 2022-04-29 21:03:32 发布

阅读量169

点赞数

# -*- coding: utf-8 -*-
# Python crawler tutorial series, part 1: batch-downloading Baidu images.
# Source: https://blog.csdn.net/qq_40774175/article/details/81273198
"""
Created on Sun Sep 13 21:32:25 2020

@author: ydc
"""
import os
import re
from urllib import error  # noqa: F401 - kept from the original script, unused below
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup

num = 0  # number of images saved so far (updated by dowmloadPicture)
numPicture = 0  # number of images the user asked for (set in main)
file = ""  # name of the output folder (set in main)
List = []  # one list of image URLs per result page seen by Find

# Image addresses are extracted from the page source as "objURL":"<url>",
OBJ_URL_PATTERN = re.compile(r'"objURL":"(.*?)",', re.S)

# The "pn" offset appended to the search URL is advanced by this much per request.
PAGE_STEP = 60


def Find(url, A):
    """Count the image URLs found on the result pages of a search.

    Requests ``url + str(offset)`` for offsets 0, 60, 120, ... below 1000 and
    stops at the first successfully fetched page that contains no image URL.
    Pages whose request fails are skipped. Every non-empty list of URLs is
    also appended to the module-level ``List``.

    Args:
        url: Search URL ending in ``&pn=``; the offset is appended to it.
        A: ``requests.Session`` used to perform the requests.

    Returns:
        Total number of image URLs matched across the fetched pages.
    """
    print("正在检测图片总数，请稍等.....")
    t = 0
    s = 0
    while t < 1000:
        Url = url + str(t)
        try:
            # Tweaked from the original tutorial: go through the session and
            # do not follow redirects.
            Result = A.get(Url, timeout=7, allow_redirects=False)
        except requests.RequestException:
            # Best effort: skip a page that cannot be fetched.
            t += PAGE_STEP
            continue
        # Use the regular expression to find the image URLs first.
        pic_url = OBJ_URL_PATTERN.findall(Result.text)
        if not pic_url:
            break
        s += len(pic_url)
        List.append(pic_url)
        t += PAGE_STEP
    return s


def recommend(url):
    """Return the link texts found in the ``div`` with id ``topRS``.

    Args:
        url: Address of the search result page to inspect.

    Returns:
        A list with the text of every ``<a>`` inside that ``div``. The list
        is empty when the request fails or the ``div`` is missing.
    """
    try:
        html = requests.get(url, allow_redirects=False)
    except requests.RequestException:
        # requests never raises urllib's HTTPError; return an empty list so
        # that callers can always iterate over the result.
        return []
    html.encoding = "utf-8"
    bsObj = BeautifulSoup(html.text, "html.parser")
    div = bsObj.find("div", id="topRS")
    if div is None:
        return []
    return [a.get_text() for a in div.findAll("a")]


def dowmloadPicture(html, keyword):
    """Download the images referenced in one result page.

    Each image is saved as ``<keyword>_<index>.jpg`` inside the folder named
    by the module-level ``file``. The module-level counter ``num`` is
    incremented after every saved image and the function returns as soon as
    ``num`` reaches ``numPicture``.

    Args:
        html: Source text of a search result page.
        keyword: Search keyword, used in the file names and progress output.
    """
    global num
    # Use the regular expression to find the image URLs first.
    pic_url = OBJ_URL_PATTERN.findall(html)
    print("找到关键词:" + keyword + "的图片，即将开始下载图片...")
    for each in pic_url:
        print("正在下载第" + str(num + 1) + "张图片，图片地址:" + str(each))
        try:
            pic = requests.get(each, timeout=7)
        except requests.RequestException:
            print("错误，当前图片无法下载")
            continue
        # os.path.join picks the right separator on every platform; the
        # original hard-coded a backslash.
        string = os.path.join(file, keyword + "_" + str(num) + ".jpg")
        with open(string, "wb") as fp:
            fp.write(pic.content)
        num += 1
        if num >= numPicture:
            return


def main():
    """Interactive entry point: ask for a keyword, then download the images."""
    global numPicture, file

    # Added on top of the original tutorial: browser-like headers on a
    # shared session.
    headers = {
        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0",
        "Upgrade-Insecure-Requests": "1",
    }
    A = requests.Session()
    A.headers = headers

    word = input("请输入搜索关键词(可以是人名，地名等): ")
    # Example of a complete result-page URL:
    # http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=%E5%BC%A0%E5%A4%A9%E7%88%B1&pn=120
    # Percent-encode the keyword so characters such as "&" or "#" cannot
    # corrupt the query string.
    url = (
        "https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word="
        + quote(word)
        + "&pn="
    )

    tot = Find(url, A)
    Recommend = recommend(url)  # remember the related recommendations
    print("经过检测%s类图片共有%d张" % (word, tot))

    numPicture = int(input("请输入想要下载的图片数量 "))
    file = input("请建立一个存储图片的文件夹，输入文件夹名称即可")
    # Keep asking until the name is free; the original asked only once more
    # and then crashed in os.mkdir if that name existed as well.
    while os.path.exists(file):
        print("该文件已存在，请重新输入")
        file = input("请建立一个存储图片的文件夹，)输入文件夹名称即可")
    os.mkdir(file)

    t = 0
    while t < numPicture:
        try:
            result = A.get(url + str(t), timeout=10, allow_redirects=False)
        except requests.RequestException:
            print("网络错误，请调整网络后重试")
        else:
            dowmloadPicture(result.text, word)
        t += PAGE_STEP

    print("当前搜索结束，感谢使用")
    print("猜你喜欢")
    # Do not name the loop variable "re": that would rebind the regex module.
    for item in Recommend:
        print(item, end=" ")


if __name__ == "__main__":  # script entry point
    main()