百度抓图（python）

最新推荐文章于 2023-05-04 17:05:19 发布

m0_37571522

最新推荐文章于 2023-05-04 17:05:19 发布

阅读量263

点赞数

本文链接：https://blog.csdn.net/m0_37571522/article/details/90574533

版权

import requests
import re
import os
import urllib

# Search keyword shared by the functions in this file: downLoadImage reads it
# as the output directory name and as the file-name prefix. It starts empty
# and is reassigned from user input further down in the script.
keyword = ''

def getBaiduImage(url, max_images=200):
    """Crawl Baidu image result pages starting at *url* and download the images.

    Each page is fetched, every ``"objURL"`` entry in it is handed to
    ``downLoadImage``, and the "next page" link is followed until there is
    no further page, a request fails, or the running image counter exceeds
    *max_images*.

    Args:
        url: URL of the first result page.
        max_images: Paging stops once the counter returned by
            ``downLoadImage`` is greater than this value. A page is always
            processed in full, so the final count may overshoot.

    Returns:
        None. Progress and failures are reported on stdout.
    """
    obj_url_pattern = re.compile('"objURL":"(.*?)",', re.S)
    next_page_pattern = re.compile(r'<a href="(.*)" class="n">下一页</a>')

    downloaded = 0
    page_url = url
    while page_url:
        try:
            rsp = requests.get(page_url, timeout=15)
            rsp.raise_for_status()
        except requests.RequestException:
            # Covers connection errors, timeouts and non-2xx responses for
            # the first page as well as every follow-up page.
            print('对不起，百度图片访问失败！程序退出')
            return

        pic_urls = obj_url_pattern.findall(rsp.text)
        downloaded = downLoadImage(pic_urls, downloaded)
        if downloaded > max_images:
            break

        # findall returns a list; an empty list means this was the last
        # page, so stop instead of indexing into it.
        next_links = next_page_pattern.findall(rsp.text)
        if next_links:
            page_url = 'http://image.baidu.com/' + next_links[0]
        else:
            page_url = None

def downLoadImage(pictures, cnt):
    """Download every URL in *pictures* into the directory named by ``keyword``.

    Files are named ``<keyword><index><extension>``, where the index starts
    at *cnt* and only advances after a successful download, so a failed URL
    does not leave a gap in the numbering.

    Args:
        pictures: Iterable of image URLs.
        cnt: Index to use for the first saved file.

    Returns:
        The index that the next saved file should use, i.e. *cnt* plus the
        number of images saved by this call.
    """
    count = cnt
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(keyword, exist_ok=True)

    for pic_url in pictures:
        try:
            response = requests.get(pic_url, timeout=15)
            # Do not store an HTTP error page as if it were an image.
            response.raise_for_status()

            # Take the extension from the path part only; a query string,
            # fragment or dotted host name must not leak into the file name.
            path_part = pic_url.split('?', 1)[0].split('#', 1)[0]
            extension = os.path.splitext(path_part)[1]

            # os.path.join keeps the file inside the directory on every OS
            # (a literal backslash is just a file-name character on POSIX).
            file_name = os.path.join(keyword, keyword + str(count) + extension)
            with open(file_name, 'wb') as image_file:
                image_file.write(response.content)
        except (requests.RequestException, OSError, ValueError):
            # Best effort: report the failure and move on to the next URL.
            print('No.%d failed' % count)
            continue
        else:
            print('No.%d success' % count)
            count += 1
    return count

# Script entry: ask the user for a search term and build the first result URL.
# `import urllib` alone does not guarantee that the `urllib.parse` submodule
# is loaded, so import it explicitly before using urllib.parse.quote.
import urllib.parse

# Search keyword; change the input to whatever you want to look for.
# downLoadImage also uses it as the output directory name and file prefix.
keyword=input("输入需要查找的主题")
# Baidu image search URL; the percent-encoded keyword is appended after `word=`.
url_init_first = 'http://image.baidu.com/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1497491098685_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ctd=1497491098685%5E00_1519X735&word='
url= url_init_first + urllib.parse.quote(keyword, safe='/')
# NOTE(review): no call to getBaiduImage(url) is visible in this section;
# confirm that it follows, otherwise the script only builds the URL.