# Environment: Python 3
#!/usr/bin/python
# -*-coding:utf-8-*-
# FileName:spider.py
# Author:Cody
# Date:2018/5/11 22:10
import os  # filesystem paths and directory creation
import random  # random numbers
import re  # regular expressions

import requests  # HTTP client
def _open_new_image_file(save_dir):
    """Open a not-yet-existing ``<number>.jpg`` in *save_dir* for binary writing.

    The first candidate number is random in [1000, 2000). If that name is
    already taken, the number is incremented until a free name is found, so
    an existing image is never overwritten.
    """
    number = random.randrange(1000, 2000)
    while True:
        try:
            # 'x' = exclusive creation: fails instead of truncating a file
            # that is already there.
            return open(os.path.join(save_dir, str(number) + '.jpg'), 'xb')
        except FileExistsError:
            number += 1


def spiderPic(html, keyword, save_dir='E:\\img\\'):
    """Download every image referenced by ``"objURL"`` entries in *html*.

    Args:
        html: Text of a search result page; image addresses are taken from
            every ``"objURL":"..."`` occurrence in it.
        keyword: The search keyword, used only in the progress message.
        save_dir: Directory the images are written to. It is created if it
            does not exist. Defaults to the original hard-coded location.

    Returns:
        None. Progress is reported on stdout; images that cannot be fetched
        are skipped.
    """
    print("正在查找:" + keyword + '对应的图片,正在从百度图库重下载,请稍等。。。 ')
    os.makedirs(save_dir, exist_ok=True)
    for addr in re.findall(r'"objURL":"(.*?)"', html, re.S):
        print("现在正在爬取的URL地址:" + addr)
        try:
            pics = requests.get(addr, timeout=10)
            # Treat HTTP error statuses as failures too, so an error page is
            # not stored on disk as a ".jpg".
            pics.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException is the base class of ConnectionError, Timeout,
            # HTTPError and the invalid-URL errors: one bad address must not
            # abort the remaining downloads.
            print("当前Url请求错误")
            continue
        with _open_new_image_file(save_dir) as fq:
            fq.write(pics.content)
        print('写入完成')
if __name__ == "__main__":
    # Script entry point: read a keyword from the user, fetch the Baidu image
    # search result page for it and hand the page text to spiderPic.
    word = input("请输入关键词:")
    # The query is passed through ``params`` so that requests percent-encodes
    # the keyword; concatenating it into the URL breaks the query string as
    # soon as the user types a space, '&' or '#'.
    result = requests.get(
        "https://image.baidu.com/search/index",
        params={
            "tn": "baiduimage",
            "ipn": "r",
            "istype": "2",
            "ie": "utf-8",
            "word": word,
        },
        # Without a timeout a stalled server would hang the script forever.
        timeout=10,
    )
    # NOTE(review): this "writing finished" message is emitted before any
    # image has been downloaded; order kept as in the original - confirm
    # whether it was meant to come after spiderPic().
    print("写入完毕")
    spiderPic(result.text, word)
# Example input: 美女
# Result: