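## First write-up by a beginner at web scraping -- experts, please be kind.
# Baidu's default "index" page serves image results as a waterfall (infinite-scroll) feed; switching the path from "index" to "flip" gives the traditional paginated page.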
import os
import re
import urllib.parse

import requests
# Ask the user for a search term and percent-encode it so it is safe to place
# in the query string.
kw = input('请输入所搜图片名称:')
key = urllib.parse.quote(kw)
# Base URL of the Baidu image search "flip" endpoint; the encoded keyword is
# appended as the value of the trailing `word=` parameter.
baseurl = 'https://image.baidu.com/search/flip?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word='
# Step 1: build the target URL.
url = baseurl + key
headers = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Mobile Safari/537.36'
}
# Step 2: fetch the page source.
# The timeout keeps the script from hanging forever on an unresponsive server,
# and raise_for_status() stops early on an HTTP error instead of silently
# scanning an error page that contains no image links.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
res = response.text
# Step 3: collect every image address with a regular expression. The page
# embeds them as pairs such as:
#   "objURL":"http://www.17qq.com/img_qqtouxiang/86796769.jpeg"
result = re.findall(r'"objURL":"(.*?)"', res)
# Download every image URL in `result` into the local image/ directory.
# Create the directory up front so open() below cannot fail because it is missing.
os.makedirs('image', exist_ok=True)
for i in result:
    # Use the last 10 characters of the URL as the file name, dropping any '/'
    # so the name cannot point into a sub-directory.
    name = i[-10:].replace('/', '')
    # Fall back to a .jpg extension when the name does not already end in one
    # of the known image extensions.
    if not name.endswith(('.jpg', '.png', '.jpeg', '.gif')):
        name = name + '.jpg'
    print(name)
    # Fetch the image before opening the file: a failed request is reported
    # and skipped, so it never leaves an empty file behind and never writes
    # the previous iteration's bytes under this name.
    try:
        r = requests.get(i, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        print(e)
        continue
    # Save the image bytes.
    with open('image/' + name, 'wb') as f:
        f.write(r.content)
# The two triple-quoted strings below are bare string literals, not executed
# code. They keep two earlier examples of downloading a single image.
# Example 1: request the image URL with requests.get, take the response bytes,
# then write them to 1.png.
'''
# 爬取一张图片
# 方法一:
# 获取图片url
url = 'https://ss1.bdstatic.com/70cFvXSh_Q1YnxGkpoWK1HF6hhy/it/u=1078861629,3747050294&fm=26&gp=0.jpg'
# 发起请求,获取响应,返回响应对象
res = requests.get(url)
r = res.content
# 保存图片
with open('1.png', 'wb') as f:
f.write(r)
'''
# Example 2: open 1.jpg for binary writing first, then request the image URL
# and write the response bytes inside the with-block.
'''
# 爬取一张图片
# 方法二:
# url = 'https://ss0.bdstatic.com/70cFuHSh_Q1YnxGkpoWK1HF6hhy/it/u=2775621640,2893161556&fm=26&gp=0.jpg'
#
# with open('1.jpg','wb') as f:
#
#
# r = requests.get(url)
#
# f.write(r.content)
'''