"""
Purpose: automatically crawl images matching a keyword over a given range of
result pages and save them to a directory (the keyword is used as the folder
name; the image URL is used as the file name).
Params: word            search keyword
        begin_page_num  first page to fetch
        end_page_num    last page to fetch
"""
"""
TODO:
function: lsp's redemption
other:
author: limenghua
createTime: 2020-12-25 17:20
"""
import os
import re
import urllib
import urllib.parse

import requests
import urllib3
# Suppress urllib3's InsecureRequestWarning (emitted for HTTPS requests made
# with certificate verification disabled).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class LspFighting:
def __init__(self, word, begin_page_num, end_page_num):
"""
初始化
:param word: 搜索关键词
"""
self.url = "https://image.baidu.com/search/acjson?tn=resultjson_com&logid=11432824844719315390&ipn=rj&" \
"ct=&is=&fp=result&queryWord=%E7%BE%8E%E9%A3%9F&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&" \
"st=-1&z=&ic=0&hd=&latest=©right=&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&" \
"expermode=&force=&rn=30&1608887774681=" + "&word=" + word
self.word = word
self.headers = {
# 请求头
"User-agent": "Mozilla/5.0",
"Content-Type": "utf-8",
}
self.begin_page_num = begin_page_num # 开始页数
self.end_page_num = end_page_num # 结束页数
self.page_num = 30 # 每页编号的增加值
def decode_url(self, url):
"""
对百度加密后的地址进行解码
:param url:百度加密的url
:return:解码后的url
"""
table = {
'w': "a", 'k': "b", 'v': "c", '1'