# 因发帖规则，如需爬取其他各大网站，请详细阅读我发布的文章。
# 本代码仅供教学使用，任何非法行为与本帖无关。
# 超级规范的代码 —— 供大家免费使用、教学（搜狗）
import json
import os
from urllib.parse import quote

import requests
def get_html(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the raw response body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as bytes (``response.content``). The HTTP status
        code is not checked, so the body of an error page is returned as-is.
    """
    response = requests.get(url, timeout=timeout)
    return response.content
def paqu(shuliang, name, path):
    """Download up to ``shuliang`` Sogou image-search results for ``name``.

    Result pages are requested 48 entries at a time, starting from the first
    result. Each entry's ``oriPicUrl`` is fetched with ``get_html`` and the
    bytes are written to ``<path>/<k>.jpg``, where ``k`` counts saved images
    from 1. An image whose download fails is reported and skipped, so saved
    files stay consecutively numbered and fewer than ``shuliang`` files may
    be produced.

    Args:
        shuliang: Maximum number of search results to try; converted with
            ``int()``.
        name: Search keyword; percent-encoded before it is placed in the URL.
        path: Output directory, created if it does not already exist.

    Raises:
        ValueError: If a search response is not valid JSON.
        OSError: If the directory or an image file cannot be written, or if
            the search request itself fails.
    """
    page_size = 48  # sent as xml_len; also the step between page offsets
    os.makedirs(path, exist_ok=True)

    remaining = int(shuliang)  # search results still to try
    k = 1  # number used for the next saved file
    start = 0
    while remaining > 0:
        url = ('https://pic.sogou.com/napi/pc/searchList'
               '?mode=1&start=%d&xml_len=%d&query=%s'
               % (start, page_size, quote(name, safe='')))
        json_object = json.loads(get_html(url))
        items = (json_object.get("data") or {}).get("items") or []
        if not items:
            # No more results for this keyword.
            break

        batch = items[:remaining]
        remaining -= len(batch)
        start += page_size

        for item in batch:
            imageurl = item.get("oriPicUrl")
            if not imageurl:
                continue
            print("正在下载第" + str(k) + "张")
            try:
                imagedata = get_html(imageurl)
            except OSError:
                # requests' RequestException derives from IOError/OSError.
                # Skip this image instead of writing stale or missing data.
                print("下载失败")
                continue
            with open(os.path.join(path, str(k) + ".jpg"), "wb") as f:
                f.write(imagedata)
            k += 1
    print("下载完成")
if __name__ == '__main__':
    # Ask for the search keyword, the number of images, and the output
    # directory, then start the download.
    guanjianc = input("请输入关键字:")
    shuliang = int(input("请输入获取数量"))
    wenjianmin = input("保存文件名")
    # Pass the directory name the user entered; the string literal
    # "wenjianmin" was passed before, so the input was ignored.
    paqu(shuliang, guanjianc, wenjianmin)
# 使用效果图
# 还有什么问题，可在评论区提问。