思路
https://www.xiaohongshu.com/user/profile/637e006e000000001f0155c2
爬取某个用户发布的全部图片:先从用户主页获取所有作品详情页的网址,
再在每个详情页中找到其中多张图片的地址,并逐一下载。
代码
import tkinter as tk
from tkinter.constants import *
import requests
import re
import os
# Directory prefix under which downloaded images are saved.  get_photos()
# builds paths as ``file_name + name + '.jpg'``, so the value must end with
# the platform's path separator.  os.path.join(..., '') appends the right one
# ('\\' on Windows, '/' elsewhere); a hard-coded backslash is just part of
# the file name on POSIX systems.
file_name = os.path.join('小红书', '')
# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(file_name, exist_ok=True)
def get_url():
    """Download the images of every post linked from a user's profile page.

    Reads the profile URL from the module-level ``url_entry`` widget, fetches
    the page, extracts the hidden post links with a regex and hands each
    absolute post URL to ``get_photos``.  Progress and errors are printed;
    nothing is returned.  Used as the download button's command.
    """
    profile_url = url_entry.get().strip()
    if not profile_url:
        # Nothing to fetch; requests would reject an empty URL anyway.
        print('No profile URL entered.')
        return

    # NOTE(review): this cookie is a hard-coded session credential committed
    # to source. It will expire and it identifies a real login; consider
    # loading it from configuration instead.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0',
        'cookie': 'abRequestId=0bbc9ebf-b60d-58e4-b277-698ffb30e43f; xsecappid=xhs-pc-web; a1=18ee9c296e3u83i3vhvynjfwcbyp0063ah6mg2rdq50000225936; webId=6f0890a4af08c825d1edb8044f42ff07; gid=yYddjSJji204yYddjSJjKxJFdq7Yq3qhxhvIKfAViESDv628U88Kqk888JJ2jqK80fKfi2S0; web_session=0400698c6920fd0a5631a6632a344b28c6a2a1; acw_tc=5e131ea13d320430168e088373708fd285cc49d7c90afcef818991d1acaa0cde; webBuild=4.14.2; websectiga=cf46039d1971c7b9a650d87269f31ac8fe3bf71d61ebf9d9a0a87efb414b816c; sec_poison_id=51a97ac5-b39c-4f63-8ea1-1bae5a36752b; unread={%22ub%22:%22661a777f000000001b0090da%22%2C%22ue%22:%22662e6903000000001c0095f6%22%2C%22uc%22:25}',
    }

    try:
        # Without a timeout a stalled connection would block this callback
        # (and therefore the whole window) indefinitely.
        resp = requests.get(url=profile_url, headers=headers, timeout=15)
        resp.raise_for_status()
    except requests.RequestException as err:
        # Also covers malformed URLs (e.g. a missing "https://" scheme).
        print('Failed to fetch profile page:', err)
        return

    # Each post is exposed as a hidden anchor whose href is a site-relative
    # path.  NOTE(review): the data-v-... attribute looks build-specific, so
    # this pattern may need updating when the site changes.
    photos_url = re.findall(
        r'<a style="display:none;" href="(.*?)" data-v-e98ee584></a>',
        resp.text,
    )
    if not photos_url:
        print('No post links found on the profile page.')
        return

    for photo_url in photos_url:
        photo_url = 'https://www.xiaohongshu.com' + photo_url
        print(photo_url)
        try:
            get_photos(photo_url)
        except requests.RequestException as err:
            # One unreachable post should not abort the remaining ones.
            print('Failed to download post', photo_url, '-', err)
def get_photos(url):
    """Download every image referenced by a post detail page.

    Fetches ``url``, collects the image URLs found in its ``og:image`` meta
    tags and writes each one to ``file_name + <name> + '.jpg'``, where
    ``<name>`` is the last path segment of the image URL.  Network failures
    are printed and skipped; nothing is returned.

    Args:
        url: Absolute URL of the post detail page.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0',
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller forever.
        resp = requests.get(url=url, headers=headers, timeout=15)
        resp.raise_for_status()
    except requests.RequestException as err:
        print('Failed to fetch post page', url, '-', err)
        return

    photos = re.findall('<meta name="og:image" content="(.*?)">', resp.text)
    for photo in photos:
        # The last URL segment becomes the file name; replace characters that
        # are not allowed in Windows file names (e.g. from a query string).
        name = re.sub(r'[\\/:*?"<>|]', '_', photo.split('/')[-1])
        print(name)
        try:
            image = requests.get(url=photo, headers=headers, timeout=15)
            # Do not save an HTTP error body to disk as if it were a picture.
            image.raise_for_status()
        except requests.RequestException as err:
            # Skip this image but keep downloading the others.
            print('Skipping', photo, '-', err)
            continue
        with open(file_name + name + '.jpg', mode='wb') as f:
            f.write(image.content)
# Build the single-window GUI: a URL entry box plus a button that starts
# the download via get_url().
window = tk.Tk()
window.title("小红书下载")
window.geometry('850x150')
window.configure(background="#9AC0CD")

# No widgets are added to this frame here; packed with vertical padding it
# leaves a gap above the input row.
frame_zhinan = tk.Frame(window)
frame_zhinan.pack(side=tk.TOP, pady=20)

# Row holding the URL entry and the download button.
frame_bofang = tk.Frame(window)
frame_bofang.pack(side=tk.TOP)

# get_url() reads the profile URL from this module-level entry widget.
url_entry = tk.Entry(frame_bofang, show=None, width=80)
url_entry.grid(row=0, column=0, columnspan=6)

btn = tk.Button(frame_bofang, text="开始下载", command=get_url)
btn.grid(row=0, column=10)

# Hand control to Tk; this call blocks until the window is closed.
window.mainloop()