首先打开浏览器,用自己的账号登录微博,进入喜欢的博主的相册页面,按 F12 打开开发者工具,从请求头中找到自己的登录 cookie。
打开 Python 开发工具(如 PyCharm),新建项目并新建 Python 文件 weibo_download_img.py,完整代码如下:
# -*- coding: utf-8 -*-
# @Time: 2023/4/17 12:49
# @Author: Yimi
# @File: weibo_download_img.py
# @Software: PyCharm
import hashlib
import importlib
import os
import random
import sys
import warnings
import requests
import urllib3
from logutils import Logger
# Suppress warnings globally, then silence urllib3's InsecureRequestWarning,
# which would otherwise be printed for every request made with verify=False.
# The original also referenced `requests.packages.urllib3.disable_warnings`
# without calling it, which did nothing; the call below is the effective one.
warnings.filterwarnings("ignore")
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# NOTE(review): reloading `sys` looks like a Python 2 leftover (it used to
# precede sys.setdefaultencoding); kept so module start-up behaviour is
# unchanged -- confirm whether it can be dropped.
importlib.reload(sys)
# ---- Parameters to modify before running ----
# Weibo user id whose image wall is requested.
uid = '5745848368'
# Root directory under which downloaded images are stored.
save_path = 'D:/BeautifulPictures/python_images/weibo/ID_EVA'
# Value sent in the `cookie` request header; replace with your own login cookie.
cookies = "替换自己的登录cookie替换自己的登录cookie替换自己的登录cookie替换自己的登录cookie"
# ---- End of parameters to modify ----

# Pool of User-Agent strings used for the image-wall API requests.
# The more the better.
meizi_headers = [
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0",
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
]

# Default headers built once at import time with a randomly picked User-Agent.
headers = {'User-Agent': random.choice(meizi_headers)}

# Second pool of User-Agent strings, used for the image downloads.
UserAgent = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1',
    'Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (Windows NT 10.0; WOW64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
]

# Default download headers, also built once at import time.
headers2 = {
    'User-Agent': random.choice(UserAgent),
    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    'Accept-Encoding': 'gzip',
    "Referer": "https://www.baidu.com",
}

# Module-level counter, initialised to zero.
count = 0
# Create a directory and switch into it
def createFile(file_path):
    """Ensure ``file_path`` exists as a directory and make it the working directory.

    :param file_path: directory to create (missing parent directories are
        created as well).
    :raises OSError: if the directory cannot be created or entered.
    """
    # exist_ok=True replaces the separate "exists?" check, so a directory
    # created by someone else between the check and the call is not an error.
    os.makedirs(file_path, exist_ok=True)
    # Switch the current working directory to the directory ensured above.
    os.chdir(file_path)
# Compute the MD5 digest of a string
# (helper functions used by the downloader are defined below)
def get_str_md5(content):
    """Return the hexadecimal MD5 digest of ``content``.

    :param content: text (``str``, encoded as UTF-8 before hashing) or a
        bytes-like object.
    :return: 32-character lowercase hexadecimal digest.
    """
    # hashlib only accepts bytes-like input on Python 3, so text must be
    # encoded first; bytes are passed through unchanged.
    if isinstance(content, str):
        content = content.encode('utf-8')
    return hashlib.md5(content).hexdigest()
# Download one image
def download(src, count):
    """Download the image at ``src`` into the local save directory.

    The target file is ``<save_path>/<host>/<size dir>/<file name>``, where the
    last three parts are taken from the URL. A file that already exists at
    that location is not downloaded again.

    :param src: image URL of the form ``https://<host>/<size dir>/<file name>``.
    :param count: label supplied by the caller; not used by this function.
    :raises IndexError: if ``src`` has fewer path components than expected.
    :raises requests.RequestException: if the HTTP request fails or times out.
    """
    request_headers = {
        'User-Agent': random.choice(UserAgent),
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        'Accept-Encoding': 'gzip',
        "Referer": "https://www.baidu.com",
        'cookie': cookies
    }
    log.logger.info('download 1. 图片 HTTP 地址 src ' + src)

    # 'https://host/size/name'.split('/') -> ['https:', '', host, size, name]
    parts = src.split('/')
    host, size_dir, file_name = parts[2], parts[3], parts[4]
    root_path = '{}/{}/{}'.format(save_path, host, size_dir)
    log.logger.info('download 1. 图片 HTTP 地址 file_name ' + file_name)

    # Check the very path that is written below. The previous check looked at
    # '<root>/<uid>/<file>', which is never created, so every image was
    # fetched again on each run.
    target = '{}/{}'.format(root_path, file_name)
    if os.path.exists(target):
        return

    # Create the directory without changing the working directory, so a
    # relative save_path keeps resolving against the same base on every call.
    os.makedirs(root_path, exist_ok=True)

    # A timeout keeps a stalled connection from blocking the whole run.
    response = requests.get(src, headers=request_headers, verify=False, timeout=30)
    if response.status_code != 200:
        # Do not store an error page under an image file name.
        log.logger.warning('download skipped, HTTP status {} for {}'.format(response.status_code, src))
        return

    with open(target, "wb") as image_file:
        image_file.write(response.content)
    print(src, " 下载成功", target)
def main(id, o, index):
    """Walk a user's Weibo image wall page by page and download every image.

    Pages are requested from the ``getImageWall`` endpoint for the module-level
    ``uid``; each response supplies the cursor (``since_id``) for the next page.

    :param id: cursor (``sinceid``) of the first page to request. The name
        shadows the builtin but is kept so existing callers keep working.
    :param o: number of pages already processed; incremented per page.
    :param index: number of images already processed; incremented per image
        and passed to ``download`` as a string label.
    """
    since_id = id
    # Iterate instead of recursing once per page, so long image walls cannot
    # hit Python's recursion limit.
    while True:
        o += 1
        url = "https://weibo.com/ajax/profile/getImageWall?uid={}&sinceid={}".format(uid, since_id)
        request_headers = {
            'User-Agent': random.choice(meizi_headers),
            'cookie': cookies
        }
        try:
            response = requests.get(url, headers=request_headers, timeout=30)
            data = response.json()['data']
            next_since_id = data['since_id']
            items = data['list']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, non-JSON body, or an unexpected payload shape
            # (for example when the cookie is invalid): report and stop.
            log.logger.exception('main: failed to fetch or parse ' + url)
            return

        for item in items:
            index += 1
            try:
                pid = item['pid']
                # Only 'gif' entries get a .gif URL; every other type is
                # fetched as .jpg.
                extension = 'gif' if item['type'] == 'gif' else 'jpg'
                image_url = 'https://wx2.sinaimg.cn/mw2000/{}.{}'.format(pid, extension)
                download(image_url, str(index))
            except Exception:
                # Best effort per image: record the failure and move on.
                log.logger.exception('main: failed to download item {}'.format(index))
                continue

        # NOTE(review): assumes the API marks the last page with an empty or
        # zero since_id -- confirm against real responses. The "unchanged
        # cursor" check guards against requesting the same page forever.
        if not items or next_since_id in (None, '', 0, '0') or next_since_id == since_id:
            return
        since_id = next_since_id
# Script entry point
if __name__ == '__main__':
    log = Logger('all.log', level='debug')
    # Start from the first page. The previous call passed the builtin
    # function `id`, which ended up formatted into the request URL.
    # NOTE(review): 0 is assumed to be the cursor of the first page -- confirm.
    main(0, 0, 0)
附:日志工具模块 logutils.py 的完整代码如下:
# -*- coding: utf-8 -*-
# @Time: 2022/9/14 14:29
# @Author: Yimi
# @File: logutils.py
# @Software: PyCharm
import logging
from logging import handlers
class Logger(object):
    """Wrapper that configures a named logger with console and rotating-file output.

    The configured ``logging.Logger`` is exposed as ``self.logger``.
    """

    # Mapping from level names accepted by __init__ to logging constants
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """Configure the logger named ``filename``.

        :param filename: log file path; also used as the logger name.
        :param level: one of the keys of ``level_relations``.
        :param when: rotation unit passed to ``TimedRotatingFileHandler``:
            S seconds, M minutes, H hours, D days, W0-W6 weekly on the given
            weekday (0 = Monday), midnight = roll over at midnight.
        :param backCount: number of rotated files to keep; older ones are
            deleted automatically.
        :param fmt: log record format string.
        """
        self.logger = logging.getLogger(filename)
        self.logger.setLevel(self.level_relations.get(level))  # set the log level

        # logging.getLogger returns the same object for the same name, so a
        # second Logger(filename) would otherwise stack another pair of
        # handlers and every record would be written twice. Reuse the
        # handlers already attached (their format and rotation settings stay
        # as first configured).
        if self.logger.handlers:
            return

        format_str = logging.Formatter(fmt)  # log record format
        sh = logging.StreamHandler()  # console output
        sh.setFormatter(format_str)
        # File output that starts a new file at the configured time interval
        th = handlers.TimedRotatingFileHandler(filename=filename, when=when, backupCount=backCount, encoding='utf-8')
        th.setFormatter(format_str)
        self.logger.addHandler(sh)
        self.logger.addHandler(th)
# Demo: emit one record per level to 'all.log', then one error to 'error.log'.
if __name__ == '__main__':
    log = Logger('all.log', level='debug')
    demo_logger = log.logger
    demo_logger.debug('debug')
    demo_logger.info('info')
    demo_logger.warning('警告')
    demo_logger.error('报错')
    demo_logger.critical('严重')

    error_log = Logger('error.log', level='error')
    error_log.logger.error('error')