【python】自动下载微博相册

最新推荐文章于 2024-05-10 08:20:32 发布

Tiramisu2023

最新推荐文章于 2024-05-10 08:20:32 发布

阅读量922

点赞数 1

文章标签： python windows firefox

本文链接：https://blog.csdn.net/lubiancongzi/article/details/130197661

版权

首先打开浏览器，用自己账号登录微博，找到喜欢的博主相册，F12查看，找到自己的登录cookie

打开 Python 开发工具，新建项目，并新建 Python 文件 weibo_download_img.py，完整代码如下：

# -*- coding: utf-8 -*-
# @Time: 2023/4/17 12:49
# @Author: Yimi
# @File: weibo_download_img.py
# @Software: PyCharm
import hashlib
import importlib
import os
import random
import sys
import warnings

import requests
import urllib3

from logutils import Logger

# Silence urllib3's warnings for the whole process. disable_warnings must be
# called; a bare attribute reference would be a no-op.
requests.packages.urllib3.disable_warnings()

# Suppress every Python warning for the rest of the run.
warnings.filterwarnings("ignore")
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# NOTE(review): reloading sys looks like a Python 2 leftover — confirm it can be dropped.
importlib.reload(sys)
# ---- Settings to edit before running ----
# uid value substituted into the image-wall request URL.
uid = '5745848368'
# Root directory under which downloaded images are stored.
save_path = 'D:/BeautifulPictures/python_images/weibo/ID_EVA'
# Login cookie sent with every request; the placeholder must be replaced.
cookies = "替换自己的登录cookie替换自己的登录cookie替换自己的登录cookie替换自己的登录cookie"
# ---- End of settings to edit ----
# Pool of User-Agent strings to pick from at random; the more entries the better.
meizi_headers = [
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11",
    "Opera/9.25 (Windows NT 5.1; U; en)",
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)",
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12",
    "Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9",
    "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
    "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
]

# Default request headers: one User-Agent drawn once, at import time.
_default_user_agent = random.choice(meizi_headers)
headers = {"User-Agent": _default_user_agent}

# Second pool of User-Agent strings, used for the image requests.
UserAgent = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 10.0; WOW64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
]

# Module-level header template; the User-Agent is drawn once, at import time.
headers2 = {
    "User-Agent": random.choice(UserAgent),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip",
    "Referer": "https://www.baidu.com",
}

# Module-level counter, initialised to zero.
count = 0


# 创建文件夹
def createFile(file_path):
    """Create ``file_path`` (with any missing parents) and switch into it.

    :param file_path: directory to create if absent and then make the
        process working directory.
    :raises OSError: if the directory cannot be created or entered.
    """
    # exist_ok=True avoids the race between a separate existence check and
    # the creation call.
    os.makedirs(file_path, exist_ok=True)
    # Switch the process working directory to that folder.
    os.chdir(file_path)


# 计算字符串的md5值
# 下载文件方法函数定义
def get_str_md5(content):
    """Return the hexadecimal MD5 digest of ``content``.

    :param content: text or bytes-like data; a ``str`` is encoded as UTF-8
        before hashing because ``hashlib.md5`` only accepts bytes.
    :return: the digest as a 32-character lowercase hex string.
    """
    if isinstance(content, str):
        content = content.encode('utf-8')
    return hashlib.md5(content).hexdigest()

# 下载
def download(src, count):
    """Download one image URL into the local album folder.

    The file is stored at ``<save_path>/<host>/<size segment>/<file name>``,
    where the last three parts come from splitting ``src`` on ``/``. A file
    that already exists at that path is not fetched again.

    :param src: absolute image URL of the form ``https://host/size/name``.
    :param count: running counter supplied by the caller; not used here.
    :return: None
    :raises IndexError: if ``src`` has fewer than five ``/``-separated parts.
    """
    request_headers = {
        'User-Agent': random.choice(UserAgent),
        'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        'Accept-Encoding': 'gzip',
        "Referer": "https://www.baidu.com",
        'cookie': cookies
    }
    log.logger.info('download 1. 图片 HTTP 地址 src ' + src)
    # 'https://host/size/name' splits into ['https:', '', host, size, name].
    parts = src.split('/')
    host, size_dir, file_name = parts[2], parts[3], parts[4]
    root_path = '{}/{}/{}'.format(save_path, host, size_dir)
    if not os.path.exists(root_path):
        createFile(root_path)
    log.logger.info('download 1. 图片 HTTP 地址 file_name ' + file_name)

    # Test the exact path that is written below, so that files saved by an
    # earlier run are actually skipped.
    path_file = '{}/{}'.format(root_path, file_name)
    if os.path.exists(path_file):
        return

    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(src, headers=request_headers, verify=False, timeout=30)
    if response.status_code != 200:
        # Do not store an error page under an image file name.
        log.logger.warning('download skipped, HTTP status {}: {}'.format(response.status_code, src))
        return

    # The context manager closes the file even if the write fails.
    with open(path_file, "wb") as image_file:
        image_file.write(response.content)
    print(src, " 下载成功", path_file)


def main(id, o, index):
    """Walk the image wall page by page and download every listed image.

    Each response supplies the cursor (``since_id``) for the next page. The
    pages are followed in a loop rather than by recursion, so long albums
    cannot exhaust the interpreter's recursion limit.

    :param id: cursor for the first page request.
    :param o: page counter start; accepted for compatibility, not used.
    :param index: number of images already processed; it is incremented for
        each listed image and passed to ``download`` as a string.
    :return: None
    """
    request_headers = {
        'User-Agent': random.choice(meizi_headers),
        'cookie': cookies
    }
    since_id = id
    seen_cursors = set()
    while True:
        url = "https://weibo.com/ajax/profile/getImageWall?uid={}&sinceid={}".format(uid, since_id)
        try:
            response = requests.get(url, headers=request_headers, timeout=30)
            data = response.json()['data']
            next_since_id = data['since_id']
            items = data['list']
        except Exception as e:
            # Report the failure instead of swallowing it, then stop paging.
            print('main: failed to fetch image list', url, repr(e))
            return

        for item in items:
            index += 1
            try:
                pid = item['pid']
                # Only 'gif' entries get a .gif URL; every other type uses .jpg.
                extension = 'gif' if item['type'] == 'gif' else 'jpg'
                image_url = 'https://wx2.sinaimg.cn/mw2000/{}.{}'.format(pid, extension)
                download(image_url, str(index))
            except Exception as e:
                # Best effort: one bad entry must not abort the rest of the page.
                print('main: failed to download entry', index, repr(e))
                continue

        # NOTE(review): assumes a falsy since_id marks the last page — confirm
        # against the API. A repeated cursor is also treated as the end so the
        # loop cannot cycle over the same pages.
        if not next_since_id or next_since_id in seen_cursors:
            return
        seen_cursors.add(next_since_id)
        since_id = next_since_id


# Script entry point
if __name__ == '__main__':
    log = Logger('all.log', level='debug')
    # Start from cursor 0. The bare name `id` here would be the builtin
    # function, which would be formatted into the request URL.
    # NOTE(review): assumes sinceid=0 requests the first page — confirm.
    main(0, 0, 0)

附：日志保存功能代码 logutils.py

# -*- coding: utf-8 -*-
# @Time: 2022/9/14 14:29
# @Author: Yimi
# @File: logutils.py
# @Software: PyCharm
import logging
from logging import handlers


class Logger(object):
    """Wrap a named ``logging`` logger that writes to the console and to a
    time-rotated file."""

    # Maps the short level names accepted by __init__ to logging constants.
    level_relations = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'crit': logging.CRITICAL
    }

    def __init__(self, filename, level='info', when='D', backCount=3,
                 fmt='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'):
        """Configure the logger named after ``filename``.

        :param filename: log file path; also used as the logger name.
        :param level: one of the keys of ``level_relations``.
        :param when: rotation unit for ``TimedRotatingFileHandler``:
            'S' seconds, 'M' minutes, 'H' hours, 'D' days,
            'W0'-'W6' weekday (0 = Monday), 'midnight' roll over at midnight.
        :param backCount: number of rotated files to keep; older ones are
            deleted by the handler.
        :param fmt: format string applied to both handlers.
        """
        self.logger = logging.getLogger(filename)
        self.logger.setLevel(self.level_relations.get(level))  # set the log level

        # logging.getLogger returns the same object for the same name, so
        # attach handlers only once; otherwise each further Logger(filename)
        # would make every record appear one more time.
        if self.logger.handlers:
            return

        format_str = logging.Formatter(fmt)  # shared record format
        sh = logging.StreamHandler()  # console output
        sh.setFormatter(format_str)
        # File output that starts a new file at the configured interval.
        th = handlers.TimedRotatingFileHandler(filename=filename, when=when, backupCount=backCount, encoding='utf-8')
        th.setFormatter(format_str)
        self.logger.addHandler(sh)
        self.logger.addHandler(th)


if __name__ == '__main__':
    # Smoke test: emit one record per severity through the 'all.log' logger.
    log = Logger('all.log', level='debug')
    demo_logger = log.logger
    demo_logger.debug('debug')
    demo_logger.info('info')
    demo_logger.warning('警告')
    demo_logger.error('报错')
    demo_logger.critical('严重')
    # A second logger at error level, writing to its own file.
    error_log = Logger('error.log', level='error')
    error_log.logger.error('error')

Tiramisu2023

关注

1
点赞
踩
6

收藏

觉得还不错? 一键收藏
1
评论
【python】自动下载微博相册

打开python工具，新建项目，新建python文件。weibo_download_img.py 完整代码如下。首先打开浏览器，用自己账号登录微博，找到喜欢的博主相册，F12查看，找到自己的登录cookie。附日志 logutils.py 保存功能代码。
复制链接

扫一扫