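# Download a Tencent Docs online document with Python
#
# 112479
# Last modified: 2024-02-02 17:53:48
# Views: 1k
# Likes: 1
# Article tags: python
# First published: 2024-02-02 17:46:53
# Article link: https://blog.csdn.net/qq_18525247/article/details/135998575
# Copyright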
import time

import requests

"""
1. 手动到chrome获取下载请求
2. 获取excel的动态id
3. 拼出excel的下载链接
4. 下载
"""


class Excel:
    """Download a Tencent Docs spreadsheet as an .xlsx file via the web export API.

    The flow, driven by ``__call__``, is:

    1. POST the export request to obtain an ``operationId``.
    2. Poll the progress endpoint with that id until it reports a ``file_url``.
    3. Download ``file_url`` and write it to disk.

    The request headers and form data mirror a request captured manually from
    the browser, so a valid login cookie must be supplied for real use.
    """

    # Seconds to wait on any single HTTP request; requests has no timeout by
    # default and would otherwise block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Progress endpoint; the operationId returned by the export request is
    # appended to this prefix.
    QUERY_PROGRESS_URL = (
        "https://docs.qq.com/v1/export/query_progress"
        "?u=28b403f5c49b4e38add0acaff339ed41&operationId="
    )

    def __init__(self, cookie_string="", doc_id='300000000$SsZKXwgehsOn'):
        """
        :param cookie_string: value sent as the ``Cookie`` request header
        :param doc_id: value sent as the ``docId`` field of the export form
        """
        self.cookie_string = cookie_string
        # NOTE(review): "authority", "method", "path" and "scheme" look like
        # HTTP/2 pseudo-headers copied from the browser's request view; they
        # are sent here as ordinary headers. Kept unchanged - confirm whether
        # the server needs them.
        self.headers = {
            "authority": "docs.qq.com",
            "method": "GET",
            "path": "/v1/export/query_progress?u=28b403f5c49b4e38add0acaff339ed41&operationId=144115215919843666_fb7bbca6-a03a-23df-f662-ffdbc961bb9f",
            "scheme": "https",
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cookie": self.cookie_string,
            "Referer": "https://docs.qq.com/sheet/DU3NaS1h3Z2Voc09u?u=28b403f5c49b4e38add0acaff339ed41&tab=BB08J2",
            "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": "Windows",
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-origin",
            "Traceparent": "00-b895d4ff7358b61546dde0bd9c69e4fa-3d91d815d6e57d83-01",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }

        # Form fields posted with the export request.
        self.data = {
            'exportType': 0,
            'switches': '{"embedFonts":false}',
            'exportSource': 'client',
            'docId': doc_id
        }

    def get_operationId(self, browser_url):
        """
        Start an export and return its operation id.

        :param browser_url: URL of the export request captured in the browser
        :return: the ``operationId`` value from the JSON response
        :raises requests.HTTPError: if the server answers with an error status
        :raises KeyError: if the JSON response has no ``operationId``
        """
        # Example of the expected URL:
        # https://docs.qq.com/v1/export/export_office?u=28b403f5c49b4e38add0acaff339ed41
        response = requests.post(
            browser_url,
            data=self.data,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        operation_id = response.json()['operationId']
        print("excel动态id:", operation_id)
        return operation_id

    def excel_url(self, url, retries=5, interval=1.0):
        """
        Poll the progress endpoint until it reports a download link.

        :param url: progress-query URL that already includes the operation id
        :param retries: maximum number of polling requests
        :param interval: seconds to sleep between two polling requests
        :return: the non-empty ``file_url`` from the response, or ``None`` if
            none was reported within ``retries`` attempts
        """
        for attempt in range(retries):
            payload = requests.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            ).json()
            # A missing or empty file_url means the link is not available yet,
            # so keep polling instead of returning the empty value.
            file_url = payload.get('file_url') if isinstance(payload, dict) else None
            if file_url:
                return file_url
            if attempt < retries - 1:
                time.sleep(interval)
        return None

    def write2excel(self, file_url, file_name="download_excel.xlsx"):
        """
        Download the exported file and save it locally.

        :param file_url: download link of the exported file
        :param file_name: path of the file to write
        :return: None
        :raises requests.HTTPError: if the download answers with an error status
        """
        response = requests.get(url=file_url, timeout=self.REQUEST_TIMEOUT)
        # Fail before touching the disk so an error page is never saved as
        # the spreadsheet.
        response.raise_for_status()
        with open(file_name, 'wb') as f:
            f.write(response.content)
        print("excel下载完成")

    def __call__(self, browser_url="", file_name="download_excel.xlsx"):
        """
        Run the whole export: request, poll, download.

        :param browser_url: the ``export_office`` request URL taken from the
            Chrome F12 network panel
        :param file_name: path of the file to write
        :return: None
        :raises ValueError: if ``browser_url`` is empty
        :raises RuntimeError: if no download link was reported while polling
        """
        # TODO: derive the export request URL from the normal browser URL so
        # that opening F12 is no longer required.
        if not browser_url:
            raise ValueError(
                "browser_url is empty; pass the export_office request URL "
                "captured from the browser"
            )

        operation_id = self.get_operationId(browser_url)
        url = self.QUERY_PROGRESS_URL + operation_id
        excel_url = self.excel_url(url)
        if not excel_url:
            raise RuntimeError(
                "no file_url was reported for operation " + str(operation_id)
            )
        self.write2excel(excel_url, file_name=file_name)


if __name__ == '__main__':
    # Script entry point: build the downloader, then invoke it to run the
    # export flow.
    downloader = Excel()
    downloader()