说明:
1、请记得修改四个参数,分别是:doc_url、local_padId、cookie_value、file_name。
2、在浏览器中打开该文档,按F12打开开发者工具,找到doc_info接口的请求,然后从中复制local_padId和cookie_value的值。
3、执行文件即可下载腾讯在线文档到本地目录。
#! /usr/bin/python3
# -*- coding: utf-8 -*-
import json
import time
import requests
from lxml import etree
class TengXunDoc():
    """Small client that exports a Tencent Docs document and saves it locally.

    The workflow implemented by the methods is: read the current user index
    from the document page (``get_now_user``), start an export task
    (``export_task``), then poll the task and download the resulting file
    (``downlowd_excel``). The export and progress URLs are supplied by the
    caller.
    """

    # Per-request network timeout in seconds, so a stalled connection cannot
    # hang the script forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, doc_url, local_padId, cookie_value):
        """Store the document location and the headers sent with each request.

        Args:
            doc_url: URL of the online document page.
            local_padId: Document id sent as ``docId`` when creating the
                export task.
            cookie_value: Raw ``Cookie`` header value of a logged-in session.
        """
        self.doc_url = doc_url
        self.local_padId = local_padId
        self.headers = {
            'content-type': 'application/x-www-form-urlencoded',
            'Cookie': cookie_value
        }

    @staticmethod
    def _parse_user_script(script_text):
        """Decode the JSON object assigned to ``window.global_multi_user``.

        Only the assignment prefix and the surrounding whitespace / statement
        terminators are removed, so semicolons inside JSON string values are
        preserved.
        """
        payload = script_text.replace('window.global_multi_user=', '')
        return json.loads(payload.strip('; \t\r\n'))

    def get_now_user(self):
        """Fetch the document page and return the current user index.

        The value is read from the ``nowUserIndex`` field of the object
        embedded in the third ``<script>`` of the page ``<head>``.
        NOTE(review): per the original author's notes that object also
        carries ``uid`` and ``utype`` fields - confirm against a live page.

        Returns:
            The ``nowUserIndex`` value, or the message
            ``'cookie过期,请重新输入'`` when the field is absent. Callers
            must check for that message before using the result.

        Raises:
            IndexError: If the expected ``<script>`` element is not found.
            json.JSONDecodeError: If the script content is not the expected
                JSON assignment.
        """
        req = requests.get(url=self.doc_url, headers=self.headers,
                           timeout=self.REQUEST_TIMEOUT)
        req.encoding = req.apparent_encoding
        html = etree.HTML(req.text)
        user_list = html.xpath("/html/head/script[3]/text()")
        user_dict = self._parse_user_script(str(user_list[0]))
        now_user_index = user_dict.get("nowUserIndex")
        if now_user_index is not None:
            print(now_user_index)
            return now_user_index
        return 'cookie过期,请重新输入'

    def export_task(self, export_url):
        """Create an export task for the document and return its operation id.

        Args:
            export_url: Export endpoint URL to POST to.

        Returns:
            The ``operationId`` field of the JSON response.
        """
        body = {
            "docId": self.local_padId,
            "version": 2
        }
        req = requests.post(url=export_url, headers=self.headers, data=body,
                            timeout=self.REQUEST_TIMEOUT)
        return req.json()["operationId"]

    def downlowd_excel(self, check_url, file_name, max_wait=60,
                       poll_interval=1.0):
        """Wait for the export task to finish, then save the file locally.

        Args:
            check_url: Progress endpoint URL for the export task.
            file_name: Local path the downloaded file is written to.
            max_wait: Seconds to keep polling before giving up.
            poll_interval: Seconds to sleep between two progress requests.

        Raises:
            requests.HTTPError: If the file download responds with an HTTP
                error status (nothing is written in that case).
        """
        start_time = time.time()
        file_url = ""
        while True:
            req = requests.get(url=check_url, headers=self.headers,
                               timeout=self.REQUEST_TIMEOUT)
            status = req.json()
            if status["progress"] == 100:
                file_url = status["file_url"]
                break
            if time.time() - start_time > max_wait:
                print("数据下载超时,请检查")
                break
            # Pause between polls instead of hammering the server.
            time.sleep(poll_interval)
        if file_url:
            # Work on a copy so the shared headers keep their original
            # content-type for any later request made by this instance.
            # NOTE(review): the session cookie is also sent to the host in
            # file_url - confirm that it is required there.
            download_headers = dict(self.headers)
            download_headers["content-type"] = "application/octet-stream"
            res = requests.get(url=file_url, headers=download_headers,
                               timeout=self.REQUEST_TIMEOUT)
            # Do not save an error page as if it were the spreadsheet.
            res.raise_for_status()
            with open(file_name, 'wb') as fp:
                fp.write(res.content)
            print(f"文件下载成功,文件名:{file_name}")
        else:
            print("下载excel表格失败,请检查文件获取地址")
if __name__ == '__main__':
    import os

    # URL of the Tencent Docs online document.
    doc_url = "https://docs.qq.com/sheet/DQ1RrcEhmaXJNZkJW?tab=xxx"
    # Values copied from the doc_info interface (see the notes at the top).
    local_padId = "300000000$xxxx"
    cookie_value = ""
    # Local path the exported file is saved to.
    file_name = "fruit/产品运维xxxx应急演练管理.xlsx"

    client = TengXunDoc(doc_url, local_padId, cookie_value)
    now_user = client.get_now_user()
    # get_now_user() reports failure by returning this message; stop here
    # instead of building the export URLs from it.
    if now_user == 'cookie过期,请重新输入':
        raise SystemExit(now_user)

    # Start the export task.
    export_url = f"https://docs.qq.com/v1/export/export_office?u={now_user}"
    operation_id = client.export_task(export_url)
    check_url = f"https://docs.qq.com/v1/export/query_progress?u={now_user}&operationId={operation_id}"

    # Make sure the target directory exists before the file is written.
    out_dir = os.path.dirname(file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    client.downlowd_excel(check_url, file_name)