# -*- coding:utf-8 -*-
# @URL: d3d3LnF4YnNrLmNvbQ==
import requests
def rqu(length):
    """Download one 1 MiB slice of the remote file and append it to ``test.pdf``.

    Sends a GET request whose ``Range`` header covers bytes ``length``
    through ``length + 1048575`` (inclusive) and appends the response body
    to ``test.pdf`` in the current working directory.

    Relies on the module-level names ``url``, ``cookies`` and ``params``,
    which the script entry point defines before calling this function.

    Args:
        length: Zero-based byte offset at which the requested slice starts.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    range_start = length
    range_end = range_start + 1048575  # HTTP byte ranges are inclusive: 1 MiB total
    headers = {
        "authority": "www.qxbsk.com",
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "range": f"bytes={range_start}-{range_end}",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # The whole slice is needed in memory anyway, so a plain (non-streaming)
    # request is used; the timeout keeps a stalled server from hanging the run.
    response = requests.get(url, headers=headers, cookies=cookies, params=params, timeout=30)
    # Fail loudly instead of appending an HTML error page to the PDF.
    response.raise_for_status()
    # Open the output only after the download succeeded, so a failed request
    # never creates or touches the file.
    with open('test.pdf', 'ab') as f:
        f.write(response.content)
def get_total_length(params):
    """Return the ``Content-Length`` the server reports for the target file.

    Issues a streaming GET so that only the status line and headers are
    fetched; the body is not read and the connection is closed on exit.

    Relies on the module-level names ``url``, ``headers`` and ``cookies``,
    which the script entry point defines before calling this function.

    Args:
        params: Query-string parameters passed to ``requests.get``.

    Returns:
        The raw ``Content-Length`` header value (a string); callers convert
        it with ``int()``.

    Raises:
        KeyError: If the response carries no ``Content-Length`` header.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # The context manager closes the streamed response; without it the
    # connection with its unread body would stay open.
    with requests.get(url, headers=headers, cookies=cookies, params=params,
                      stream=True, timeout=30) as response:
        response.raise_for_status()
        return response.headers['Content-Length']
if __name__ == '__main__':
    # Script entry point: find out the remote file size, then fetch the file
    # in 1 MiB slices that rqu() appends to test.pdf.
    #
    # headers, cookies, url and params are deliberately module-level names:
    # rqu() and get_total_length() read them as globals.
    headers = {
        "authority": "www.qxbsk.com",
        "accept": "*/*",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # NOTE(review): the session values are blank here — presumably they must
    # be filled in with a valid session before running; confirm.
    cookies = {
        "security_session_verify": "",
        "home_lang": "cn",
        "admin_lang": "cn",
        "PHPSESSID": ""
    }
    url = "https://www.qxbsk.com/home/test/uploadss"
    params = {
        "file": "OTc5MzQ0",
        "__token__": ""
    }
    totalLength = get_total_length(params)
    print(totalLength)

    total_bytes = int(totalLength)  # header value is a string; convert once
    chunk_size = 1048576  # 1 MiB, the slice size rqu() requests per call

    # rqu() opens test.pdf in append mode, so a file left over from an
    # earlier run would get the new download glued onto its end. Start from
    # an empty file (only after the size request succeeded).
    with open('test.pdf', 'wb'):
        pass

    if total_bytes < chunk_size:
        # The whole file fits in a single slice.
        rqu(0)
        print(f"下载中...{totalLength}/{totalLength}")
    else:
        for pLength in range(0, total_bytes, chunk_size):
            rqu(pLength)
            # Report bytes completed so far (capped at the total) rather than
            # the start offset of the slice that was just fetched.
            downloaded = min(pLength + chunk_size, total_bytes)
            print(f"下载中...{downloaded}/{totalLength}")
# 【汽修帮手】数据采集,爬虫,根据pdf文件流合并pdf文件
# 于 2024-02-29 00:19:04 首次发布