# 蓝奏云下载 (Lanzou Cloud downloader)
# 2022年04月21日编写 (written 2022-04-21)
# -*- coding: utf-8 -*-
## Python3
## 下载蓝奏云文件
## 参考链接:https://blog.csdn.net/qq_45429426/article/details/119819665
import os
import re
import sys
import json
import time
import requests
import urllib.parse
from lxml import etree
## 请求lanzouyun_url,获取transfer_url
def get_transfer_url(host_url, lanzouyun_url):
    """Fetch the share page and locate the embedded transfer iframe.

    Sends a GET request to ``lanzouyun_url`` and, on HTTP 200, reads two
    values from the returned HTML using fixed XPath expressions: the ``src``
    attribute of the iframe and the text of the div holding the file name.

    Args:
        host_url: Scheme-and-host prefix that is prepended to the iframe
            ``src`` to build the transfer URL.
        lanzouyun_url: URL of the share page to request.

    Returns:
        A ``(file_name, transfer_url)`` tuple.

    Raises:
        SystemExit: With status 1 when the response status is not 200 or
            when either XPath expression matches nothing.
    """
    # NOTE(review): the second key is spelled 'ccept-encoding' (no leading
    # 'a'), so it is not sent as Accept-Encoding. It is left untouched here:
    # advertising 'br' presumably only works when a Brotli decoder is
    # installed -- confirm before correcting the spelling.
    lanzouyun_headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'ccept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'codelen=1; pc_ad1=1',
        'sec-ch-ua': '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
        'sec-ch-ua-mobile': '?0',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'cross-site',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
    }
    lanzouyun_response = requests.get(url=lanzouyun_url, headers=lanzouyun_headers)
    if lanzouyun_response.status_code != 200:
        print('[*] 无法请求此链接,可能链接不存在或网络错误')
        # sys.exit is always available (the bare exit() helper comes from the
        # site module) and a non-zero status marks this as a failure.
        sys.exit(1)

    parser = etree.HTMLParser(encoding="utf-8")
    handled_html = etree.HTML(lanzouyun_response.text, parser=parser)

    # Both lookups depend on the exact page layout; an empty result means
    # the layout differs from what these paths expect.
    iframe_srcs = handled_html.xpath('/html/body/div[3]/div[2]/div[4]/iframe/@src')
    file_names = handled_html.xpath('/html/body/div[3]/div[1]/text()')
    if not iframe_srcs or not file_names:
        print('[*] 无法解析页面内容,页面结构可能已变化')
        sys.exit(1)

    file_name = file_names[0]
    transfer_url = host_url + iframe_srcs[0]
    print('\n[*] file_name:', file_name)
    print('[*] transfer_url:', transfer_url)
    return (file_name, transfer_url)
## 请求transfer_url,获取ajax_data
def get_ajax_data(lanzouyun_url, transfer_url):
    """Request the transfer page and build the form body for the ajax call.

    The page text is searched for a ``data : {...}`` literal. After all
    spaces are removed from that literal, the ``signs``, ``sign`` and
    ``websignkey`` values are cut out of it with regular expressions and
    assembled into a URL-encoded form string.

    Args:
        lanzouyun_url: Share-page URL, sent as the ``referer`` header.
        transfer_url: URL of the transfer (iframe) page to request.

    Returns:
        The form body string, shaped like
        ``action=downprocess&signs=...&sign=...&ves=1&websign=&websignkey=...``.

    Raises:
        IndexError: If any of the regular expressions finds no match.
    """
    # NOTE(review): 'ccept-encoding' is spelled without its leading 'a' in
    # the original; it is reproduced unchanged here.
    request_headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'ccept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'codelen=1; pc_ad1=1',
        'referer': lanzouyun_url,
        'sec-ch-ua': '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
        'sec-ch-ua-mobile': '?0',
        'sec-fetch-dest': 'iframe',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
    }
    page = requests.get(url=transfer_url, headers=request_headers)

    def first_capture(pattern, text):
        # First captured group of the first match; IndexError when absent.
        return re.findall(pattern, text)[0]

    # First pass: isolate the object literal and drop every space in it so
    # the field patterns below do not have to allow for whitespace.
    payload_literal = first_capture(r'data : ({.*?})', page.text).replace(' ', '')
    print('\n[*] first_re_data:')
    print(payload_literal)

    # Second pass: cut the individual fields out of the compacted literal.
    signs_value = first_capture(r'signs\':(.*?),\'sign', payload_literal)
    sign_value = first_capture(r'sign\':(.*?),\'ves', payload_literal).strip("'")
    key_value = first_capture(r'websignkey\':(.*?)}', payload_literal)
    print('\n[*] signs:', signs_value)
    print('[*] sign:', sign_value)
    print('[*] websignkey:', key_value)

    # Only the signs value is percent-encoded; the rest is inserted verbatim.
    form_fields = (
        'action=downprocess',
        'signs=' + urllib.parse.quote(signs_value),
        'sign=' + sign_value,
        'ves=1',
        'websign=',
        'websignkey=' + key_value,
    )
    ajax_data = '&'.join(form_fields)
    print('\n[*] ajax_data:')
    print(ajax_data)
    return ajax_data
## 请求ajax_url,获取download_url
def get_download_url(host_url, transfer_url, ajax_data):
    """POST the form body to ``ajaxm.php`` and build the download URL.

    The JSON reply is expected to carry a ``dom`` and a ``url`` string; the
    download URL is ``<dom>/file/<url>``.

    Args:
        host_url: Scheme-and-host prefix; used to build the endpoint URL and
            sent as the ``origin`` header.
        transfer_url: Transfer-page URL, sent as the ``referer`` header.
        ajax_data: URL-encoded form body to post.

    Returns:
        The download URL string.

    Raises:
        SystemExit: With status 1 when the reply lacks a usable ``dom`` or
            ``url`` string.
    """
    ajax_url = host_url + '/' + 'ajaxm.php'
    # No 'content-length' entry: the original hard-coded '152', which is
    # only right for one specific payload size. The HTTP library derives the
    # header from the actual body.
    ajax_headers = {
        'accept': 'application/json, text/javascript, */*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'content-type': 'application/x-www-form-urlencoded',
        'cookie': 'codelen=1; pc_ad1=1',
        'origin': host_url,
        'referer': transfer_url,
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Microsoft Edge";v="100"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44',
        'x-requested-with': 'XMLHttpRequest'
    }
    ajax_response = requests.post(url=ajax_url, data=ajax_data, headers=ajax_headers)
    ajax_response_json = ajax_response.json()

    # Both parts are concatenated as strings below, so anything else (a
    # missing key, a number, an empty value) cannot yield a usable URL.
    is_mapping = isinstance(ajax_response_json, dict)
    dom = ajax_response_json.get("dom") if is_mapping else None
    file_path = ajax_response_json.get("url") if is_mapping else None
    if not (isinstance(dom, str) and dom and isinstance(file_path, str) and file_path):
        print('\n[*] 获取下载地址失败,接口返回:', ajax_response_json)
        sys.exit(1)

    # Kept from the original: un-escape any literal '\/' sequences.
    download_url = (dom + '/' + 'file' + '/' + file_path).replace(r'\/', '/')
    print('\n[*] download_url:')
    print(download_url)
    return download_url
## 下载文件
def download_file(download_url, file_name):
    """Download ``download_url`` and save it in the current directory.

    Args:
        download_url: URL to fetch with a GET request.
        file_name: Name to save the file under. Only its final path
            component is used, so the file always lands in the current
            working directory.

    Raises:
        SystemExit: With status 1 when ``file_name`` has no usable final
            component or the response status is not 200.
    """
    # The name is scraped from a remote page by the caller; keeping only the
    # last component stops a name such as '../x' from escaping the directory.
    safe_name = os.path.basename(file_name)
    if not safe_name:
        print('\n[*] 文件名无效,无法保存文件')
        sys.exit(1)

    download_headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'down_ip=1',
        'sec-ch-ua': '"Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"',
        'sec-ch-ua-mobile': '?0',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
    }
    download_response = requests.get(url=download_url, headers=download_headers)
    # Without this check an error page would be written to disk and
    # reported as a completed download.
    if download_response.status_code != 200:
        print('\n[*] 文件下载失败,HTTP状态码:', download_response.status_code)
        sys.exit(1)

    # The with-statement closes the file; no explicit close() is needed.
    with open('./' + safe_name, 'wb') as save_file:
        save_file.write(download_response.content)
    print('\n[*] 文件下载完成')
## 主函数部分
def main():
    """Prompt for a share link and run the four download steps in order.

    The steps are: resolve the transfer URL and file name, build the ajax
    form body, obtain the download URL, and save the file.

    Raises:
        SystemExit: With status 1 when no host can be extracted from the
            entered link.
    """
    lanzouyun_url = input('\n[*] 请输入蓝奏云链接:').strip()
    # Accept links typed without a scheme; the host prefix below is always
    # built with https, as in the original.
    if '://' not in lanzouyun_url:
        lanzouyun_url = 'https://' + lanzouyun_url

    # urlsplit isolates the host reliably, where split('/')[2] raised
    # IndexError or picked a path segment for scheme-less input.
    try:
        netloc = urllib.parse.urlsplit(lanzouyun_url).netloc
    except ValueError:
        netloc = ''
    if not netloc:
        print('[*] 链接格式不正确,请输入完整的蓝奏云链接')
        sys.exit(1)
    host_url = 'https://' + netloc

    file_name, transfer_url = get_transfer_url(host_url, lanzouyun_url)
    ajax_data = get_ajax_data(lanzouyun_url, transfer_url)
    download_url = get_download_url(host_url, transfer_url, ajax_data)
    download_file(download_url, file_name)


if __name__ == '__main__':
    main()