Out of boredom I wrote a Python crawler that grabs direct download links from Lanzou Cloud (commented and wrapped into functions)

If you would like a dedicated follow-up article, please say so in the comments.
Your likes are endless encouragement for me.
The program is nothing fancy, but it does have its uses; experts, please go easy on me!
The save path and the input link are hard-coded; adapt the code to your own needs (for example, add an input() prompt; see the sketch after the code). Please read the comments carefully!

This crawler is outdated but still useful as a reference; the latest version is covered in the article below.
Latest crawler version

#Author's CSDN: https://blog.csdn.net/qq_45429426?spm=1011.2124.3001.5343 (漫游感知)
#Do not repost!
#Install any missing libraries with pip
import requests                #third-party library, for sending HTTP requests
import jsonpath                #third-party library, for parsing JSON data
from lxml import etree         #third-party library, for XPath parsing of HTML
import re                      #standard library, for regular expressions

def lancode(url):
    '''Fetch the Lanzou share page and pull out the iframe src that leads to the download widget.'''
    url = str(url)
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'lzt666.lanzous.com',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'
    }

    response = requests.get(url=url, headers=headers).text
    html_tree = etree.HTML(response)
    # The download widget sits in an iframe; grab its src attribute.
    # The absolute XPath below is position-dependent and will break if the page layout changes.
    iframe_src = html_tree.xpath('/html/body/div[3]/div[2]/div[4]/iframe/@src')[0]
    iframe_src_stripped = str(iframe_src).replace('/fn?', '')
    two_list = [iframe_src_stripped, iframe_src]        #index 0 (without '/fn?') feeds the form data later; index 1 builds the iframe page URL and the Referer header
    return two_list

def get_download_url(url_list):
    '''Request the iframe page, extract the "sign" token from its JavaScript, then POST to ajaxm.php to get the real file URL.'''
    page_url = 'https://lzt666.lanzous.com' + url_list[1]
    page_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': 'codelen=1; pc_ad1=1',
        'Host': 'lzt666.lanzous.com',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'
    }
    data = {
        '':url_list[0]
    }

    page_code_sign = requests.get(url=page_url, headers=page_headers, data=data).text    #requests sends the dict as the request body even on a GET
    # print(page_code_sign)
    txt = re.findall("'sign':(.*?),'ves'", page_code_sign)
    txt_re = str(txt[1]).replace("'", '')       #the second match is the valid sign parameter; strip the surrounding quotes

    url_post = 'https://lzt666.lanzous.com/ajaxm.php'
    headers = {
        'Accept': 'application/json, text/javascript, */*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # Content-Length is computed and set by requests automatically, so it is not hard-coded here.
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'codelen=1; pc_ad1=1',
        'Host': 'lzt666.lanzous.com',
        'Origin': 'https://lzt666.lanzous.com',
        'Referer': 'https://lzt666.lanzous.com' + url_list[1],
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
    }
    data = {
        'action': 'downprocess',
        'signs': '?ctdf',
        'sign': txt_re,
        'ves': '1',
        'websign': None
    }

    post_url_download = requests.post(url=url_post, headers=headers, data=data).json()
    # print(post_url_download)
    url_file = jsonpath.jsonpath(post_url_download, '$..url')[0]
    # print('Referer: https://lzt666.lanzous.com' + url_list[1], 'sign:' + url_list[0])
    download_file_url = 'https://vip.d0.baidupan.com/file/' + url_file
    return str(download_file_url)                  #return the direct file download link

def download(url):
    '''
    :param url: the direct link to download (already resolved by the functions above)
    :return: True once the file has been written to disk
    '''
    url = str(url)
    data = {
        '': url.rsplit('/')[-1].replace('?', '').replace('=', ':')
    }

    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'cookie': 'down_ip=1',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'none',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'
    }

    file = requests.get(url=url, headers=headers, data=data).content
    with open('./1.zip', 'wb') as file_down:
        #a zip archive was used while testing, hence the hard-coded name; change the path to suit your needs
        file_down.write(file)
    print('Download complete')
    return True

a = lancode('https://lzt666.lanzous.com/xxxx')        #put your share link here
b = get_download_url(a)
download(b)
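
As noted above, the entry link and save path are hard-coded. A minimal interactive wrapper could replace the three driver lines above; this is just a sketch, and the prompt text, variable names, and __main__ guard are my own additions, not part of the original script:

#A sketch of an interactive entry point (assumes the three functions above are defined in the same file)
if __name__ == '__main__':
    share_link = input('Paste the Lanzou share link: ').strip()
    parts = lancode(share_link)               #step 1: resolve the iframe src
    direct_url = get_download_url(parts)      #step 2: resolve the direct file URL
    print('Direct link:', direct_url)
    download(direct_url)                      #step 3: save the file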
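
Separately, download() reads the entire file into memory via .content before writing it out, which is fine for small files but wasteful for large ones. requests' streaming mode avoids that; here is a sketch (stream=True and iter_content are standard requests features, while the function name, save_path default, and chunk size are my own choices):

import requests

def download_streamed(url, save_path='./1.zip', chunk_size=8192):
    #stream the response so the whole file never sits in memory at once
    headers = {
        'cookie': 'down_ip=1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36'
    }
    with requests.get(url, headers=headers, stream=True) as resp:
        resp.raise_for_status()
        with open(save_path, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    print('Download complete:', save_path)
    return True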
