使用 you-get 多进程下载 cd_project_red 的所有“赛博朋克2077”相关视频

先贴上代码
有空补全详解

脉络

  1. 进入cd_project_red的个人空间
  2. 找到视频的信息,发现是json
  3. 遍历所有视频的description、title、bvid
  4. 用正则选出所有含"赛博朋克2077"的视频,用其bvid建一个list
  5. 通过多进程调用 you-get,下载 list 中所有 bvid 对应的视频(这一步如果有更好的做法,欢迎各位留言指教)
import sys
from you_get import common as you_get       #导入you-get库
import requests
import random
import re
from concurrent.futures import ProcessPoolExecutor
# Pool of browser User-Agent strings; get_bvid_raw() picks one at random for
# each HTTP request it sends.
user_agents = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"
    ]

# Shared accumulator: get_bvid_raw() appends every matching video id here and
# main() later feeds the collected ids to the download workers.
bvids=[]
# NOTE(review): not referenced anywhere in the visible code -- possibly a
# leftover; confirm before removing.
bvids_list=[]
def _iter_json_objects(node):
    """Yield every JSON object (dict) nested anywhere inside *node*, parents first."""
    if isinstance(node, dict):
        yield node
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return
    for child in children:
        yield from _iter_json_objects(child)


def get_bvid_raw(i):
    """Collect the ids of the videos on result page *i* that mention 赛博朋克2077.

    Fetches one page of the uploader's video listing, decodes the JSON reply
    and appends the ``bvid`` of every record whose ``title`` or
    ``description`` contains the keyword to the module-level ``bvids`` list.

    The reply is parsed as JSON rather than scanned with independent regular
    expressions: zipping three separate ``findall`` results pairs fields
    purely by position, so a single object with an extra or missing
    "title"/"description" key silently attaches ids to the wrong titles, and
    escaped quotes or ``\\u`` escapes inside a value break the match.

    Args:
        i: 1-based page number substituted into the ``pn`` query parameter.

    Returns:
        The shared module-level ``bvids`` list (the same object on every
        call), including ids collected by earlier calls.

    Raises:
        requests.RequestException: if the request fails or takes longer than
            the timeout.
    """
    headers = {
               'User-Agent': random.choice(user_agents)
               }
    domain="https://api.bilibili.com/x/space/arc/search?mid=271442527&ps=30&tid=0&pn={}&keyword=&order=pubdate&jsonp=jsonp".format(i)
    # Without a timeout a stalled connection would block the script forever.
    response=requests.get(domain,headers=headers,timeout=30)

    try:
        payload=response.json()
    except ValueError:
        # Stay best-effort like the regex version: a non-JSON reply (error
        # page, captcha, ...) simply contributes no videos.
        print("第{}页的响应不是合法的JSON,已跳过".format(i))
        return bvids

    keyword="赛博朋克2077"
    # Walk the whole document instead of hard-coding a path to the video
    # list, so the function does not depend on the exact response layout;
    # any object that carries a "bvid" is treated as one video record.
    for record in _iter_json_objects(payload):
        bvid=record.get("bvid")
        if not isinstance(bvid,str) or not bvid:
            continue
        candidates=(record.get("title"),record.get("description"))
        mentioned=any(isinstance(text,str) and keyword in text for text in candidates)
        # Skip ids that are already queued so a video is never downloaded twice.
        if mentioned and bvid not in bvids:
            bvids.append(bvid)
    return bvids
def download(bvid, directory=r'E:\1'):
    """Download one Bilibili video with you-get.

    Args:
        bvid: Video id appended to the Bilibili video URL prefix.
        directory: Folder handed to you-get's ``-o`` option. Defaults to the
            location that used to be hard-coded, so existing single-argument
            callers behave exactly as before.
    """
    url = 'https://www.bilibili.com/video/'+bvid
    # The arguments are handed over through sys.argv, exactly as if you-get
    # had been started from the command line with them.
    sys.argv = ['you-get','--playlist','-o',directory,url]
    you_get.main()
def main():
    """Scan result pages 1-6 for matching videos, then download them in parallel.

    Each page is passed to get_bvid_raw(), which grows the module-level
    ``bvids`` list; a running total is printed after every page. All
    collected ids are then downloaded by a pool of worker processes, and
    every download that raised is reported once the pool has finished.
    """
    for i in range(1,6+1):
        get_bvid_raw(i)
        print("第1-{}页".format(i)+"有"+str(len(bvids)))
    print(len(bvids))

    # Keep the futures: the iterator returned by executor.map() was never
    # consumed, so any exception raised inside a worker vanished silently.
    with ProcessPoolExecutor(max_workers=48) as executor:
        jobs = [(bvid, executor.submit(download, bvid)) for bvid in bvids]

    # Leaving the with-block waits for all workers, so every future is done
    # and exception() returns immediately.
    for bvid, job in jobs:
        error = job.exception()
        if error is None:
            continue
        # NOTE(review): a clean sys.exit() inside the worker would surface
        # here as SystemExit with a falsy code; treat that as success.
        if isinstance(error, SystemExit) and not error.code:
            continue
        print("下载失败: {} ({!r})".format(bvid, error))

# Run the scraper only when this file is executed as a script, not when it is
# imported. main() starts a ProcessPoolExecutor; NOTE(review): on platforms
# where worker processes import the main module (e.g. Windows), this guard is
# presumably also what keeps the workers from re-running main() -- confirm
# against the concurrent.futures documentation.
if __name__=="__main__":
    main()
    
    

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值