Python web crawler: batch-downloading videos from pearvideo.com

import os
import random
import re

import requests
from lxml import etree
from multiprocessing.dummy import Pool  # thread pool with the multiprocessing API

# Create the output directory if it does not already exist.
if not os.path.exists('视频2'):
    os.mkdir('视频2')

urls = []  # list of {'name': ..., 'url': ...} dicts to download later
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
}

# Fetch the category list page and collect the <li> entry for each video.
url = 'https://www.pearvideo.com/category_5'
page_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
li_list = tree.xpath('//ul[@id="listvideoListUl"]/li')
for li in li_list:
    detail_url = 'https://www.pearvideo.com/' + li.xpath('./div/a/@href')[0]
    name = li.xpath('./div/a/div[2]/text()')[0] + '.mp4'
    # The real video address comes from an Ajax endpoint, keyed by the numeric
    # content id embedded in the detail URL.
    contId = re.findall(r"\d+", detail_url)[0]
    new_url = 'https://www.pearvideo.com/videoStatus.jsp'
    params = {
        'contId': contId,
        'mrd': str(random.random())
    }
    # The endpoint checks the Referer header, so point it at the detail page.
    headers["Referer"] = detail_url
    video_json = requests.get(url=new_url, headers=headers, params=params).json()
    src_url = video_json['videoInfo']['videos']['srcUrl']
    # srcUrl is not directly downloadable: the first dash-delimited segment of the
    # file name must be replaced with "cont-<contId>" to form the real address.
    video_prefix = '/'.join(src_url.split('/')[0:-1])
    video_suffix = '-'.join(src_url.split('-')[1:])
    dic = {
        'name': name,
        'url': video_prefix + '/cont-' + contId + '-' + video_suffix
    }
    urls.append(dic)


def get_video_data(dic):
    """Download a single video and write it into ./视频2/."""
    video_data = requests.get(url=dic['url'], headers=headers).content
    with open('./视频2/' + dic['name'], "wb") as fp:
        fp.write(video_data)
    print(dic['name'] + ' downloaded successfully!')


# Download the collected videos concurrently with a pool of 4 worker threads.
pool = Pool(4)
pool.map(get_video_data, urls)
pool.close()
pool.join()
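
The only non-obvious step above is how the download URL is rebuilt. The srcUrl returned by videoStatus.jsp points at a dummy file whose name begins with a timestamp-like segment, and the loop swaps that segment for "cont-" plus the content id. A minimal sketch of the transformation is shown below; the srcUrl and contId values are fabricated examples of the typical shape, not real data.

# Illustration only: src_url and contId below are made-up sample values.
contId = '1742368'
src_url = 'https://video.pearvideo.com/mp4/adshort/20210101/1609480259674-15512399_adpkg-ad_hd.mp4'

prefix = '/'.join(src_url.split('/')[0:-1])   # URL up to (but not including) the file name
suffix = '-'.join(src_url.split('-')[1:])     # file name with its leading segment stripped
real_url = prefix + '/cont-' + contId + '-' + suffix
print(real_url)
# https://video.pearvideo.com/mp4/adshort/20210101/cont-1742368-15512399_adpkg-ad_hd.mp4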