爬取阳光宽频网的视频

import requests
from lxml import etree
import json
import os

from selenium import webdriver
import time

class LoadVideos(object):
    """Download the videos listed in the 365yg.com feed.

    Usage is two-step: ``get_video_info()`` fills ``video_list`` from the JSON
    feed, then ``load_video_data()`` renders each video page in a PhantomJS
    browser, extracts the media URL from the rendered HTML and hands the
    downloaded bytes to the module-level ``save_video`` function.
    """

    # Seconds to wait on a single HTTP request. Without a timeout, requests
    # can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Location of the PhantomJS binary used when the caller supplies none.
    DEFAULT_PHANTOMJS_PATH = '/Users/zhangninglei/Downloads/phantomjs-2.1.1-macosx/bin/phantomjs'

    def __init__(self, phantomjs_path=DEFAULT_PHANTOMJS_PATH):
        """Set up the URLs, request headers and the headless browser.

        Args:
            phantomjs_path: Filesystem path of the PhantomJS executable.
                Defaults to the path the script was originally written for.
        """
        self.index_url = 'http://www.365yg.com/'
        # NOTE(review): the `as`, `cp` and `_signature` query parameters look
        # like request-signing tokens that may expire - confirm before reuse.
        self.json_url = 'http://www.365yg.com/api/pc/feed/?category=video&utm_source=toutiao&widen=1&max_behot_time=0&max_behot_time_tmp=0&tadrequire=true&as=A1654A545ACFD9C&cp=5A4A0F0D29FC7E1&_signature='
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'}
        self.webdriver = webdriver.PhantomJS(phantomjs_path)
        # Maps video_id -> URL of the page that hosts the video.
        self.video_list = {}

    @staticmethod
    def _join_url(base, path):
        """Return *base* and *path* joined by exactly one slash."""
        return base.rstrip('/') + '/' + path.lstrip('/')

    def get_video_info(self):
        """Fetch the JSON feed and record each video's page URL in ``video_list``.

        Raises:
            requests.RequestException: If the feed request fails, times out
                or returns an HTTP error status.
            ValueError: If the response body is not valid JSON.
            KeyError: If the decoded feed has no ``data`` entry.
        """
        r = requests.get(url=self.json_url, headers=self.headers,
                         timeout=self.REQUEST_TIMEOUT)
        # Surface HTTP errors here instead of as a confusing JSON/Key error.
        r.raise_for_status()
        obj = json.loads(r.text)

        for video in obj['data']:
            video_name = video.get('video_id')
            source_url = video.get('source_url')
            # Entries lacking either field cannot be downloaded (presumably
            # non-video items such as ads - verify against the feed).
            if not video_name or not source_url:
                continue
            self.video_list[video_name] = self._join_url(self.index_url, source_url)

    def load_video_data(self):
        """Download every video in ``video_list`` and save it locally.

        Each page is rendered in the browser because the ``<video>`` element
        is read from the rendered page source. A page without a matching
        element, or a download that fails, is reported and skipped so one bad
        entry does not abort the rest of the batch.
        """
        for name, url in self.video_list.items():
            # Send the request through the browser.
            self.webdriver.get(url)
            # Sleep for a moment so the page can load its data.
            time.sleep(5)

            # Grab the rendered page source.
            html = self.webdriver.page_source

            # Parse the page and locate the media URL.
            html_tree = etree.HTML(html)
            sources = html_tree.xpath('//video[@class="vjs-tech"]/source/@src')
            if not sources:
                print('未找到' + name + '的视频地址,已跳过!')
                continue
            video_src = sources[0]

            print('开始加载' + name + '的数据!')
            try:
                r = requests.get(url=video_src, headers=self.headers,
                                 timeout=self.REQUEST_TIMEOUT)
                # Do not store an HTTP error body as if it were a video.
                r.raise_for_status()
            except requests.RequestException as err:
                print(name + '的数据加载失败:' + str(err))
                continue
            print(name + '的数据加载完毕!')

            # Save to local disk.
            print('将' + name + '保存到本地!')
            save_video(filename=name, data=r.content)
            print(name + '已成功保存!')

def save_video(filename, data):
    """Write *data* to ``<cwd>/video/<filename>.mp4``.

    Args:
        filename: Base name of the output file, without extension.
        data: Raw bytes of the video.
    """
    video_dir = os.path.join(os.getcwd(), 'video')
    # The target directory is not guaranteed to exist; create it on demand so
    # the first save does not fail with FileNotFoundError.
    os.makedirs(video_dir, exist_ok=True)
    filepath = os.path.join(video_dir, filename + '.mp4')
    with open(filepath, 'wb') as f1:
        f1.write(data)

def main():
    """Collect the feed's video links, then download every video."""
    loadvideo = LoadVideos()
    try:
        loadvideo.get_video_info()
        loadvideo.load_video_data()
    finally:
        # Always end the browser session, even when a step above raises;
        # otherwise the PhantomJS process started by LoadVideos is left running.
        loadvideo.webdriver.quit()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值