蓝墨云班课资源下载

看见有人详细讲解了下载文件的原理,在这里我就不赘述了,直接上写好的代码。可能乱了点。
有一点要提前说明:写这个脚本时,我只打算下载还没有获得经验的文件;已经获得过经验的文件我用不到,所以默认会跳过。相关代码在 download_sours 函数里:jy 表示该资源是否已获得经验,N 代表尚未获得,Y 代表已经获得。如果想把已获得经验的资源也一并下载,去掉 jy[i] == "Y" 这个判断条件即可。

# coding=gbK
import os
import re
import time
import json
import requests
import threading
from lxml import etree

class Download_resours():
    def __init__(self):
        try:
            self.load()
        except:
            self.load_in()
        self.main()

    def main(self):
        self.download_sours()
        print('下载完成'.center(50, '*'))
        select = input('是否继续下载其他科目?[y/n]:')
        if select == 'y' or select == 'Y':
            self.main()
        else:
            pass

    # 第一次使用的时候登录并且获得课程列表并且把账号密码保存在account.json文件内,避免重复输账号密码
    def load_in(self):
        """Prompt for credentials, log in, and list the account's courses.

        The prompt is repeated until the login endpoint answers with
        ``result_code == 0``. On success the credentials are written to
        ``account.json`` in the working directory (as plain-text JSON) so
        that ``load()`` can reuse them, the course names are printed with
        their index, and these attributes are set: ``session``, ``header``,
        ``cookies``, ``username``, ``userpw`` and ``cours_id``.
        """
        login_url = 'https://www.mosoteach.cn/web/index.php?c=passport&m=account_login'
        index_url = "https://www.mosoteach.cn/web/index.php?c=clazzcourse&m=index"
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
        }

        # Retry with a loop rather than by calling load_in() recursively, so
        # repeated typos do not stack up nested calls.
        while True:
            self.session = requests.session()
            self.username = input("请输入账号:")
            self.userpw = input('请输入密码:')
            post_data = {
                'account_name': self.username,
                'user_pwd': self.userpw,
                'remember_me': 'Y',
                'scene': '',
                'time': '',
                'sign': ''
            }
            response = self.session.post(login_url, data=post_data, headers=self.header)
            if json.loads(response.text)['result_code'] == 0:
                break
            print("登录失败")
            self.session.close()

        # Persist the credentials only once the server has accepted them, so
        # a mistyped password is never left behind in account.json.
        with open('account.json', 'w') as f:
            json.dump({'user_name': self.username, 'user_pw': self.userpw}, f)

        print("登陆成功".center(90, "*"))
        page = self.session.get(index_url, headers=self.header)
        self.cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        tree = etree.HTML(page.text)
        project_name = tree.xpath('//div[@class = "class-info"]/span[@class = "class-info-subject"]/@title')

        # Course ids come in two groups: ongoing courses first, then the
        # completed ones. The printed index is later used to pick from this
        # combined list (assumes the page lists names in the same order —
        # TODO confirm).
        course_ids = list(tree.xpath('//li[@class = "class-item "]/@data-id'))
        course_ids.extend(tree.xpath('//li[@class = "class-item class-item-complete"]/@data-id'))
        self.cours_id = course_ids

        for index, course_name in enumerate(project_name):
            print(str(index) + '***************' + course_name)

    # 当有account文件的时候直接从文件里读取账号密码并登录
    def load(self):
        """Log in with the credentials stored in ``account.json``.

        Reads ``user_name``/``user_pw`` from the file, posts them to the
        login endpoint and, when ``result_code`` is 0, fetches the course
        index page, stores the combined course ids in ``self.cours_id`` and
        prints every course name with its index. If the server rejects the
        login, the session is closed and ``load_in()`` takes over.
        """
        self.session = requests.session()
        with open('account.json') as account_file:
            saved = json.load(account_file)
            self.username = saved['user_name']
            self.userpw = saved['user_pw']

        login_url = 'https://www.mosoteach.cn/web/index.php?c=passport&m=account_login'
        self.header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.116 Safari/537.36'
        }
        form = {
            'account_name': '{}'.format(self.username),
            'user_pwd': '{}'.format(self.userpw),
            'remember_me': 'Y',
            'scene': '',
            'time': '',
            'sign': ''
        }
        reply = self.session.post(login_url, data=form, headers=self.header)
        reply_js = json.loads(reply.text)

        # Guard clause: a rejected login hands over to the interactive prompt.
        if reply_js['result_code'] != 0:
            print("登录失败")
            self.session.close()
            self.load_in()
            return

        print("登陆成功".center(90, "*"))
        index_url = "https://www.mosoteach.cn/web/index.php?c=clazzcourse&m=index"
        page = self.session.get(index_url, headers=self.header)
        self.cookies = requests.utils.dict_from_cookiejar(self.session.cookies)
        tree = etree.HTML(page.text)
        project_name = tree.xpath('//div[@class = "class-info"]/span[@class = "class-info-subject"]/@title')

        # Two kinds of course ids: ongoing courses and completed courses.
        ongoing_ids = tree.xpath('//li[@class = "class-item "]/@data-id')
        completed_ids = tree.xpath('//li[@class = "class-item class-item-complete"]/@data-id')

        # Merge both groups (ongoing first) into the list that the course
        # selection indexes into later.
        for completed in completed_ids:
            ongoing_ids.append(completed)
        self.cours_id = ongoing_ids

        # Show the numbered course names so the user can choose one.
        for number, course_name in enumerate(project_name):
            print(str(number) + '***************' + course_name)

    def get_resours(self):
        try:
        
        #获得想要下载的课程在列表中的索引
            select = int(input('输入课程编号口牙:'))
            u = 'https://www.mosoteach.cn/web/index.php?c=res&m=index&clazz_course_id='
            
            #取出想要下载的课程的课程ID
            self.id = self.cours_id[select]
            
            #拼接成完整的资源所在页面的url并且对url进行请求
            url = u + self.cours_id[select]
            html = self.session.get(url, headers=self.header)
            html = etree.HTML(html.text)
            
            #找出资源的文件名
            title = html.xpath('//span[@class = "res-name"]/text()')
            #找出资源的下载地址
            download_url = html.xpath('//*[@id="res-list-box"]/div/div[2]/div/@data-href')
            #找出资源的类型(是视频还是其他类型)
            resource_type = html.xpath('//*[@id="res-list-box"]/div/div[2]/div/@data-mime')
            #找出资源的ID
            resource_id = html.xpath('//*[@id="res-list-box"]/div/div[2]/div/@data-value')
            #找出资源是否获得过经验的标志(N或Y,N代表这个资源你还没获得经验,Y表示这个资源你已经获得过资源了)
            jy = html.xpath('//span/@data-is-drag')
            #如果资源类型是视频,找出视频的时长用于后续的刷视频
            vido_time = html.xpath('//div[@class = "create-box manual-order-hide-part"]/span[3]/text()')

            return title, download_url, jy, resource_type, vido_time, resource_id
        except:
            print('出错啦,请重新打开软件')

    def download_sours(self):
        title, download_url, jy, resource_type, vido_time, resource_id = self.get_resours()
        path = input('输入保存路径:')
        path = path.replace('/', '\\')
        for i in range(len(title)):
        #将资源的文件名和文件的后缀分开并变成一个列表用来判断文件类型
            mid = title[i].split(".")
            
            #如果分开后的列表只有一个元素则说明不是视频也不是可下载的文件,而是一个用来跳转的网址,如果jy为Y则说明这个资源已经获得过经验了,自动跳过这两种情况
            if len(mid) == 1 or jy[i] == "Y":
                pass
            else:
            	
                if resource_type[i] == 'video':
                    id = self.id
                    url = download_url[i]
                    name = title[i]
                    v_time = vido_time[i]
                    vido_id = resource_id[i]
                    print('正在下载{}'.format(name))
                    self.download_vido(url, name, path, v_time, vido_id, id)
                else:
                    url = download_url[i]
                    name = title[i]
                    print('正在下载{}'.format(name))
                    self.download_application(url, name, path)

    # 下载视频
    def download_vido(self, url, name, path, v_time, vido_id, id):
        tody = time.strftime('%m-%d')
        path = path + '\\' + tody
        if os.path.exists(path):
            self.split_part(url, name, path, num_thread=5)
        else:
            os.mkdir(path)
            self.split_part(url, name, path, num_thread=5)

        self.kill_vido(v_time, vido_id, id)

    def write(self, start, end, r_url, path, name):
        """Fetch one byte range of ``r_url`` and store it in the target file.

        Requests ``bytes=start-end`` and writes the response body into the
        already existing file ``path\\name`` beginning at offset ``start``.
        Used as the thread target by ``split_part``.
        """
        target = path + '\\' + name
        range_header = {'Range': 'bytes=%d-%d' % (start, end)}
        response = requests.get(r_url, headers=range_header, stream=True)
        # "r+b": update in place without truncating what other ranges wrote.
        with open(target, "r+b") as out:
            out.seek(start)
            out.write(response.content)

    def split_part(self, url, name, path, num_thread):
        """Download ``url`` to ``path\\name`` using several range requests.

        Resolves the redirect target of ``url`` (sent with the login
        cookies), reads the file size from its ``content-length`` header,
        pre-allocates the output file and starts up to ``num_thread``
        threads that each fetch one slice through ``write``. Returns once
        all of those threads have finished. If the redirect target or the
        size cannot be determined, a message is printed and nothing is
        downloaded.
        """
        # requests.head() does not follow redirects by default, so the real
        # file address has to be taken from the Location header.
        r = requests.head(url, cookies=self.cookies)
        r_url = r.headers.get('Location')
        if r_url is None:
            print("检查URL,或不支持对线程下载")
            return
        r = requests.head(r_url)
        try:
            file_size = int(r.headers['content-length'])
        except (KeyError, ValueError):
            print("检查URL,或不支持对线程下载")
            return

        # Pre-allocate the file so every thread can seek to its own offset.
        with open(path + '\\' + name, "wb") as fp:
            fp.truncate(file_size)

        part = file_size // num_thread
        workers = []
        for i in range(num_thread):
            start = part * i
            # HTTP Range end offsets are inclusive, hence the "- 1". The
            # last slice also takes the remainder of the integer division.
            if i == num_thread - 1:
                end = file_size - 1
            else:
                end = start + part - 1
            if end < start:
                # Empty slice (file smaller than num_thread bytes, or empty).
                continue
            t = threading.Thread(
                target=self.write,
                kwargs={'start': start, 'end': end, 'r_url': r_url, 'path': path, 'name': name},
                daemon=True,
            )
            t.start()
            workers.append(t)

        # Wait only for the threads started here; joining everything from
        # threading.enumerate() would also block on unrelated threads.
        for t in workers:
            t.join()

    def download_application(self, url, name, path):
        tody = time.strftime('%m-%d')
        path = path + '\\' + tody
        if os.path.exists(path):
            self.split_part(url, name, path, num_thread=5)
        else:
            os.mkdir(path)
            self.split_part(url, name, path, num_thread=5)

    # 刷视频
    def kill_vido(self, v_time, vido_id, course_id):
        url = 'https://www.mosoteach.cn/web/index.php?c=res&m=save_watch_to'
        time = re.findall('\d+\.\d', v_time)
        if len(time) == 0:
            time = re.findall('\d+', v_time)
        t = int(float(time[0]) * 60)

        post_data = {
            'clazz_course_id': '{}'.format(course_id),
            'res_id': '{}'.format(vido_id),
            'watch_to': '{}'.format(t),
            'duration': '{}'.format(t),
            'current_watch_to': '{}'.format(t),
        }
        response = self.session.post(url, data=post_data, headers=self.header)
        if response.text == "[\"success\"]":
            pass
        else:
            print(response.text[0])
            print('刷视频失败')


if __name__ == "__main__":
    # Creating the object is the whole program: the constructor logs in and
    # then runs the interactive download loop until the user declines.
    run = Download_resours()






所有代码仅供学习使用,请勿用作商业或违法行为。

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值