问题贴,今天遇见了个问题,解决了一天都没解决

今天用多线程写了个类,获取ts文件内容,并用另一个线程写入对应的文档,但是预计了个奇怪问题,文件写入,只能写入获取文件数据一半的数据量,到现在也没搞明白怎么回事。

直接上代码:

import threading
import requests
import time
from queue import Queue
import urllib3
urllib3.disable_warnings()

#这个就是ts文件的列表
ts_list=list()
# base_url = 'https://v4.cdtlas.com/20220223/MKPjICFW/1100kb/hls/index.m3u8'

# 页面地址:https://www.acfun.cn/v/ac33003549
base_url = 'https://ali-safety-video.acfun.cn/mediacloud/acfun/acfun_video/c185ef40f16fac2e-c4c2fc4890da062a1d1e0f2eb10f0634-hls_1080p_2.m3u8?pkey=ABASsT6ICcJOVagTydVvSQcR4002-YEqr_dEGDVGKunt4KjMuu-qcHnk69tpuMrirtXONFSMWIk9nA46NCwXkEPvd87umIrlEzuGoW7c-iQiFqoSzt9XAXqSdaWBbqFg02fP3iObVSJn9ZlyZtjCzg6PeSzaFhs8WpWSLGIviiL82UZjxgkdfD6hurOBhfeyfnrzCgZTh-3X5S6bTj6XACT1IUDz38_GDfMtonlVfZ0C5g&safety_id=AAIC1Add8kyLWL7-w9SQPeRw'
ii =0
ij = 0

class Producer(threading.Thread):

    def __init__(self, url_queue, data_queue,name_queue, *args, **kwargs):
        super(Producer, self).__init__(*args, **kwargs)
        self.url_queue = url_queue
        self.data_queue = data_queue
        self.name_queue = name_queue

    def run(self) -> None:
        # 从url_page中拿url
        while True:
            if self.url_queue.empty():
                print("Produceer  完毕   !!")
                break
            url = self.url_queue.get()
            self.get_data(url)

    def get_data(self,url):
        print('*'*100+url +'*'*100)
        global ii
        # print("~~~~~~" + "拿到第{}个数据,******".format(ii) + "~~~~~~")
        ii = ii + 1
        res = requests.get(url,verify = False)
        data = res.content
        self.data_queue.put(data)
        time.sleep(2)


        # name = self.name_queue.get().split('.ts?')[0]
        # with open("{}.ts".format(name), 'ab+') as f:
        #     f.write(data)
        #     f.flush()
        #     f.close()



class Consumer(threading.Thread):

    def __init__(self, url_queue, data_queue, name_queue, *args, **kwargs):
        super(Consumer, self).__init__(*args, **kwargs)
        self.url_queue = url_queue
        self.data_queue = data_queue
        self.name_queue = name_queue
        print(self.name_queue)

    def run(self) -> None:
        while True:
            if self.data_queue.empty():
                if self.url_queue.empty():
                    print('写入完毕'*300)
                    return
            data = self.data_queue.get()
            print(self.name_queue.get())
            global  ij
            print("写入数据量:{}".format(ij))
            ij = ij +1
            name = self.name_queue.get().split('.ts?')[0]
            with open("{}.ts".format(ij), 'ab+') as f:
                f.write(data)
                f.flush()
                f.close()

def parse_ts(data):
    try:
        data = str(data)
        finish_data = []
        for line in data.split('\n'):
            if line.endswith('.ts'):
                finish_data.append(line)
            elif line.find('.ts?') >= 0:
                finish_data.append(line)
            else:
                print('*'*20+'\n'+'解析错误,未解析到需要的ts文件,具体内容如下:'+line+'\n'+'*'*20)
    except Exception as e:
        import traceback

        traceback.print_exc()
        print('*'*20+"报错" * 50+'*'*20)
    finally:
        return finish_data



def main():
    name_queue = Queue()
    url_queue = Queue()
    data_queue = Queue()

    # 准备url
    item = 0
    for x in ts_list:
        # 判断是否需要拼接,有的ts文件是直接自带全部链接的
        if x.find('http://') >=0 or x.find('https://') >= 0:
            url = x
        else:
            # 需要去除base_url最后一个字段,
            url = '{}/{}.ts'.format(base_url.rsplit('/',1)[0],x)
        # url_queue.put(url)
        url_queue.put(url)
        item = item+1
        # name_queue.put(x)
        name_queue.put(x)
    print("总数据量是:{}".format(item))


    # 创建生产者、消费者进程
    for x in range(8):
        t = Producer(url_queue, data_queue, name_queue)
        t.start()

    for x in range(8):
        c = Consumer(url_queue, data_queue,name_queue)
        c.start()



if __name__ == '__main__':
    # 获取m3u8的url,根据url获取ts列表
    res = requests.get(base_url,verify = False)
    if res.status_code == 200:
        ts_list = parse_ts(res.text)
        print(ts_list)
        main()
    else:
        print("请求m3u8文件错误")

先记录下,等过几天有空了,再详细解决下。 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值