Pyav代码分析

泰勒朗斯

已于 2023-07-11 17:01:48 修改

阅读量2.1k

点赞数 3

文章标签： python

于 2023-07-11 12:12:56 首次发布

本文链接：https://blog.csdn.net/weixin_43360707/article/details/131654650

版权

PyAV是一个用Cython封装ffmpeg的库，提供了Python接口来操作音视频。它包括对packet、frame、stream等关键结构体的封装，用于解码、编码等任务。文章通过示例展示了如何创建CodecContext进行解码，以及如何使用Container中内置的解码器进行解码。需要注意的是，PyAV的解码过程不能像ffmpeg一样进行细粒度控制，且在硬件解码时需要考虑初始化时间。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

PyAV提供了ffmpeg的python接口，但实际它只是使用ffmpeg做后端，使用Cython封装了ffmpeg的接口，所以实际调用的还是ffmpeg。

也就是说，PyAV用类封装了ffmpeg提供的API，如果想要使用，关键还是要看懂其整体架构。
PyAV用类封装了ffmpeg的几个关键结构体：

名称	作用
packet	封装了ffmpeg的AVPacket
frame	封装了ffmpeg的AVFrame
stream	封装了ffmpeg的AVStream
option	封装了ffmpeg的AVOption
InputContainer	封装了ffmpeg的avformat_open_input demux
OutputContainer	封装了ffmpeg的av_interleaved_write_frame mux
CodecContext	封装了ffmpeg codec相关代码

具体使用时，如果你有自己的ffmpeg，那么先编译安装自己的ffmpeg，然后：

pip install av --no-binary av

如果没有自己的ffmpeg:

pip install av

安装好之后就可以使用了。

下面先看几个简单的案例：

import os
import subprocess
import logging
import time

logging.basicConfig(level=logging.DEBUG)
logging.getLogger('libav').setLevel(logging.DEBUG)

import av
import av.datasets


# We want an H.264 stream in the Annex B byte-stream format.
# We haven't exposed bitstream filters yet, so we're gonna use the `ffmpeg` CLI.
h264_path = "libx264_640x360_baseline_5_frames.h264"
# The input file can be produced from an MP4 with the command below:
# if not os.path.exists(h264_path):
#     subprocess.check_call(
#         [
#             "ffmpeg",
#             "-i",
#             av.datasets.curated("zzsin_1920x1080_60fps_60s.mp4"),
#             "-vcodec",
#             "copy",
#             "-an",
#             "-bsf:v",
#             "h264_mp4toannexb",
#             h264_path,
#         ]
#     )


def save_frames(frames, count):
    """Print and save every decoded frame as a JPEG.

    Args:
        frames: iterable of decoded frames returned by ``codec.decode()``.
        count: number of frames saved so far; used to number the output files.

    Returns:
        The updated frame count.
    """
    for frame in frames:
        print("       ", frame)
        count += 1
        print('--count:%d--' % count)
        frame.to_image().save("night-sky.{:04d}.jpg".format(count), quality=80,)
    return count


# The file is opened in a `with` block so the handle is released even if
# parsing or decoding raises.
with open(h264_path, "rb") as fh:
    codec = av.CodecContext.create("h264_efcodec", "r")
    codec.options = {"hw_id": "15"}
    codec.open()
    try:
        print(codec.name)
        first = True
        count = 0
        while True:
            chunk = fh.read(1 << 16)
            packets = codec.parse(chunk)
            print("Parsed {} packets from {} bytes:".format(len(packets), len(chunk)))

            for packet in packets:
                print("   ", packet)
                frames = codec.decode(packet)

                # Pause once after the first decode call to give the hardware
                # decoder time to finish starting up.
                if first:
                    time.sleep(2)
                    first = False
                count = save_frames(frames, count)

            # We wait until the end to bail so that the last empty `buf` flushes
            # the parser.
            if not chunk:
                break

        # An empty packet signals end-of-stream and drains the frames still
        # buffered inside the decoder.
        p = av.Packet(None)
        print("send eos:", p)
        count = save_frames(codec.decode(p), count)

        print('all count:%d' % count)
    finally:
        # Close the decoder on both the success and the error path.
        codec.close()

上面是通过创建codec来进行解码的案例，可以在创建的时候指定解码器名称，也可以设置option。这里要注意一点：这里只能parse Annex B格式的视频流，AVCC格式的视频流是不能在这里解析的。

下面是另外一个demux codec的案例：

import time

import av
import av.datasets

# Demux an MP4 and decode every packet with the decoder that the container
# attached to the packet's stream.
container = av.open('ocr_400_400_5_frames.mp4')
first = True

count = 0
start_time = time.time()
try:
    for packet in container.demux():
        print(packet)
        for frame in packet.decode():
            print(frame)
            count += 1
            print('---frame:%d---' % count)
        # Pause once after the first packet to give the decoder time to start.
        if first:
            time.sleep(2)
            first = False

    # Elapsed wall-clock time; note that it includes the 2 s sleep above.
    auto_time = time.time() - start_time
finally:
    # Release the container even if demuxing or decoding raised.
    container.close()

# Use %-formatting here; passing `count` as a second print argument would
# leave the literal "%d" in the output.
print('all frame:%d' % count)

这里的codec是container中内置的一个解码器，这里无法自主选择具体使用哪个解码器。

综合上面两个案例，我们可以使用下面的方法来解码：

import os
import subprocess
import logging
import time

import av

logging.basicConfig(level=logging.DEBUG)
logging.getLogger('libav').setLevel(logging.DEBUG)

# Despite the variable name, the input here is an MP4 container: the container
# does the demuxing, while decoding is done by a decoder we create ourselves.
h264_path = "ocr_400_400_5_frames.mp4"
input_ = av.open(h264_path, options={"vsync": "0"})
try:
    in_stream = input_.streams.video[0]

    codec = av.CodecContext.create("h264_efcodec", "r")
    codec.options = {"hw_id": "15"}
    # codec.options={"device_id":"0"}
    codec.open()
    try:
        print(codec.name)
        # print(codec.extradata_size)
        # Hand the stream's extradata to our own decoder.
        # NOTE(review): this is assigned after open(); confirm the target
        # decoder picks up extradata that is set at this point.
        codec.extradata = in_stream.codec_context.extradata

        first = True
        num = 0

        for packet in input_.demux(in_stream):
            print('----packet---')
            # NOTE(review): timestamps are forced to 0 on every packet;
            # presumably the hardware decoder does not need them - confirm.
            packet.dts = 0
            packet.pts = 0
            print("   ", packet)

            frames = codec.decode(packet)
            print('---after decode---')
            # Pause once after the first decode call to give the hardware
            # decoder time to finish starting up.
            if first:
                time.sleep(2)
                first = False
            for frame in frames:
                print("       ", frame)
                num += 1
                print('-----frame:%d-----' % num)

        print('all:%d' % num)
    finally:
        # Close the decoder on both the success and the error path.
        codec.close()
finally:
    # The container was previously never closed; release it explicitly.
    input_.close()

上面这个案例结合了第一个和第二个解码的使用方法，在这里我们采用demux+decode(自己设置的解码器)。不要觉得这里很简单，这是我看完整个封装源代码才搞清楚的，当然这里唯一的缺点是InputContainer内部为了分配codec，多占用了一些内存，不过这也无所谓了。

看完上面实例，可能发现一个sleep(2)，为什么要加这句？主要是因为，我们硬件解码器open()的时候花费的时间较长，这里增加sleep函数来等待底下硬件解码器完全启动，不然会出现所有的输入数据送完了，解码器一帧数据都还没有解码出来。这里又引出PyAV的一个局限，它只能调用封装后的decode()接口，无法调用更加细粒度的ffmpeg接口，导致无法像ffmpeg那样通过循环调用avcodec_receive_frame（）来取解码后的数据。

/*
 * Read packets from the global input context g_ifmt_ctx, feed the ones that
 * belong to video_stream_idx to dec_ctx, and hand every decoded frame to
 * save_yuv_file().
 *
 * Returns 0 once the decoder reports end of stream, -1 on any error.
 */
static int decode(AVCodecContext *dec_ctx) {
    int ret;
    AVPacket packet;
    AVFrame *p_frame;
    int eos = 0;

    p_frame = av_frame_alloc();
    if (!p_frame) {
        av_log(dec_ctx, AV_LOG_ERROR, "av_frame_alloc failed\n");
        return -1;
    }

    while (1) {
        ret = av_read_frame(g_ifmt_ctx, &packet);
        if (ret == AVERROR_EOF) {
            av_log(g_dec_ctx, AV_LOG_INFO, "av_read_frame got eof\n");
            eos = 1;
        } else if (ret < 0) {
            av_log(g_dec_ctx, AV_LOG_ERROR, "av_read_frame failed, ret(%d)\n", ret);
            goto fail;
        }

        /*
         * At EOF av_read_frame() did not deliver a packet, so its
         * stream_index must not be inspected; otherwise the flush could be
         * skipped and the loop would read EOF forever.
         */
        if (!eos && packet.stream_index != video_stream_idx) {
            av_packet_unref(&packet);
            continue;
        }

        /* A NULL packet puts the decoder into draining (flush) mode. */
        ret = avcodec_send_packet(dec_ctx, eos ? NULL : &packet);
        /*
         * The packet stays owned by the caller after avcodec_send_packet(),
         * so release it right away; this also covers the error paths below.
         */
        if (!eos) {
            av_packet_unref(&packet);
        }
        if (ret < 0) {
            av_log(dec_ctx, AV_LOG_ERROR,
                "send pkt failed, ret(%d), %s, %d\n", ret, __FILE__, __LINE__);
            goto fail;
        }

        /* Loop here to pull all decoded YUV frames out of the decoder. */
        while (ret >= 0 || eos) {
            ret = avcodec_receive_frame(dec_ctx, p_frame);
            if (ret == AVERROR_EOF) {
                av_log(g_dec_ctx, AV_LOG_INFO, "dec receive eos\n");
                av_frame_unref(p_frame);
                av_frame_free(&p_frame);
                return 0;
            } else if (ret == 0) {
                save_yuv_file(dec_ctx, p_frame);
                av_frame_unref(p_frame);
            } else if (ret < 0 && ret != AVERROR(EAGAIN)) {
                av_log(dec_ctx, AV_LOG_ERROR, "receive frame failed\n");
                goto fail;
            }
            /* AVERROR(EAGAIN): more input is needed; leave unless draining. */
        }
    }

 fail:
    av_frame_free(&p_frame);
    return -1;
}

到这里为止，Pyav基础用法基本完成。接下来讲一下架构。
Packet类：
主要封装了AVPacket，提供了一些可以set/get packet成员的一些property，里面函数有to_bytes（）可以将data数据转为bytes对象，另外还有一个decode()，是通过其内部stream的指针去指向codec，然后去解码。

序号	Value
成员变量-1	AVPacket* ptr
成员变量-2	Stream _stream
Property	stream_index
Property	stream
Property	time_base
Property	pts
Property	dts
Property	pos
Property	size
Property	is_keyframe
Property	is_corrupt
Property	buffer_size(也就是packet的datasize)
Property	buffer_ptr
Property	to_bytes(将data转为python的bytes)
Fun	decode(self._stream.decode(self))
构造	self.ptr = lib.av_packet_alloc()
析构	lib.av_packet_free(&self.ptr)

Frame 类,这是一个基类，所以里面只有基础信息

序号	Value
成员变量-1	AVFrame *ptr
成员变量-2	int index
成员变量-3	AVRational _time_base
成员变量-4	_SideDataContainer _side_data
Property	dts
Property	pts
Property	time(The presentation time in seconds for this frame)
Property	time_base(fractions.Fraction)
Property	is_corrupt( Is this frame corrupt?)
Property	side_data
构造	self.ptr = lib.av_frame_alloc()
析构	lib.av_frame_free(&self.ptr)

VideoFrame 类：
该类继承了Frame类，除了提供获取AVFrame中变量的属性外，还提供了几个函数，可以进行颜色空间转换（CSC）、保存为jpg，或者从jpg图像、numpy数组转换为Frame。

序号	Value
成员变量-1	VideoReformatter reformatter
成员变量-2	VideoFormat format
Property	width
Property	height
Property	key_frame
Property	interlaced_frame
Property	pict_type
Property	planes
Fun	to_rgb()( return self.reformat(format="rgb24", **kwargs))
Fun	to_image(可以保存为jpg)
Fun	to_ndarray()
Fun	from_image(img)
Fun	from_ndarray()

Stream类：
在stream类中还包含了两个其它的类：Container 和CodecContext

序号	Value
成员变量-1	AVStream *ptr
成员变量-2	Container container
成员变量-3	CodecContext codec_context
成员变量-4	dict metadata
Property	id
Property	profile
Property	index
Property	average_rate
Property	base_rate
Property	guessed_rate
Property	start_time
Property	duration
Property	frames(The number of frames this stream contains.)
Property	language
Property	Type（ Examples: `'audio'`, `'video'`, `'subtitle'`.）
Fun	encode()
Fun	decode()
Fun	get()/set() attr

ContainerFormat 类：
这个类封装了ffmpeg的两个结构体：AVInputFormat和AVOutputFormat ，在类里提供了可以访问该结构体的一些属性，主要在后面的container类中使用

序号	Value
成员变量-1	AVInputFormat *iptr
成员变量-2	AVOutputFormat *optr
Property	descriptor
Property	options
Property	input
Property	output
Property	is_input
Property	is_output
Property	long_name
Property	extensions
Property	flags
构造	self.iptr = lib.av_find_input_format(name)/self.optr = lib.av_guess_format(name, NULL, NULL)
全局函数
全局函数	get_output_format_names() 获取所有ffmpeg支持的output names
全局函数	get_input_format_names()获取所有ffmpeg支持的input names

Container类：基类

序号	Value
成员变量-1	AVFormatContext *ptr
成员变量-2	dict options
成员变量-3	dict container_options
成员变量-4	list stream_options
成员变量-5	StreamContainer streams
成员变量-6	open_timeout
成员变量-7	read_timeout
成员变量-8	io_open
Fun	dumps_format()
Fun	set_timeout() self.interrupt_callback_info.timeout = timeout
Fun	start_timeout() self.interrupt_callback_info.start_time = clock()
构造函数	avformat_open_input()
析构函数	avformat_free_context(self.ptr)
全局函数	open()根据flag是r还是w选择return InputContainer(）/ return OutputContainer()

InputContainer(Container) 类：
InputContainer 这里有个创建codec的步骤，特别要注意，并且这里是采用avcodec_find_decoder（）意味着你无法在这里选择合适的解码器，所以我们上面第三个案例中采用了demux和单独codec create（）的做法

序号	Value
成员变量-1	AVStream *ptr
成员变量-2	Container container
Fun	demux()在里面对stream赋值packet._stream = self.streams[packet.ptr.stream_index]，stream中包含了py_codec_context
Fun	decode() 实际使用的是 packet.decode()
Fun	seek()
构造函数	avformat_find_stream_info（）->avcodec_find_decoder()->avcodec_alloc_context3()
析构函数	avformat_close_input()

OutputContainer(Container) 类：

序号	Value
成员变量-1	bint _started
成员变量-2	_done
Fun	add_stream() 这里创建一个输出流avformat_new_stream()->创建编码器CodecContext py_codec_context = wrap_codec_context(codec_context, codec)，注意这里的py_codec_context赋给stream中的相关变量，用来后期encode()
Fun	mux（） av_interleaved_write_frame(self.ptr, self.packet_ptr)

Codec 类：
该类主要封装了AVCodec，提供了相关获取的属性

序号	Value
成员变量-1	AVCodec *ptr
成员变量-2	AVCodecDescriptor *desc
Property	is_decoder
Property	descriptor
Property	name
Property	type
Property	frame_rates
Property	audio_rates
Property	video_formats
Property	audio_formats
Fun	create() 工厂函数return CodecContext.create(self)
构造函数	avcodec_find_encoder_by_name() /avcodec_find_decoder_by_name根据mode是w/r选择解码器或者编码器
析构函数
全局函数	cdef Codec wrap_codec(const lib.AVCodec *ptr)
全局函数	get_codec_names() 所有ffmpeg支持的codec names
全局函数	dump_codecs() """Print information about available codecs."""

CodecContext类：

序号	Value
成员变量-1	AVCodecContext *ptr
成员变量-2	bint extradata_set
成员变量-3	int stream_index
成员变量-4	Codec codec
成员变量-5	dict options
Property	flags
Property	extradata set/get att 参见第三个sample中的设置
Property	extradata_size
Property	is_open
Property	is_encoder
Property	is_decoder
Property	skip_frame
Property	thread_type
Property	thread_count
Property	bit_rate_tolerance
Property	max_bit_rate
Property	bit_rate
Property	ticks_per_frame
Property	codec_tag
Property	time_base
Property	profile
Property	name
Fun	create()工厂函数
Fun	open()
Fun	close()
Fun	encode()这里会调用一次self.open(strict=False)，所以open()可以不用显式调用
Fun	decode() 这里会调用一次self.open(strict=False)，所以open()可以不用显式调用
Fun	parse 里面调用的是`av_parser_parse2()`
构造函数	self.codec = wrap_codec() ->self.options = {} 这个options在第一个decode()调用之前可以设置，具体参见第三个sample
析构函数	lib.avcodec_close(self.ptr)/lib.avcodec_free_context(&self.ptr)
全局函数	CodecContext wrap_codec_context(lib.AVCodecContext, const lib.AVCodec)

VideoCodecContext(CodecContext) 类：
该类也没做什么，主要对外提供属性接口

序号	Value
成员变量-1	VideoFormat _format
成员变量-2	VideoReformatter reformatter
成员变量-3	int encoded_frame_count (for encoding)
成员变量-4	VideoFrame next_frame (for decoding)
Property	format
Property	width
Property	height
Property	pix_fmt
Property	framerate
Property	rate
Property	gop_size
Property	sample_aspect_ratio
Property	display_aspect_ratio
Property	has_b_frames
Property	coded_width
Property	coded_height
Fun	_build_format()

VideoReformatter 类：
主要进行sws_scale的操作

序号	Value
成员变量-1	SwsContext *ptr
Fun	reformat()

其它辅助类：
VideoFormat 类：
该类主要封装了AVPixelFormat和AVPixFmtDescriptor

序号	Value
成员变量-1	AVPixelFormat pix_fmt
成员变量-2	AVPixFmtDescriptor *ptr
Property	name
Property	bits_per_pixel
Property	padded_bits_per_pixel
Property	is_big_endian
Property	has_palette
Property	is_bit_stream
Property	is_planar
Property	is_rgb
Fun	chroma_width
Fun	chroma_height
构造	self.pix_fmt = pix_fmt ，self.ptr = lib.av_pix_fmt_desc_get(pix_fmt)

PyIOFile 类：

序号	Value
成员变量-1	AVIOContext *iocontext
成员变量-2	unsigned char *buffer
成员变量-3	long pos
Property	fread
Property	fwrite
Property	fseek
Property	ftell
Property	fclose
Fun	pyio_read()
Fun	pyio_write()
Fun	pyio_seek()
Fun
Fun