【流媒体】基于RTP协议的H264播放器

基于RTP协议的H264播放器

1. 概述

前面记录了一篇基于RTP协议的H264的推流器、接收器的实现过程,但是没有加上解码播放,这里记录一下如何实现解码和播放,也是在前面的基础之上实现的。前一篇的记录为【开源项目】基于RTP协议的H264码流发送器和接收器

在前文中,接收器将接收到的一系列数据包进行解析,并分成了一个个完整的帧,存储在内存之中。下面要将这些完整的帧进行解码成为yuv,并且播放。因此,需要添加解码和播放部分的代码。工程的代码结构为

在这里插入图片描述
工程的核心函数是udp_recevie_packet()(注意:代码中的函数名即为此拼写,含义为receive),这个函数的主要工作流程为:
(1)使用recvfrom()来接收数据流
(2)使用check_fragment()对数据流进行解析,并且拷贝到本地内存中
(3)使用h264_parse_packet()来解码所获取的数据,并且使用SDL进行播放

在处理接收的数据流时,我是按照一整个压缩帧进行存储的,例如存储一个完整的Intra帧或者一个完整的P帧,不包含后续帧的信息。然而,使用av_parser_parse2()进行分析时,会首先去寻找下一帧的起始码来确定当前帧是否已经完整地输入,如果没有找到,则很多分析流程不会执行。我在这里用了一个小技巧:在数据内存的最末尾加上一个伪起始码,让av_parser_parse2()确认已经接收了当前帧的所有数据,从而进行后续的分析

PS:不过其实这样写并不通用,只是为了配合我的整帧存储方式进行的微调。如果要实现通用的解码,在接收时去掉RTP的头,送入到av_parser_parse2()就可以了

2.工程

头文件的定义中,包括rtp header和rtp packet的定义,还定义了一个全局上下文信息结构体rtp_sdl_context_t

#pragma once

#include <stdio.h>
#include <WinSock2.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>

extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavutil/imgutils.h"
#include "SDL2/SDL.h"
};

#undef main

// Size in bytes of the stack buffer handed to recvfrom().
#define RECV_DATA_SIZE			10000
// 32 MiB. Parenthesized so the macro expands safely inside larger
// expressions (e.g. `x % MAX_BUFF_SIZE`).
#define MAX_BUFF_SIZE			(32 * 1024 * 1024)

// Bytes reserved for the RTP payload inside rtp_packet_t.
#define RTP_MAX_PKT_SIZE        1400
// Size in bytes of the fixed RTP header (see rtp_header_t).
#define RTP_HEADER_SIZE			12
// Extra slack appended to the payload array in rtp_packet_t.
#define RTP_PADDING_SIZE		64

// Position of a fragment inside a fragmented NAL unit (rtp_context_t::packet_loc).
#define RTP_PACKET_START		1
#define RTP_PACKET_FRAGMENT		2
#define RTP_PACKET_END			3

// Debug switches (0 = off).
// NOTE(review): the code visible in this file tests STREAM_DOWNLOAD, not
// RECV_STREAM_DOWNLOAD, so this first switch appears to be unused - confirm.
#define RECV_STREAM_DOWNLOAD	0
// When non-zero, decoded frames are also written to a .yuv file.
#define RECV_YUV_DOWNLOAD		0

/*
 * Fixed 12-byte RTP header, overlaid directly on the received datagram
 * (the receiver memcpy's the raw bytes into rtp_packet_t).
 *
 * NOTE(review): the bitfields are declared low-bits-first, which matches the
 * wire format only if the compiler allocates bitfields starting from the
 * least-significant bit - confirm for every target compiler.
 * NOTE(review): seq_num / timestamp / ssrc are not byte-swapped anywhere in
 * this file; presumably they are still in network byte order - verify before
 * using their values.
 */
typedef struct rtp_header
{
	// version occupies the most significant bits of byte 0
	/* byte 0 */
	uint8_t csrc_len : 4;		/* expect 0 */
	uint8_t extension : 1;		/* expect 1 */
	uint8_t padding : 1;        /* expect 0 */
	uint8_t version : 2;        /* expect 2 */
	/* byte 1 */
	uint8_t payload_type : 7;	/* the receiver handles 96 (H.264) and 33 (MPEG-TS) */
	uint8_t marker : 1;        /* expect 1 */
	/* bytes 2, 3 */
	uint16_t seq_num;
	/* bytes 4-7 */
	uint32_t timestamp;
	/* bytes 8-11 */
	uint32_t ssrc;            /* stream number is used here. */
}rtp_header_t;

/*
 * One received RTP datagram: the fixed header followed by the payload bytes.
 * The payload array holds RTP_MAX_PKT_SIZE bytes plus RTP_PADDING_SIZE bytes
 * of slack.
 */
typedef struct rtp_packet
{
	rtp_header_t rtp_h;
	uint8_t rtp_data[RTP_MAX_PKT_SIZE + RTP_PADDING_SIZE];
}rtp_packet_t;

/*
 * Reassembly state used while collecting the packets of one compressed frame.
 */
typedef struct rtp_context
{
	int rtp_packet_cnt;			// number of fragment packets appended since the last reset
	int rtp_buffer_size;		// number of valid bytes currently stored in rtp_buffer_data
	int rtp_frame_cnt;			// running count of frames received (used in log output)
	int packet_loc;				// RTP_PACKET_START / RTP_PACKET_FRAGMENT / RTP_PACKET_END of the last fragment seen
	uint8_t* rtp_buffer_data;	// heap buffer holding the frame being assembled; grown with realloc()
}rtp_context_t;

/*
 * Global player state: the FFmpeg objects used to decode H.264 and the SDL
 * objects used to display the decoded picture.
 */
typedef struct rtp_sdl_context
{
	// video param
	const AVCodec* codec;				// decoder returned by avcodec_find_decoder()
	AVCodecContext* codec_ctx;			// decoder instance
	AVCodecParserContext* parser_ctx ;	// bitstream parser fed through av_parser_parse2()
	AVFrame* frame;						// receives each decoded picture
	SwsContext* img_convert_ctx;		// pixel-format conversion context

	// SDL param
	SDL_Window* window;
	SDL_Renderer* render;
	SDL_Texture* texture;				// streaming IYUV texture the frames are uploaded to
	SDL_Rect rect;						// destination rectangle used when rendering
}rtp_sdl_context_t;

cpp文件的定义和前文类似,只是增加了一些FFmpeg解码函数和SDL播放函数,重要部分有注释

#pragma warning(disable:4996)
#pragma comment(lib,"ws2_32.lib")

#include "include/udp_rtp_decode_sdl.h"

// Output file for decoded YUV frames; only opened and written when
// RECV_YUV_DOWNLOAD is non-zero.
FILE* fp_yuv;

//int avc_init(const AVCodec* codec, AVCodecContext* codec_ctx, AVCodecParserContext* parser, AVFrame* frame)
int avc_init(rtp_sdl_context_t* rsc)
{
	AVCodecID codec_id = AV_CODEC_ID_H264;
	rsc->codec = avcodec_find_decoder(codec_id);
	if (!rsc->codec)
	{
		printf("find decoder failed\n");
		return -1;
	}

	rsc->codec_ctx = avcodec_alloc_context3(rsc->codec);
	if (!rsc->codec_ctx)
	{
		printf("alloc context3 failed\n");
		return -1;
	}

	rsc->parser_ctx = av_parser_init(codec_id);
	if (!rsc->parser_ctx)
	{
		printf("parser ctx init failed\n");
		return -1;
	}

	rsc->frame = av_frame_alloc();
	if (!rsc->frame)
	{
		printf("alloc frame failed\n");
		return -1;
	}

	if (avcodec_open2(rsc->codec_ctx, rsc->codec, NULL) < 0)
	{
		printf("Could not open codec\n");
		return -1;
	}

	return 0;
}

/*
 * Initialise SDL and create the window, renderer and streaming IYUV texture
 * used for playback. Window and texture are both 1280x720, and rsc->rect is
 * set to cover the whole window.
 *
 * Returns 0 on success. On failure returns -1, destroys anything that was
 * created and shuts SDL down again.
 */
int sdl_init(rtp_sdl_context_t* rsc)
{
	const int screen_w = 1280, screen_h = 720;
	const int pixel_w = 1280, pixel_h = 720;
	const int border = 0;
	const Uint32 pixformat = SDL_PIXELFORMAT_IYUV;

	if (!rsc)
	{
		return -1;
	}

	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		printf("could not init sdl\n");
		return -1;
	}

	//SDL 2.0 Support for multiple windows
	rsc->window = SDL_CreateWindow("Play", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
		screen_w, screen_h, SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
	if (!rsc->window) {
		printf("SDL: could not create window - exiting:%s\n", SDL_GetError());
		goto fail;
	}

	rsc->render = SDL_CreateRenderer(rsc->window, -1, 0);
	if (!rsc->render) {
		printf("SDL: could not create renderer:%s\n", SDL_GetError());
		goto fail;
	}

	rsc->texture = SDL_CreateTexture(rsc->render, pixformat, SDL_TEXTUREACCESS_STREAMING, pixel_w, pixel_h);
	if (!rsc->texture) {
		printf("SDL: could not create texture:%s\n", SDL_GetError());
		goto fail;
	}

	rsc->rect.x = 0 + border;
	rsc->rect.y = 0 + border;
	rsc->rect.w = screen_w - border * 2;
	rsc->rect.h = screen_h - border * 2;

	// The original fell off the end of a non-void function here.
	return 0;

fail:
	if (rsc->texture) { SDL_DestroyTexture(rsc->texture); rsc->texture = NULL; }
	if (rsc->render) { SDL_DestroyRenderer(rsc->render); rsc->render = NULL; }
	if (rsc->window) { SDL_DestroyWindow(rsc->window); rsc->window = NULL; }
	SDL_Quit();
	return -1;
}

/*
 * Release every FFmpeg object held by `rsc` (decoder context, parser, frame
 * and the swscale context) and reset the corresponding pointers to NULL so a
 * second call is harmless. SDL objects are not touched. A NULL `rsc` is
 * ignored.
 */
void av_free_all(rtp_sdl_context_t* rsc)
{
	if (!rsc)
	{
		return;
	}

	avcodec_free_context(&rsc->codec_ctx);

	if (rsc->parser_ctx)
	{
		av_parser_close(rsc->parser_ctx);
		rsc->parser_ctx = NULL; // av_parser_close() does not clear the caller's pointer
	}

	av_frame_free(&rsc->frame);

	sws_freeContext(rsc->img_convert_ctx); // accepts NULL
	rsc->img_convert_ctx = NULL;
}

/*
 * Validate the first byte of a NAL unit and extract its type.
 *
 * Bit layout of `data0`: forbidden_zero_bit (1 bit, mask 0x80),
 * nal_ref_idc (2 bits, mask 0x60), nal_unit_type (5 bits, mask 0x1F).
 *
 * Returns nal_unit_type (0..31), or -1 when forbidden_zero_bit is set.
 */
int check_nalu_header(uint8_t data0)
{
	// The masked value is either 0 or 0x80, so it has to be compared against
	// zero; comparing it with 1 (as before) could never detect the error.
	if ((data0 & 0x80) != 0)
	{
		printf("forbidden zero bit should be 0\n");
		return -1;
	}
	return data0 & 0x1F;
}

/*
 * Inspect the two leading bytes of a fragmented payload and record where the
 * fragment sits inside its NAL unit.
 *
 * data0 is treated as a NAL header byte; only nal_unit_type 28 is handled.
 * For that type, the two top bits of data1 (S = 0x80, E = 0x40) select
 * rtp_ctx->packet_loc:
 *   S set   -> RTP_PACKET_START
 *   E set   -> RTP_PACKET_END
 *   neither -> RTP_PACKET_FRAGMENT
 *
 * Returns 0 on success (packet_loc is left untouched for any other NAL type),
 * -1 when data0 is rejected by check_nalu_header() or when S and E are both
 * set.
 */
int check_fragment_nalu_header(rtp_context_t* rtp_ctx, uint8_t data0, uint8_t data1)
{
	const int nal_unit_type = check_nalu_header(data0);
	if (nal_unit_type < 0)
	{
		return -1; // malformed header byte, already reported by check_nalu_header()
	}
	if (nal_unit_type != 28)
	{
		return 0;
	}

	switch (data1 & 0xC0) // keep only the S and E bits
	{
	case 0x80:
		rtp_ctx->packet_loc = RTP_PACKET_START;
		break;
	case 0x40:
		rtp_ctx->packet_loc = RTP_PACKET_END;
		break;
	case 0x00:
		rtp_ctx->packet_loc = RTP_PACKET_FRAGMENT;
		break;
	default: // S and E both set
		printf("invalid packet loc\n");
		return -1;
	}
	return 0;
}

/*
 * Locate the first NAL unit in buf[0 .. size).
 *
 * A NAL unit starts right after a start code (00 00 01 or 00 00 00 01) and
 * ends just before the next 00 00 00 / 00 00 01 sequence.
 *
 * On return *nal_start is the offset of the first byte after the start code
 * and *nal_end is the offset one past the last byte of the NAL unit.
 *
 * Returns:
 *   > 0  length of the NAL unit (*nal_end - *nal_start)
 *   0    no start code found (*nal_start == *nal_end == 0), or an empty NAL
 *   -1   a start code was found but the data ended before the next one;
 *        *nal_end is set to `size`
 *
 * No byte at index >= size is ever read. As before, a terminating sequence
 * that sits in the very last three bytes of the data is not recognised (the
 * data is treated as having ended first).
 */
int find_nal_unit(uint8_t* buf, int size, int* nal_start, int* nal_end)
{
	int i = 0;
	int three_byte_code = 0;

	*nal_start = 0;
	*nal_end = 0;

	if (!buf || size < 3) { return 0; } // too short to hold any start code

	// find start
	for (;;)
	{
		int four_byte_code;

		three_byte_code = (buf[i] == 0 && buf[i + 1] == 0 && buf[i + 2] == 0x01);
		// only look at buf[i + 3] when it is inside the data
		four_byte_code = (i + 3 < size) &&
			buf[i] == 0 && buf[i + 1] == 0 && buf[i + 2] == 0 && buf[i + 3] == 0x01;
		if (three_byte_code || four_byte_code) { break; }

		i++;
		if (i + 4 >= size) { return 0; } // did not find nal start
	}

	if (!three_byte_code)
	{
		i++; // skip the extra leading zero of a 4-byte start code
	}
	i += 3;
	*nal_start = i;

	// find end: fewer than three bytes left means nothing can be compared
	if (i + 3 > size) { *nal_end = size; return -1; }

	while (!(buf[i] == 0 && buf[i + 1] == 0 && (buf[i + 2] == 0 || buf[i + 2] == 0x01)))
	{
		i++;
		if (i + 3 >= size) { *nal_end = size; return -1; } // stream ended first
	}

	*nal_end = i;
	return (*nal_end - *nal_start);
}

/*
 * Reset the reassembly state after a frame has been consumed: wipe the bytes
 * that were in use and zero the packet counter, the buffer fill level and the
 * fragment position. The buffer itself stays allocated for reuse and
 * rtp_frame_cnt keeps counting.
 */
void set_default_rtp_context(rtp_context_t* rtp_ctx)
{
	if (!rtp_ctx)
	{
		return;
	}

	// Clear exactly the bytes that hold data. The previous code passed
	// sizeof(rtp_buffer_size) (i.e. sizeof(int)) as the length and also
	// called memset() on a buffer that may not have been allocated yet.
	if (rtp_ctx->rtp_buffer_data && rtp_ctx->rtp_buffer_size > 0)
	{
		memset(rtp_ctx->rtp_buffer_data, 0, (size_t)rtp_ctx->rtp_buffer_size);
	}

	rtp_ctx->rtp_packet_cnt = 0;
	rtp_ctx->rtp_buffer_size = 0;
	rtp_ctx->packet_loc = 0;
}

/*
 * Append the payload of one received RTP packet to the frame being assembled
 * in rtp_ctx->rtp_buffer_data.
 *
 * `data` points at the payload (the bytes after the RTP header) and `size` is
 * the length of the whole datagram, header included.
 *
 * Two payload shapes are recognised:
 *  - a start code within the first bytes (find_nal_unit() reports a NAL start
 *    offset of 1..4): the payload is a complete NAL unit and is appended
 *    unchanged;
 *  - anything else: the payload is treated as a fragment whose first two
 *    bytes are the FU indicator / FU header. They are parsed with
 *    check_fragment_nalu_header() (which updates rtp_ctx->packet_loc) and
 *    stripped before the rest is appended.
 *
 * `rtp_pkt` is accepted for interface compatibility and is not used.
 *
 * Returns 0 on success, -1 when the packet is too short, has an invalid
 * header, would grow the frame beyond MAX_BUFF_SIZE, or memory cannot be
 * allocated. On allocation failure the data collected so far is kept.
 */
int check_fragment(rtp_context_t* rtp_ctx, rtp_packet_t* rtp_pkt, uint8_t* data, int size)
{
	int nal_start = 0, nal_end = 0;
	int payload_offset = 0;
	int chunk_size;
	int is_single_nal;
	const int data_size = size - RTP_HEADER_SIZE;

	(void)rtp_pkt;

	if (!rtp_ctx || !data)
	{
		return -1;
	}
	// At least the two bytes inspected below must be present; this also
	// rejects a negative length before it can reach memcpy().
	if (data_size < 2)
	{
		printf("rtp payload too short\n");
		return -1;
	}
	// The input comes from the network: refuse to grow without limit and
	// drop the partially assembled frame instead.
	if (data_size > (MAX_BUFF_SIZE) - rtp_ctx->rtp_buffer_size)
	{
		printf("rtp frame exceeds max buffer size\n");
		rtp_ctx->rtp_buffer_size = 0;
		rtp_ctx->rtp_packet_cnt = 0;
		return -1;
	}

	find_nal_unit(data, data_size, &nal_start, &nal_end); // check NALU split pos
	is_single_nal = (nal_start > 0 && nal_start < 5); // maybe SPS, PPS or small size frame

	if (is_single_nal)
	{
		if (check_nalu_header(data[nal_start]) < 0)
		{
			printf("invalid nalu header\n");
			return -1;
		}
	}
	else // multi-fragment
	{
		if (check_fragment_nalu_header(rtp_ctx, data[0], data[1]) < 0)
		{
			printf("invalid nalu header\n");
			return -1;
		}
		payload_offset = 2; // skip fu_indicator and fu_header
	}

	chunk_size = data_size - payload_offset;
	if (chunk_size > 0)
	{
		// Keep the old pointer until realloc() has succeeded so that a
		// failure neither leaks nor loses the data gathered so far.
		uint8_t* grown = (uint8_t*)realloc(rtp_ctx->rtp_buffer_data,
			((size_t)rtp_ctx->rtp_buffer_size + (size_t)chunk_size) * sizeof(uint8_t));
		if (!grown)
		{
			printf("realloc rtp_buffer_data failed\n");
			return -1;
		}
		rtp_ctx->rtp_buffer_data = grown;
		memcpy(grown + rtp_ctx->rtp_buffer_size, data + payload_offset, (size_t)chunk_size);

		// NOTE(review): fp_in is not declared anywhere in the code visible
		// here, so this debug dump cannot be enabled as-is.
#if STREAM_DOWNLOAD
		fwrite(rtp_ctx->rtp_buffer_data + rtp_ctx->rtp_buffer_size, 1, chunk_size, fp_in);
		fflush(fp_in);
#endif
	}

	if (is_single_nal)
	{
		fprintf(stdout, "rtp_ctx frame cnt:%d, frame_size:%d\n", rtp_ctx->rtp_frame_cnt, data_size);
		rtp_ctx->rtp_frame_cnt++;
		rtp_ctx->rtp_buffer_size += chunk_size;
	}
	else
	{
		rtp_ctx->rtp_packet_cnt++;
		rtp_ctx->rtp_buffer_size += chunk_size;

		if (rtp_ctx->packet_loc == RTP_PACKET_END) // end of packet
		{
			fprintf(stdout, "rtp_ctx frame cnt:%d, frame_size:%d\n", rtp_ctx->rtp_frame_cnt, rtp_ctx->rtp_buffer_size);
			rtp_ctx->rtp_frame_cnt++;
		}
	}
	return 0;
}
// 伪造起始码
int forge_end_code(uint8_t* data, int size)
{
	data = (uint8_t*)realloc(data, (size + 6) * sizeof(uint8_t));
	if (!data)
	{
		printf("realloc end code failed\n");
		return -1;
	}

	data[size] = 0x00;
	data[size + 1] = 0x00;
	data[size + 2] = 0x00;
	data[size + 3] = 0x01;
	data[size + 4] = 0x41;
	data[size + 5] = 0x9A;
	size += 6;
	return size;
}

int h264_parse_packet(rtp_sdl_context_t* rsc, rtp_context_t* rtp_ctx, rtp_packet_t* rtp_pkt)
{
	AVPacket* packet;
	int ret = 0;
	
	packet = av_packet_alloc();
	if (!packet)
	{
		printf("alloc packet failed\n");
		return -1;
	}

	// 添加伪起始码
	uint8_t* buf_data = rtp_ctx->rtp_buffer_data;
	int data_size = rtp_ctx->rtp_buffer_size;
	data_size = forge_end_code(buf_data, data_size);

	ret = av_parser_parse2(rsc->parser_ctx, rsc->codec_ctx, &packet->data, &packet->size,
		buf_data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
	if (ret < 0) {
		printf("parse packet failed, err:%d\n", ret);
		return -1;
	}

	ret = avcodec_send_packet(rsc->codec_ctx, packet);
	if (ret < 0)
	{
		printf("send packet failed\n");
		return -1;
	}

	rsc->codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
	rsc->img_convert_ctx = sws_getContext(rsc->codec_ctx->width, rsc->codec_ctx->height, rsc->codec_ctx->pix_fmt,
		rsc->codec_ctx->width, rsc->codec_ctx->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);

	while (ret >= 0) {
		ret = avcodec_receive_frame(rsc->codec_ctx, rsc->frame);
		if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
			return -1;
		else if (ret < 0) {
			fprintf(stderr, "Error during decoding\n");
			exit(1);
		}

		//printf("saving frame:%d\n", rsc->codec_ctx->frame_num);
		fflush(stdout);

#if RECV_YUV_DOWNLOAD
		int size = rsc->frame->width * rsc->frame->height;
		fwrite(rsc->frame->data[0], 1, size, fp_yuv);//Y
		fwrite(rsc->frame->data[1], 1, size / 4, fp_yuv);//U
		fwrite(rsc->frame->data[2], 1, size / 4, fp_yuv);//V
		fflush(fp_yuv);
#endif

		sws_scale(rsc->img_convert_ctx, (const unsigned char* const*)rsc->frame->data, rsc->frame->linesize, 0, rsc->codec_ctx->height,
			rsc->frame->data, rsc->frame->linesize);
		// SDL播放
		SDL_UpdateYUVTexture(rsc->texture, &rsc->rect,
			rsc->frame->data[0], rsc->frame->linesize[0],
			rsc->frame->data[1], rsc->frame->linesize[1],
			rsc->frame->data[2], rsc->frame->linesize[2]);

		SDL_RenderClear(rsc->render);
		SDL_RenderCopy(rsc->render, rsc->texture, NULL, &rsc->rect);	
		SDL_RenderPresent(rsc->render);								
		SDL_Delay(40); // delay 40ms
	}
	return 0;
}
/*
 * Receive RTP packets over UDP and play them.
 *
 * Binds a UDP socket to url:port, then loops forever:
 *   1. recvfrom() one datagram;
 *   2. check_fragment() appends its payload to the frame being assembled;
 *   3. once the last fragment of a frame has arrived
 *      (packet_loc == RTP_PACKET_END) the frame is handed to
 *      h264_parse_packet() for payload type 96, and the reassembly state is
 *      reset.
 *
 * Datagrams that are shorter than an RTP header or larger than rtp_packet_t
 * are dropped. The loop only ends on an unrecoverable socket error.
 *
 * Returns -1 on any setup failure or when the receive loop ends; all
 * resources acquired here are released before returning.
 */
int udp_recevie_packet(rtp_sdl_context_t* rsc, const char* url, int port)
{
	WSADATA wsaData;
	SOCKET ser_socket = INVALID_SOCKET;
	sockaddr_in ser_addr;
	sockaddr_in remote_addr;
	rtp_context_t* rtp_ctx = NULL;
	rtp_packet_t* rtp_pkt = NULL;
	char recv_data[RECV_DATA_SIZE];
	int on = 1;
	int status = -1;

	if (!rsc || !url || port <= 0 || port > 65535)
	{
		printf("invalid receive parameters\n");
		return -1;
	}

	if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0)
	{
		printf("WSAStartup failed\n");
		return -1;
	}

	ser_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
	if (ser_socket == INVALID_SOCKET)
	{
		printf("Invalid socket\n");
		goto cleanup;
	}
	setsockopt(ser_socket, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on));

	memset(&ser_addr, 0, sizeof(ser_addr));
	ser_addr.sin_family = AF_INET;
	ser_addr.sin_port = htons((u_short)port);
	ser_addr.sin_addr.s_addr = inet_addr(url);
	if (ser_addr.sin_addr.s_addr == INADDR_NONE)
	{
		printf("Invalid bind address\n");
		goto cleanup;
	}

	if (bind(ser_socket, (sockaddr*)&ser_addr, sizeof(ser_addr)) == SOCKET_ERROR)
	{
		printf("Bind socket addr error\n");
		goto cleanup;
	}

	rtp_ctx = (rtp_context_t*)calloc(1, sizeof(rtp_context_t));
	if (!rtp_ctx)
	{
		printf("alloc rtp_ctx failed\n");
		goto cleanup;
	}
	rtp_pkt = (rtp_packet_t*)calloc(1, sizeof(rtp_packet_t));
	if (!rtp_pkt)
	{
		printf("alloc rtp_pkt failed\n");
		goto cleanup;
	}

	fprintf(stdout, "Listening on port:%d\n", port);
	for (;;)
	{
		int addr_len = sizeof(remote_addr);
		// receive the next datagram
		int pkt_size = recvfrom(ser_socket, recv_data, RECV_DATA_SIZE, 0, (sockaddr*)&remote_addr, &addr_len);
		if (pkt_size == SOCKET_ERROR)
		{
			const int err = WSAGetLastError();
			// Oversized datagrams and ICMP-triggered resets are per-packet
			// conditions; anything else would repeat forever.
			if (err == WSAEMSGSIZE || err == WSAECONNRESET)
			{
				continue;
			}
			printf("recvfrom failed, err:%d\n", err);
			break;
		}

		// The datagram is copied into a fixed-size rtp_packet_t, so its
		// length must be validated first.
		if (pkt_size <= RTP_HEADER_SIZE || pkt_size > (int)sizeof(rtp_packet_t))
		{
			printf("drop packet with unexpected size:%d\n", pkt_size);
			continue;
		}
		memcpy(rtp_pkt, recv_data, (size_t)pkt_size);

		// check pkt data is fragment or not
		if (check_fragment(rtp_ctx, rtp_pkt, rtp_pkt->rtp_data, pkt_size) < 0)
		{
			continue;
		}

		if (rtp_ctx->packet_loc != RTP_PACKET_END)
		{
			continue;
		}

		switch (rtp_pkt->rtp_h.payload_type)
		{
		case 33: // mpegts
			// TODO: add mpegts parser
			printf("MPEGTS type\n");
			break;
		case 96: // h264
			// parse and decode the h264 stream
			h264_parse_packet(rsc, rtp_ctx, rtp_pkt);
			break;
		default:
			printf("Unknown type\n");
			break;
		}
		set_default_rtp_context(rtp_ctx); // set default rtp ctx value
	}

cleanup:
	if (rtp_ctx)
	{
		free(rtp_ctx->rtp_buffer_data);
	}
	free(rtp_ctx);
	free(rtp_pkt);
	if (ser_socket != INVALID_SOCKET)
	{
		closesocket(ser_socket);
	}
	WSACleanup();
	return status;
}

int main()
{
	rtp_sdl_context_t* rsc = (rtp_sdl_context_t*)malloc(sizeof(rtp_sdl_context_t));
	if (!rsc)
	{
		printf("malloc rsc failed\n");
		return -1;
	}
	memset(rsc, 0, sizeof(rtp_sdl_context_t));

	// 初始化参数
	avc_init(rsc);
	sdl_init(rsc);
	
	// 如果要存储yuv信息可以设置为1
#if RECV_YUV_DOWNLOAD
	fp_yuv = fopen("rtp_receive_yuv.yuv", "wb");
#endif
	// 开始接收数据包
	udp_recevie_packet(rsc, "127.0.0.1", 8880);
	av_free_all(rsc);

#if RECV_YUV_DOWNLOAD
	fclose(fp_yuv);
#endif

	return 0;
}

3.测试

发送端
在这里插入图片描述
接收端
在这里插入图片描述
接收端播放正常,感觉可以后续改一改SDL的逻辑,让窗口变成可移动和可缩放的
在这里插入图片描述

4.小结

总体来说,这个功能的实现是比较简单的,不过使用了一个小的技巧,伪造了一个起始地址,如果代码格式要求不严格,可以凑合着用。如果要做成大的工程,应该将多个packet直接送入解码器,这样比较合理,也更符合FFmpeg的设计原则

  • 14
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值