[FFMPEG-4]重构从mp3中获取pcm数据

最新推荐文章于 2022-02-23 15:02:25 发布

yxm410

最新推荐文章于 2022-02-23 15:02:25 发布

阅读量927

点赞数

分类专栏： FFMPEG 文章标签： c ffmpeg pcm

本文链接：https://blog.csdn.net/u010673462/article/details/43269809

版权

FFMPEG 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

一直以来都想把自己的代码打磨成工业级别的，但是奈何实力有限，憋了两天，还是这样一个烂烂的代码。还是有很多硬编码的地方，而且没经过充分测试。

马上就放假回家了，想着一定要有一个结尾，不能把事情拖到寒假或者明年，就先把这个烂代码放上。算是一个总结。

主要实现了从mp3中提取一段时长的pcm数据和wav头。这个需求是在做基于内容的音乐检索的时候遇到的。上代码。

#include    <stdlib.h>
#include    <stdio.h>
#include    <string.h>
#include    <math.h>
#include 	<stdint.h>
#include	<malloc.h>

#include  	<libavutil/avutil.h>
#include  	<libavutil/attributes.h>
#include    <libavutil/channel_layout.h>
#include    <libavutil/common.h>
#include    <libavutil/file.h>
#include    <libavutil/imgutils.h>
#include    <libavutil/mathematics.h>
#include    <libavutil/opt.h>
#include    <libavutil/samplefmt.h>
#include    <libavutil/timestamp.h>

#include    <libavformat/avformat.h>
#include    <libavformat/avio.h>

#include    <libavcodec/avcodec.h>

#include    <libswscale/swscale.h>

#include    <libswresample/swresample.h>

#include 	"AudioFormatUtils.h"

/* Android logcat variants of the logging macros, kept disabled. */
//#define LOG_TAG "GETSTREAM"
//#define LOGD(...)  __android_log_print(ANDROID_LOG_DEBUG,LOG_TAG,__VA_ARGS__) // define the LOGD (debug) level
//#define LOGI(...)  __android_log_print(ANDROID_LOG_INFO,LOG_TAG,__VA_ARGS__) // define the LOGI (info) level
//#define LOGW(...)  __android_log_print(ANDROID_LOG_WARN,LOG_TAG,__VA_ARGS__) // define the LOGW (warning) level
//#define LOGE(...)  __android_log_print(ANDROID_LOG_ERROR,LOG_TAG,__VA_ARGS__) // define the LOGE (error) level
//#define LOGF(...)  __android_log_print(ANDROID_LOG_FATAL,LOG_TAG,__VA_ARGS__) // define the LOGF (fatal) level

/*
 * Active logging macros: every level is a printf-style write to the stream
 * named by LOG_TAG (stderr). No level prefix or trailing newline is added,
 * so callers must put "\n" in the format string themselves.
 */
#define LOG_TAG stderr
#define LOGD(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGI(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGW(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGE(...) fprintf(LOG_TAG, __VA_ARGS__)
#define LOGF(...) fprintf(LOG_TAG, __VA_ARGS__)

/* Heap buffer that get_wav_stream() (re)allocates and fills with the
 * converted PCM bytes; main() reads it after the call. */
uint8_t *resdatas = NULL;
/* 44-byte WAV header that get_wav_stream() fills via get_wav_header(). */
unsigned char header[44];

/* Store the low 16 bits of `value` at `p` in little-endian order and
 * return the position just past the written bytes. */
static unsigned char *wav_put_le16(unsigned char *p, unsigned long value) {
	*p++ = (unsigned char) (value >> 0);
	*p++ = (unsigned char) (value >> 8);
	return p;
}

/* Store the low 32 bits of `value` at `p` in little-endian order and
 * return the position just past the written bytes. */
static unsigned char *wav_put_le32(unsigned char *p, unsigned long value) {
	*p++ = (unsigned char) (value >> 0);
	*p++ = (unsigned char) (value >> 8);
	*p++ = (unsigned char) (value >> 16);
	*p++ = (unsigned char) (value >> 24);
	return p;
}

/* Copy a four-character chunk tag to `p` and return the position past it. */
static unsigned char *wav_put_tag(unsigned char *p, const char tag[4]) {
	int i;
	for (i = 0; i < 4; i++) {
		*p++ = (unsigned char) tag[i];
	}
	return p;
}

/**
 * Serialize a 44-byte PCM WAV (RIFF) header.
 *
 * The payload size is computed as bytes-per-sample * total_samples, so
 * total_samples is expected to count every sample of every channel
 * (NOTE(review): confirm all callers follow this convention).
 *
 * All sizes are computed with integer arithmetic; the previous float
 * computation silently rounded payloads larger than 2^24 bytes.
 *
 * @param header     output buffer, must hold at least 44 bytes.
 * @param wavHeader  stream description (sample rate, bits, channels, samples).
 * @return 0 on success, -1 if either pointer is NULL.
 */
int get_wav_header(unsigned char header[], WavHeader *wavHeader) {
	unsigned char *p = header;
	unsigned int bytes;
	unsigned long data_size;

	if (header == NULL || wavHeader == NULL) {
		return -1;
	}

	/* Bits per sample rounded up to whole bytes. */
	bytes = (wavHeader->bits_per_sample + 7) / 8;
	data_size = (unsigned long) bytes * wavHeader->total_samples;

	p = wav_put_tag(p, "RIFF");                 /* offset 0  */
	p = wav_put_le32(p, data_size + (44 - 8));  /* offset 4: file size minus the 8-byte RIFF preamble */
	p = wav_put_tag(p, "WAVE");                 /* offset 8  */

	p = wav_put_tag(p, "fmt ");                 /* offset 12 */
	p = wav_put_le32(p, 16);                    /* offset 16: size of the fmt chunk body */
	p = wav_put_le16(p, 1);                     /* offset 20: format tag 1 */
	p = wav_put_le16(p, wavHeader->channels);   /* offset 22: channel count */
	p = wav_put_le32(p, wavHeader->samplerate); /* offset 24: sample rate */
	p = wav_put_le32(p, wavHeader->samplerate * bytes * wavHeader->channels); /* offset 28: bytes per second */
	p = wav_put_le16(p, (unsigned long) bytes * wavHeader->channels);         /* offset 32: block align */
	p = wav_put_le16(p, wavHeader->bits_per_sample);                          /* offset 34: bits per sample */

	p = wav_put_tag(p, "data");                 /* offset 36 */
	p = wav_put_le32(p, data_size);             /* offset 40: payload length, header excluded */

	return 0;
}
/**
 * Decode a time window of an audio file and convert it to interleaved PCM.
 *
 * The converted bytes are stored in the global `resdatas` buffer (which is
 * reallocated here) and a matching WAV header is written to the global
 * `header` array through get_wav_header().
 *
 * @param inFile       path of the input file.
 * @param start_pos_s  start position in seconds.
 * @param len_s        length in seconds; 0, or a window running past the end
 *                     of the file, means "until the end of the file".
 * @param channel_num  output channel count: 1 selects mono, anything else stereo.
 * @param bit_rate     output bits per sample: 8 or 32 select U8 / S32,
 *                     anything else selects signed 16-bit.
 * @param sample_rate  output sample rate in Hz.
 * @return number of PCM bytes stored in `resdatas`, or -1 on failure.
 */
int get_wav_stream(const char * inFile, double start_pos_s, double len_s,
		int channel_num, int bit_rate, int sample_rate) {

	int res_data_size = 0;

	AVPacket packet;
	AVFrame* frame = NULL;
	AVFormatContext* formatContext = NULL;
	AVStream* audioStream = NULL;
	AVCodecContext* codecContext = NULL;
	SwrContext* swrContext = NULL;
	uint8_t* convBuffer = NULL; /* scratch output of swr_convert, grown on demand */
	int64_t convCapacity = 0; /* capacity of convBuffer in samples per channel */

	if (inFile == NULL || sample_rate <= 0 || start_pos_s < 0 || len_s < 0) {
		LOGE("Invalid arguments\n");
		return -1;
	}

	/* Normalise the requested format once so that the layout/format handed
	 * to the resampler and every size computation below always agree. */
	const int outChannels = (channel_num == 1) ? 1 : 2;
	const int outBits = (bit_rate == 32 || bit_rate == 8) ? bit_rate : 16;
	const int outBytes = outBits / 8;

	frame = av_frame_alloc();
	if (!frame) {
		LOGE("Error allocating the frame");
		goto end;
	}

	if (avformat_open_input(&formatContext, inFile, NULL, NULL) != 0) {
		LOGE("Error opening the file");
		goto end;
	}

	if (avformat_find_stream_info(formatContext, NULL) < 0) {
		LOGE("Error finding the stream info");
		goto end;
	}

	if (formatContext->duration == AV_NOPTS_VALUE) {
		LOGE("Unknown file duration\n");
		goto end;
	}

	double end_pos_s = (start_pos_s + len_s); /* requested end, in seconds */
	/* Divide in floating point: integer division dropped the fractional second. */
	double duration = (double) formatContext->duration / AV_TIME_BASE;

	if (start_pos_s > duration) {
		LOGE("Start time is biger than the total time.");
		goto end;
	}

	if ((end_pos_s > duration) || len_s == 0) {
		end_pos_s = duration;
		len_s = duration - start_pos_s;
	}
	LOGE("len_s= %f\n", len_s);

	unsigned int i;
	for (i = 0; i < formatContext->nb_streams; ++i) {
		if (formatContext->streams[i]->codec->codec_type
				== AVMEDIA_TYPE_AUDIO) {
			audioStream = formatContext->streams[i];
			break;
		}
	}
	if (audioStream == NULL) {
		LOGE("Could not find any audio stream in the file");
		goto end;
	}

	codecContext = audioStream->codec;
	codecContext->codec = avcodec_find_decoder(codecContext->codec_id);
	if (codecContext->codec == NULL) {
		LOGE("Couldn't find a proper decoder");
		goto end;
	} else if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0) {
		LOGE("Couldn't open the context with the decoder");
		goto end;
	}
	if (codecContext->sample_rate <= 0) {
		LOGE("Invalid input sample rate\n");
		goto end;
	}

	/* Output format. Plain ints are used in the signature to keep a future
	 * JNI binding simple. */
	int64_t outChannelLayout =
			(outChannels == 1) ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;

	enum AVSampleFormat outSampleFormat;
	if (outBits == 32) {
		outSampleFormat = AV_SAMPLE_FMT_S32;
	} else if (outBits == 8) {
		outSampleFormat = AV_SAMPLE_FMT_U8;
	} else {
		outSampleFormat = AV_SAMPLE_FMT_S16;
	}

	int outSampleRate = sample_rate;

	swrContext = swr_alloc_set_opts(NULL, outChannelLayout, outSampleFormat,
			outSampleRate,
			av_get_default_channel_layout(codecContext->channels),
			codecContext->sample_fmt, codecContext->sample_rate, 0, NULL);
	if (swrContext == NULL) {
		LOGE( "Couldn't create the SwrContext");
		goto end;
	}
	if (swr_init(swrContext) != 0) {
		LOGE("Couldn't initialize the SwrContext");
		goto end;
	}

	/* Seek to the start position. The timestamp is kept in 64 bits: an int
	 * overflowed for positions beyond roughly 2147 seconds. */
	int64_t seek_pos = (int64_t) (start_pos_s * AV_TIME_BASE);
	LOGE("AV_TIME_BASE=%d seek_pos=%lld\n", AV_TIME_BASE, (long long) seek_pos);
	if (formatContext->start_time != AV_NOPTS_VALUE)
		seek_pos += formatContext->start_time;
	if (av_seek_frame(formatContext, -1, seek_pos, AVSEEK_FLAG_BACKWARD) < 0) {
		LOGE(
				"%s,av_seek_frame() seek to %.3f failed!", __FUNCTION__, (double)seek_pos/AV_TIME_BASE);
		goto end;
	}

	/* Size of the requested window; refuse windows whose byte size would
	 * not fit in an int. */
	double wanted_samples = len_s * sample_rate;
	if (wanted_samples < 1
			|| wanted_samples > (double) (INT32_MAX / (outChannels * outBytes))) {
		LOGE("Requested length is empty or too large\n");
		goto end;
	}
	int total_samples = (int) wanted_samples;
	LOGE("total_samples=%d\n", total_samples);
	int res_bufsize = av_samples_get_buffer_size(NULL, outChannels,
			total_samples, outSampleFormat, 1);
	if (res_bufsize <= 0) { /* errors are reported as negative values */
		LOGE("av_samples_get_buffer_size error");
		goto end;
	}
	LOGE("res_bufsize=%d\n", res_bufsize);

	/* Allocate the whole result once; keep the old pointer if realloc fails. */
	uint8_t *grown_result = realloc(resdatas, (size_t) res_bufsize);
	if (grown_result == NULL) {
		LOGE("Out of memory for the result buffer\n");
		goto end;
	}
	resdatas = grown_result;

	/* get_wav_header() multiplies total_samples by the bytes per sample, so
	 * it must receive a sample count (over all channels), not a byte count. */
	WavHeader h;
	h.bits_per_sample = outBits;
	h.channels = outChannels;
	h.samplerate = sample_rate;
	h.total_samples = (unsigned long) total_samples * outChannels;
	get_wav_header(header, &h);

	int done = 0; /* set once the window is full or a fatal error occurred */
	av_init_packet(&packet);
	while (!done && av_read_frame(formatContext, &packet) == 0) {
		if (packet.stream_index == audioStream->index) {
			AVPacket decodingPacket = packet;

			while (decodingPacket.size > 0) {
				/* Try to decode the packet into a frame */
				int frameFinished = 0;
				int result = avcodec_decode_audio4(codecContext, frame,
						&frameFinished, &decodingPacket);
				if (result < 0) {
					break;
				}
				decodingPacket.size -= result;
				decodingPacket.data += result;
				if (frameFinished == 0) {
					if (result == 0) {
						break; /* no progress: avoid spinning on this packet */
					}
					continue;
				}

				/* Upper bound of output samples per channel for this frame,
				 * including samples still buffered inside the resampler. */
				int64_t max_out = av_rescale_rnd(
						swr_get_delay(swrContext, codecContext->sample_rate)
								+ frame->nb_samples, outSampleRate,
						codecContext->sample_rate, AV_ROUND_UP);
				if (max_out <= 0) {
					continue;
				}
				if (max_out > INT32_MAX / (outChannels * outBytes)) {
					LOGE("Decoded frame is too large\n");
					done = 1;
					break;
				}
				if (max_out > convCapacity) {
					int need = av_samples_get_buffer_size(NULL, outChannels,
							(int) max_out, outSampleFormat, 1);
					uint8_t *grown = (need > 0) ?
							realloc(convBuffer, (size_t) need) : NULL;
					if (grown == NULL) {
						LOGE("Out of memory for the conversion buffer\n");
						done = 1;
						break;
					}
					convBuffer = grown;
					convCapacity = max_out;
				}

				/* Packed output formats use a single plane. */
				uint8_t* pointers[1];
				pointers[0] = convBuffer;

				int numSamplesOut = swr_convert(swrContext, pointers,
						(int) max_out,
						(const uint8_t**) frame->extended_data,
						frame->nb_samples);
				if (numSamplesOut < 0) {
					LOGE("swr_convert failed\n");
					done = 1;
					break;
				}
				if (numSamplesOut == 0) {
					continue;
				}
				int dst_bufsize = av_samples_get_buffer_size(NULL, outChannels,
						numSamplesOut, outSampleFormat, 1);
				if (dst_bufsize <= 0) {
					continue;
				}

				/* Clip the last chunk to the window and stop decoding. */
				if (dst_bufsize >= res_bufsize - res_data_size) {
					dst_bufsize = res_bufsize - res_data_size;
					done = 1;
				}

				memcpy(resdatas + res_data_size, convBuffer,
						(size_t) dst_bufsize);
				res_data_size += dst_bufsize;

				if (done) {
					break;
				}
			}

		}
		av_free_packet(&packet);
	}
	fprintf(stderr, "count=%d\n", res_data_size);

	end: free(convBuffer);
	if (frame) {
		av_frame_free(&frame);
	}
	if (codecContext) {
		avcodec_close(codecContext);
	}
	if (formatContext) {
		avformat_close_input(&formatContext);
	}
	if (swrContext) {
		swr_free(&swrContext);
	}

	return res_data_size > 0 ? res_data_size : -1;

}

/**
 * Demo driver: extract the first 12 seconds of "ping.mp3" as 44.1 kHz
 * stereo 16-bit PCM and write the raw samples to "out.pcm".
 *
 * @return EXIT_SUCCESS when the whole buffer was written, EXIT_FAILURE otherwise.
 */
int main(int argc, char** argv) {
	const char *file = "ping.mp3";
	double start = 0;
	double len = 12;
	int channel = 2;
	int bit = 16;
	int sample = 44100;
	int status = EXIT_FAILURE;
	FILE *out_file = NULL;

	(void) argc;
	(void) argv;

	int res_size = get_wav_stream(file, start, len, channel, bit, sample);
	if (res_size < 0 || resdatas == NULL) {
		/* -1 must not reach fwrite(): it would become a huge size_t. */
		fprintf(stderr, "Failed to extract PCM data from %s\n", file);
		goto done;
	}

	/* Binary mode: text mode would corrupt the samples on some platforms. */
	out_file = fopen("out.pcm", "wb");
	if (out_file == NULL) {
		perror("out.pcm");
		goto done;
	}

	if (fwrite(resdatas, 1, (size_t) res_size, out_file) == (size_t) res_size) {
		status = EXIT_SUCCESS;
	} else {
		fprintf(stderr, "Short write to out.pcm\n");
	}

	/* fclose flushes buffered data, so its failure also means data loss. */
	if (fclose(out_file) != 0) {
		perror("out.pcm");
		status = EXIT_FAILURE;
	}

	done: free(resdatas);
	resdatas = NULL;
	return status;
}

对了，还有一个数据结构。

/* Parameters consumed by get_wav_header() to build a 44-byte WAV header. */
typedef struct {

/* Sample rate, written to the header's sample-rate field. */
unsigned long samplerate;
/* Bits per sample; rounded up to whole bytes for the size computations. */
unsigned int bits_per_sample;
/* Number of channels. */
unsigned int channels;
/* Sample count; get_wav_header() multiplies it by the bytes per sample to
 * obtain the data-chunk size (no separate multiplication by channels). */
unsigned long total_samples;

} WavHeader;

ok，就酱紫了先。

yxm410

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
[FFMPEG-4]重构从mp3中获取pcm数据

一直以来都想把自己的代码打磨成工业级别的，但是奈何实力有限，憋了两天，还是这个一个烂烂的代码。还是又很多硬编码的地方，而且没经过充分测试。马上就放假回家了，想着一定要有一个结尾，不能把事情拖到寒假或者明年，就先把这个烂代码放上。算是一个总结。主要实现了从mp3中提取一段时长的pcm数据和wav头。这个需求是在做基于内容的音乐检索的时候遇到的。上代码。#include #incl
复制链接

扫一扫