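/*
 * ffmpeg: converting MP3 to WAV
 *
 * Author: bixinwei
 * Latest recommended article published on 2022-04-26 22:36:51
 * Views: 5.3k
 * Likes: 1
 * Category column: ffmpeg; article tags: ffmpeg, C language
 * Article link: https://blog.csdn.net/bixinwei22/article/details/78604360
 * Copyright
 * This article is included in the "ffmpeg" column
 * (28 articles, 8 subscribers)
 * Subscribe to the column
 */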
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
Audio: MP3 -> WAV
*/
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>

};
#define MAX_AUDIO_FRAME_SIZE 192000 // 48000 * 4: one second of 48 kHz, 32-bit audio for a single channel

/*
 * RIFF chunk descriptor that opens a WAV file: exactly 12 bytes.
 *
 * The size field uses a fixed-width 32-bit type. `unsigned long` is 8 bytes
 * on LP64 platforms, which would add padding and make the struct 24 bytes,
 * so it could no longer be overlaid on the on-disk header.
 */
typedef struct WAVE_HEADER{
    char        fccID[4];   // 4 bytes: "RIFF"
    uint32_t    dwSize;     // file size in bytes minus 8 (fccID and dwSize themselves are not counted)
    char        fccType[4]; // 4 bytes: "WAVE"
}WAVE_HEADER;

/*
 * "fmt " sub-chunk of a WAV file: exactly 24 bytes.
 *
 * Fixed-width integer types keep the in-memory layout equal to the on-disk
 * layout; `unsigned long` is 8 bytes on LP64 platforms and would break it.
 */
typedef struct WAVE_FMT{
    char        fccSub1ID[4];    // 4 bytes: "fmt " (note the trailing space)
    uint32_t    dwSub1Size;      // 16: byte count of this sub-chunk, excluding the 8 bytes of fccSub1ID and dwSub1Size
    uint16_t    wFormatTag;      // audio encoding; 1 means PCM, other values denote non-PCM (compressed) formats
    uint16_t    numChannels;     // channel count: 1 = mono, 2 = stereo, ...
    uint32_t    sampleRate;      // sample rate in Hz, e.g. 8000 or 44100
    uint32_t    byteRate;        // bytes per second = sampleRate * numChannels * uiBitsPerSample / 8
    uint16_t    wBlockAlign;     // bytes per sample frame = numChannels * uiBitsPerSample / 8
    uint16_t    uiBitsPerSample; // bits per sample, typically 8, 16 or 32
}WAVE_FMT;

/*
 * Header of the "data" sub-chunk of a WAV file: exactly 8 bytes.
 *
 * The size field is a fixed-width 32-bit type; `unsigned long` is 8 bytes on
 * LP64 platforms and would grow the struct to 16 bytes.
 */
typedef struct WAVE_DATA{
    char        fccSub2ID[4]; // 4 bytes: "data"
    uint32_t    dwSub2Size;   // byte count of the sample data that follows = NumSamples * NumChannels * BitsPerSample / 8
}WAVE_DATA;

//WAV file size in bytes = dwSize + 8 = 44 + dwSub2Size  =>  dwSize = dwSub2Size + 36

#define SIZE_WAV_HEADER 12
#define SIZE_WAV_FMT 24
#define SIZE_WAV_DATA 8

/* Store the low 16 bits of `value` at `dst` in little-endian byte order. */
static void wav_put_le16(unsigned char *dst, unsigned long value)
{
    dst[0] = (unsigned char)(value & 0xFFul);
    dst[1] = (unsigned char)((value >> 8) & 0xFFul);
}

/* Store the low 32 bits of `value` at `dst` in little-endian byte order. */
static void wav_put_le32(unsigned char *dst, unsigned long value)
{
    dst[0] = (unsigned char)(value & 0xFFul);
    dst[1] = (unsigned char)((value >> 8) & 0xFFul);
    dst[2] = (unsigned char)((value >> 16) & 0xFFul);
    dst[3] = (unsigned char)((value >> 24) & 0xFFul);
}

/*
 * Build the 44-byte header of a 16-bit, 44.1 kHz, stereo PCM WAV file.
 *
 * numSamples: total number of samples of ONE channel (i.e. sample frames).
 *             Negative values are treated as 0. Counts whose data size would
 *             not fit the 32-bit size fields are clamped to the largest
 *             whole number of sample frames that does fit.
 *
 * Returns a malloc()ed buffer of SIZE_WAV_HEADER + SIZE_WAV_FMT +
 * SIZE_WAV_DATA bytes that the caller must free(), or NULL if the
 * allocation fails.
 *
 * Every field is written byte by byte in little-endian order, so the result
 * does not depend on the host's struct padding, integer widths or endianness.
 */
char* set_wav_parm(int numSamples){
    const unsigned long channels = 2;
    const unsigned long sample_rate = 44100;
    const unsigned long bits_per_sample = 16;
    const unsigned long block_align = channels * bits_per_sample / 8;
    const unsigned long byte_rate = sample_rate * block_align;
    const unsigned long total_size = SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA;
    /* The RIFF size field counts everything after its own first 8 bytes. */
    const unsigned long riff_overhead = total_size - 8;
    const unsigned long max_frames = (0xFFFFFFFFul - riff_overhead) / block_align;

    unsigned long frames = numSamples > 0 ? (unsigned long)numSamples : 0ul;
    if (frames > max_frames){
        frames = max_frames;
    }
    const unsigned long data_bytes = frames * block_align;

    /* The whole header is allocated at once; the cast keeps this valid as C++. */
    unsigned char *hdr = (unsigned char *)malloc(total_size);
    if (hdr == NULL){
        return NULL;
    }

    //RIFF chunk descriptor (12 bytes)
    memcpy(hdr, "RIFF", 4);
    wav_put_le32(hdr + 4, data_bytes + riff_overhead);
    memcpy(hdr + 8, "WAVE", 4);

    //"fmt " sub-chunk (24 bytes)
    unsigned char *fmt = hdr + SIZE_WAV_HEADER;
    memcpy(fmt, "fmt ", 4);
    wav_put_le32(fmt + 4, 16);               /* size of the rest of this sub-chunk */
    wav_put_le16(fmt + 8, 1);                /* format tag 1 = PCM */
    wav_put_le16(fmt + 10, channels);
    wav_put_le32(fmt + 12, sample_rate);
    wav_put_le32(fmt + 16, byte_rate);
    wav_put_le16(fmt + 20, block_align);
    wav_put_le16(fmt + 22, bits_per_sample);

    //"data" sub-chunk header (8 bytes)
    unsigned char *data = fmt + SIZE_WAV_FMT;
    memcpy(data, "data", 4);
    wav_put_le32(data + 4, data_bytes);

    return (char *)hdr;
}

void main(int argc, char* argv[]){

    const char* input_file = "skycity1.mp3";
    av_register_all();
    avformat_network_init();
    AVFormatContext *pFormatCtx = avformat_alloc_context();

    if (avformat_open_input(&pFormatCtx, input_file, NULL, NULL) != 0){
        printf("Couldn't open input stream.\n");
        return;
    }

    if (avformat_find_stream_info(pFormatCtx, NULL) < 0){
        printf("Couldn't find stream information.\n");
        return;
    }
    //av_dump_format(pFormatCtx, 0, input_file, false);
    //获取音频流索引位置
    int i = 0, audio_stream_idx = -1;
    for (; i < pFormatCtx->nb_streams; i++){
        if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
            audio_stream_idx = i;
            break;
        }
    }
    if (audio_stream_idx == -1){
        printf("Didn't find a audio stream.\n");
        return;
    }

    AVCodecContext *codecCtx = pFormatCtx->streams[audio_stream_idx]->codec;
    AVCodec *codec = avcodec_find_decoder(codecCtx->codec_id);
    if (codec == NULL){
        printf("Codec not found.\n");
        return;
    }

    if (avcodec_open2(codecCtx, codec, NULL) < 0){
        printf("Could not open codec.\n");
        return;
    }

    AVSampleFormat in_sample_fmt = codecCtx->sample_fmt;//输入的采样格式
    int in_sample_rate = codecCtx->sample_rate;//输入的采样率
    int channels = codecCtx->channels;


    printf("采样率:%d ,声道数:%d\n", in_sample_rate, channels);

    //重采样设置参数
    AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16; //输出采样格式16bit PCM
    //输出采样率
    int out_sample_rate = 44100;
    //输出的声道布局（立体声）
    uint64_t out_ch_layout = AV_CH_LAYOUT_STEREO;
    SwrContext *swrCtx = swr_alloc();
    swrCtx = swr_alloc_set_opts(swrCtx,
        out_ch_layout, out_sample_fmt, out_sample_rate,
        av_get_default_channel_layout(codecCtx->channels), in_sample_fmt, in_sample_rate,
        0, NULL);
    /*
    av_opt_set_int(swrCtx, "in_channel_layout", AV_CH_LAYOUT_MONO, 0);
    av_opt_set_int(swrCtx, "in_sample_rate", in_sample_rate, 0);
    av_opt_set_sample_fmt(swrCtx, "in_sample_fmt", in_sample_fmt, 0);
    av_opt_set_int(swrCtx, "out_channel_layout", out_ch_layout, 0);
    av_opt_set_int(swrCtx, "out_sample_rate", out_sample_rate, 0);
    av_opt_set_sample_fmt(swrCtx, "out_sample_fmt", out_sample_fmt, 0);*/
    swr_init(swrCtx);

    //输出的声道个数
    int out_channel_nb = av_get_channel_layout_nb_channels(out_ch_layout);

    printf("输出pcm： 采样率:%d , 声道数：%d\n", out_sample_rate, out_channel_nb);

    const char* output_wav = "out.wav";
    FILE *fp_wav = fopen(output_wav, "wb+");

    //16bit 44100 PCM 数据
    uint8_t *out_buffer = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
    int got_frame = 0, framecnt = 0;
    AVPacket *packet = (AVPacket *)av_malloc(sizeof(AVPacket));
    av_init_packet(packet);
    AVFrame *frame = av_frame_alloc();

    int totalsamples=0;
    fseek(fp_wav, SIZE_WAV_HEADER+SIZE_WAV_FMT+SIZE_WAV_DATA, 1);
    while (av_read_frame(pFormatCtx, packet) >= 0){

        if (packet->stream_index == audio_stream_idx){
            //解码
            avcodec_decode_audio4(codecCtx, frame, &got_frame, packet);
            if (got_frame > 0){
                //解码得到的Frame数据，转成PCM
                swr_convert(swrCtx, &out_buffer, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)frame->data, frame->nb_samples);
                //printf("index:%5d\t pts:%lld\t packet size:%d\n", framecnt, packet->pts, packet->size);
                //Write PCM

                totalsamples += frame->nb_samples;
                //音频文件字节大小= 采用率*时长*通道数*采样位数/8
                //计算一帧音频帧占用的字节数  通道数 * 采样点数* 采样位数/8 
                int out_buffer_size = av_samples_get_buffer_size(NULL, out_channel_nb,
                    frame->nb_samples, out_sample_fmt, 1);
                //frame->nb_samples 当前帧的一个通道的采样点数（经测试，可能有的帧值不等） 和codecCtx->frame_size（音频帧的一个通道的采样点数，是固定值）  
                //虽然两者都是表示音频的采样点数，但是我的理解，这里用frame->nb_samples更加科学。
                fwrite(out_buffer, 1, out_buffer_size, fp_wav);

                framecnt++;

            }
        }

        av_free_packet(packet);
    }
    swr_free(&swrCtx);
    av_frame_free(&frame);
    av_free(out_buffer);

    char *p = set_wav_parm(totalsamples);
    rewind(fp_wav);//文件指针回到头部
    fwrite(p, 1,SIZE_WAV_HEADER + SIZE_WAV_FMT + SIZE_WAV_DATA,fp_wav);//size取1个字节没问题，可能跟p是char类型有关，取大了，报错
    fclose(fp_wav);
    printf("success");
    avcodec_close(codecCtx);
    avformat_close_input(&pFormatCtx);
    getchar();

}