实验6——MPEG layer2音频编码

最新推荐文章于 2022-07-09 01:00:00 发布

就想改个名字为什么这么难

最新推荐文章于 2022-07-09 01:00:00 发布

阅读量456

点赞数

本文链接：https://blog.csdn.net/weixin_52189060/article/details/117716110

版权

一.实验要求

输出音频的采样率和目标码率，选择三个不同特性的音频文件（噪声、音乐、混合）进行测试，输出某个音频帧的相关信息：该帧所分配的比特数、该帧的比例因子、该帧的比特分配结果

二.实验原理

1.MPEG音频压缩系统流程图：

三.实验过程

1.main函数的解读


  /* Main encoding loop (excerpt from main): every pass reads one frame of
     input via get_audio(), runs the subband filter, scale factor stage,
     psychoacoustic model, bit allocation and quantisation, and writes the
     coded frame to the bitstream bs.  All variables used here are declared
     outside this excerpt. */
  while (get_audio (musicin, buffer, num_samples, nch, &header) > 0) {// each iteration processes one frame
    /* NOTE(review): frameNum is only incremented when glopts.verbosity > 1
       (the ++ lives inside the nested if).  The fflush below runs on every
       iteration regardless of verbosity; its indentation is misleading. */
    if (glopts.verbosity > 1)
      if (++frameNum % 10 == 0)
	fprintf (stderr, "[%4u]\r", frameNum);
    fflush (stderr);
    win_buf[0] = &buffer[0][0];
    win_buf[1] = &buffer[1][0];

    adb = available_bits (&header, &glopts);
    lg_frame = adb / 8;// number of bytes in one frame
    if (header.dab_extension) {
      /* in 24 kHz we always have 4 bytes */
      if (header.sampling_frequency == 1)
	header.dab_extension = 4;
/* You must have one frame in memory if you are in DAB mode                 */
/* in conformity of the norme ETS 300 401 http://www.etsi.org               */
      /* see bitstream.c            */
      if (frameNum == 1)
	minimum = lg_frame + MINIMUM;
      /* Take the DAB extension bytes, the dab_length bytes and 16 further
         bits out of the bit budget for this frame. */
      adb -= header.dab_extension * 8 + header.dab_length * 8 + 16;
	}

    {
      int gr, bl, ch;
      /* New polyphase filter
	 Combines windowing and filtering. Ricardo Feb'03 */
      for( gr = 0; gr < 3; gr++ )
	for ( bl = 0; bl < 12; bl++ )
	  for ( ch = 0; ch < nch; ch++ )
	    WindowFilterSubband( &buffer[ch][gr * 12 * 32 + 32 * bl], ch,
				 &(*sb_sample)[ch][gr][bl][0] );
    }

#ifdef REFERENCECODE
    {
      /* Old code. left here for reference */
      int gr, bl, ch;
      for (gr = 0; gr < 3; gr++)
	for (bl = 0; bl < SCALE_BLOCK; bl++)
	  for (ch = 0; ch < nch; ch++) {
	    window_subband (&win_buf[ch], &(*win_que)[ch][0], ch);
	    filter_subband (&(*win_que)[ch][0], &(*sb_sample)[ch][gr][bl][0]);
	  }
    }
#endif


#ifdef NEWENCODE
    scalefactor_calc_new(*sb_sample, scalar, nch, frame.sblimit);
    find_sf_max (scalar, &frame, max_sc);
    if (frame.actual_mode == MPG_MD_JOINT_STEREO) {
      /* this way we calculate more mono than we need */
      /* but it is cheap */
      combine_LR_new (*sb_sample, *j_sample, frame.sblimit);
      scalefactor_calc_new (j_sample, &j_scale, 1, frame.sblimit);
    }
#else
    scale_factor_calc (*sb_sample, scalar, nch, frame.sblimit);// scale factor extraction
    pick_scale (scalar, &frame, max_sc);// scale factor selection
    if (frame.actual_mode == MPG_MD_JOINT_STEREO) {
      /* this way we calculate more mono than we need */
      /* but it is cheap */
      combine_LR (*sb_sample, *j_sample, frame.sblimit);
      scale_factor_calc (j_sample, &j_scale, 1, frame.sblimit);
    }
#endif


	// psychoacoustic model
    if ((glopts.quickmode == TRUE) && (++psycount % glopts.quickcount != 0)) {
      /* We're using quick mode, so we're only calculating the model every
         'quickcount' frames. Otherwise, just copy the old ones across */
      for (ch = 0; ch < nch; ch++) {
	for (sb = 0; sb < SBLIMIT; sb++)
	  smr[ch][sb] = smrdef[ch][sb];
      }
    } else {
      /* calculate the psymodel */
      switch (model) {
      case -1:
	psycho_n1 (smr, nch);
	break;
      case 0:	/* Psy Model A */
	psycho_0 (smr, nch, scalar, (FLOAT) s_freq[header.version][header.sampling_frequency] * 1000);	
	break;
      case 1:
	psycho_1 (buffer, max_sc, smr, &frame);// this is the model entered in this experiment (original author's note)
	break;
      case 2:
	for (ch = 0; ch < nch; ch++) {
	  psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
		     (FLOAT) s_freq[header.version][header.sampling_frequency] *
		     1000, &glopts);
	}
	break;
      case 3:
	/* Modified psy model 1 */
	psycho_3 (buffer, max_sc, smr, &frame, &glopts);
	break;
      case 4:
	/* Modified Psycho Model 2 */
	for (ch = 0; ch < nch; ch++) {
	  psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
		     (FLOAT) s_freq[header.version][header.sampling_frequency] *
		     1000, &glopts);
	}
	break;	
      case 5:
	/* Model 5 compares model 1 and 3; each result is dumped to stdout and
	   the smr left in place for encoding is the one from the last call */
	psycho_1 (buffer, max_sc, smr, &frame);
	fprintf(stdout,"1 ");
	smr_dump(smr,nch);
	psycho_3 (buffer, max_sc, smr, &frame, &glopts);
	fprintf(stdout,"3 ");
	smr_dump(smr,nch);
	break;
      case 6:
	/* Model 6 compares model 2 and 4 */
	for (ch = 0; ch < nch; ch++) 
	  psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
		    (FLOAT) s_freq[header.version][header.sampling_frequency] *
		    1000, &glopts);
	fprintf(stdout,"2 ");
	smr_dump(smr,nch);
	for (ch = 0; ch < nch; ch++) 
	  psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
		     (FLOAT) s_freq[header.version][header.sampling_frequency] *
		     1000, &glopts);
	fprintf(stdout,"4 ");
	smr_dump(smr,nch);
	break;
      case 7:
	fprintf(stdout,"Frame: %i\n",frameNum);
	/* Dump the SMRs for all models */	
	psycho_1 (buffer, max_sc, smr, &frame);
	fprintf(stdout,"1");
	smr_dump(smr, nch);
	psycho_3 (buffer, max_sc, smr, &frame, &glopts);
	fprintf(stdout,"3");
	smr_dump(smr,nch);
	for (ch = 0; ch < nch; ch++) 
	  psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
		    (FLOAT) s_freq[header.version][header.sampling_frequency] *
		    1000, &glopts);
	fprintf(stdout,"2");
	smr_dump(smr,nch);
	for (ch = 0; ch < nch; ch++) 
	  psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
		     (FLOAT) s_freq[header.version][header.sampling_frequency] *
		     1000, &glopts);
	fprintf(stdout,"4");
	smr_dump(smr,nch);
	break;
      case 8:
	/* Compare 0 and 4 */	
	/* NOTE(review): the dump is labelled "0" but the call is psycho_n1,
	   the same routine case -1 uses, not psycho_0 -- confirm intent */
	psycho_n1 (smr, nch);
	fprintf(stdout,"0");
	smr_dump(smr,nch);

	for (ch = 0; ch < nch; ch++) 
	  psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
		     (FLOAT) s_freq[header.version][header.sampling_frequency] *
		     1000, &glopts);
	fprintf(stdout,"4");
	smr_dump(smr,nch);
	break;
      default:
	fprintf (stderr, "Invalid psy model specification: %i\n", model);
	/* NOTE(review): terminates with status 0 although this is an error path */
	exit (0);
      }

      if (glopts.quickmode == TRUE)
	/* copy the smr values and reuse them later */
	for (ch = 0; ch < nch; ch++) {
	  for (sb = 0; sb < SBLIMIT; sb++)
	    smrdef[ch][sb] = smr[ch][sb];
	}

      if (glopts.verbosity > 4) 
	smr_dump(smr, nch);
     
      


    }

#ifdef NEWENCODE
    sf_transmission_pattern (scalar, scfsi, &frame);
    main_bit_allocation_new (smr, scfsi, bit_alloc, &adb, &frame, &glopts);
    //main_bit_allocation (smr, scfsi, bit_alloc, &adb, &frame, &glopts);

    if (error_protection)
      CRC_calc (&frame, bit_alloc, scfsi, &crc);

    write_header (&frame, &bs);
    //encode_info (&frame, &bs);
    if (error_protection)
      putbits (&bs, crc, 16);
    write_bit_alloc (bit_alloc, &frame, &bs);
    //encode_bit_alloc (bit_alloc, &frame, &bs);
    write_scalefactors(bit_alloc, scfsi, scalar, &frame, &bs);
    //encode_scale (bit_alloc, scfsi, scalar, &frame, &bs);
    subband_quantization_new (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
    			  *subband, &frame);
    //subband_quantization (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
    //	  *subband, &frame);
    write_samples_new(*subband, bit_alloc, &frame, &bs);
    //sample_encoding (*subband, bit_alloc, &frame, &bs);
#else
    transmission_pattern (scalar, scfsi, &frame);
    main_bit_allocation (smr, scfsi, bit_alloc, &adb, &frame, &glopts);// dynamic bit allocation; the allocation result is stored in bit_alloc
    if (error_protection)
      CRC_calc (&frame, bit_alloc, scfsi, &crc);
    encode_info (&frame, &bs);
    if (error_protection)
      encode_CRC (crc, &bs);
    encode_bit_alloc (bit_alloc, &frame, &bs);
    encode_scale (bit_alloc, scfsi, scalar, &frame, &bs);
    subband_quantization (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
			  *subband, &frame);// quantisation
    sample_encoding (*subband, bit_alloc, &frame, &bs);
#endif


    /* If not all the bits were used, write out a stack of zeros */
    for (i = 0; i < adb; i++)
      put1bit (&bs, 0);
    if (header.dab_extension) {
      /* Reserve some bytes for X-PAD in DAB mode */
      putbits (&bs, 0, header.dab_length * 8);
      
      for (i = header.dab_extension - 1; i >= 0; i--) {
	CRC_calcDAB (&frame, bit_alloc, scfsi, scalar, &crc, i);
	/* this crc is for the previous frame in DAB mode  */
	if (bs.buf_byte_idx + lg_frame < bs.buf_size)
	  bs.buf[bs.buf_byte_idx + lg_frame] = crc;
	/* reserved 2 bytes for F-PAD in DAB mode  */
	putbits (&bs, crc, 8);
      }
      putbits (&bs, 0, 16);
    }

    /* Bits emitted for this frame = stream position now minus the running
       total recorded after the previous frame. */
    frameBits = sstell (&bs) - sentBits;

    if (frameBits % 8) {	/* a program failure */
      fprintf (stderr, "Sent %ld bits = %ld slots plus %ld\n", frameBits,
	       frameBits / 8, frameBits % 8);
      fprintf (stderr, "If you are reading this, the program is broken\n");
      fprintf (stderr, "email [mfc at NOTplanckenerg.com] without the NOT\n");
      fprintf (stderr, "with the command line arguments and other info\n");
      /* NOTE(review): terminates with status 0 although this is an error path */
      exit (0);
    }

    sentBits += frameBits;


  }// end of while loop

main函数的主体为该while循环，每次循环处理一帧。get_audio的作用是从输入音频文件中读出一帧的采样数据，并根据声道数（单声道或双声道）将第1声道读入buffer[0][]，将第2声道读入buffer[1][]。scale_factor_calc函数为比例因子提取函数，pick_scale函数为比例因子选择函数。从心理声学模型开始，主要是流程图下分支的实现。main_bit_allocation函数实现动态比特分配，并将分配结果存储在bit_alloc中。

2.scale_factor_calc函数

/*
 * Scale factor extraction.
 *
 * For every channel (nch of them), each of the 3 groups and every subband
 * below sblimit, take the largest absolute value among the SCALE_BLOCK
 * samples of that group/subband and store in scalar[ch][gr][sb] the index
 * of the matching entry of the external multiple[] table (the index, not
 * the scale factor value itself).
 *
 * The index is located with a fixed five-step binary search that starts at
 * 32 and uses steps 16, 8, 4, 2, 1, followed by one corrective decrement,
 * so the result lies in 0..63.  Subbands at or above sblimit are not
 * written.
 *
 * Assumes multiple[] has at least 64 entries sorted in descending order --
 * TODO confirm against the table definition.
 */
void scale_factor_calc (double sb_sample[][3][SCALE_BLOCK][SBLIMIT],
			unsigned int scalar[][3][SBLIMIT], int nch,
			int sblimit)
{
  int ch, gr, sb;

  for (ch = 0; ch < nch; ch++) {
    for (gr = 0; gr < 3; gr++) {
      for (sb = 0; sb < sblimit; sb++) {
	int smp;
	unsigned int step;
	unsigned int idx = 32;
	/* Seed the peak with the last sample of the block, then fold in
	   the remaining SCALE_BLOCK - 1 samples. */
	double peak = fabs (sb_sample[ch][gr][SCALE_BLOCK - 1][sb]);

	for (smp = 0; smp < SCALE_BLOCK - 1; smp++) {
	  double mag = fabs (sb_sample[ch][gr][smp][sb]);
	  if (mag > peak)
	    peak = mag;
	}

	/* Binary search: a table entry that still covers the peak moves
	   the index up, otherwise it moves down. */
	for (step = 16; step != 0; step >>= 1)
	  idx = (peak <= multiple[idx]) ? idx + step : idx - step;

	/* The search may land one entry past the last one that covers the
	   peak; step back so that the chosen entry is not below it. */
	if (peak > multiple[idx])
	  idx--;

	scalar[ch][gr][sb] = idx;
      }
    }
  }
}

双声道时，一帧音频数据共有两个声道，每个声道有32个子带，每个子带对应三组数据，每组数据有12个样值。scale_factor_calc函数的具体操作过程是：找出每个子带每组12个样值中绝对值的最大值，将其与比例因子表multiple中的值进行比较，选取表中最接近且不小于该最大值的项作为该组数据的比例因子；scalar中保存的是该项在表中的索引号，而非实际的比例因子值。具体实现过程可参考注释内容。

四.实验结果

1.添加的代码：

// Added for the experiment: running total of the bit_alloc entries for the
// dumped frame, and the text file the per-frame report is written to.
// NOTE(review): the fopen result is not checked and no fclose appears in this
// excerpt; the output path is hard-coded to one machine's desktop.
  int bit_total=0;
  char *output_filename;
  FILE *fp_output;
  output_filename="C:\\Users\\admin\\Desktop\\实验6_MPG音频编码\\m2aenc\\test\\test.txt";
  fp_output=fopen(output_filename,"wb");

// Added code: dump the target bitrate, the sampling rate, the bit allocation
// and the scale factor indices of one frame to fp_output.
	if(frameNum==10)// select the 11th frame (original author's note)
	// NOTE(review): in the main loop frameNum is pre-incremented only when
	// glopts.verbosity > 1, so which frame this matches (or whether it matches
	// at all) depends on verbosity and on where this snippet is inserted -- confirm.
	{
		// write the configured target bitrate and the sampling rate
		fprintf(fp_output,"规定的目标码率为%dkbit/s\r\n",bitrate[header.version][header.bitrate_index]);
		fprintf(fp_output,"采样率为%fkHz\r\n",s_freq[header.version][header.sampling_frequency]);
		// write the bit allocation per channel/subband and accumulate the total
		// NOTE(review): the bounds 2 (channels) and 32 (subbands) are hard-coded
		// rather than taken from nch / the frame's subband limit; also confirm
		// that bit_alloc holds bit counts and not allocation-table indices,
		// otherwise bit_total is not a bit count.
		for(i=0;i<2;i++)
		{
			for(j=0;j<32;j++)
			{
				bit_total+=bit_alloc[i][j];
				fprintf(fp_output,"%d声道%d子带分配的bit数为%d\r\n",i+1,j+1,bit_alloc[i][j]);
			}
		}
		fprintf(fp_output,"数据帧分配的总bit数:%dbit\r\n",bit_total);
		// write the scale factor indices of this frame (channel, group, subband)
		for(i=0;i<2;i++)
		{
			for(j=0;j<3;j++)
			{
				int k;
				for(k=0;k<32;k++)
				{
					fprintf(fp_output,"%d声道第%d组数据第%d个声带的比例因子为%d\r\n",i+1,j+1,k+1,scalar[i][j][k]);
				}
			}
		}

2.输出的文件：

音乐：