数据压缩第八次作业

最新推荐文章于 2024-09-15 01:19:00 发布

Halo_G

最新推荐文章于 2024-09-15 01:19:00 发布

阅读量99

点赞数

文章标签：大数据

本文链接：https://blog.csdn.net/Halo_G/article/details/125568631

版权

一、理解感知音频编码的设计思想
1.两条线

①主干：子带分解

以1152个样本为单位，输入到滤波器组中进行32个子带的分解，即以32个样本为一个时间窗口，做36次子带分解，每个子带形成36个样本，再划分为以12个样本为单位的3个块（以12个样本为观察组，组内样本之间的差别不大），以此进行比例因子的提取和选择，通过频域分析线路的辅助，进行后续的量化等工作。

②辅助：频域分析

通过1024点的FFT输出FFT结果，和比例因子共同输入到心理声学模型中，输出SMR并生成听阈曲线，再结合码率限制进行动态比特分配，使整帧和每个子带的总噪声-掩蔽比最小，最终结合主干线路进行装帧。

2.时-频分析的矛盾

32个子带样本在时域上的分析较为清晰，但不能精确地反映人耳的听觉特性；如果在频域只进行32点FFT，频率分辨率又不高。而FFT本身也仍然存在问题：它只能说明哪些频率成分存在，不能说明这些频率成分在什么时间出现。

二、理解心理声学模型的实现过程
1.临界频带的概念

临界频带是指当某个纯音被以它为中心频率、且具有一定带宽的连续噪声所掩蔽时，如果该纯音刚好被听到时的功率等于这一频带内的噪声功率，这个带宽为临界频带宽度。

2.掩蔽值计算的思路

音频信号可以分解为乐音和噪声，两者共有四种掩蔽组合，即“乐音-乐音”、“乐音-噪声”、“噪声-乐音”、“噪声-噪声”。另有安静时的阈值为绝对阈值。

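某一频率点i的总掩蔽阈值可通过该点的绝对掩蔽阈值与各单独掩蔽阈值相加（在线性功率域求和后再取对数）获得：

$$LT_g(i) = 10\log_{10}\left(10^{LT_q(i)/10} + \sum_{j=1}^{m} 10^{LT_{tm}[z(j),z(i)]/10} + \sum_{j=1}^{n} 10^{LT_{nm}[z(j),z(i)]/10}\right)$$

括号内第一项为绝对阈值、第二项为乐音掩蔽、第三项为噪音掩蔽。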

三、实验代码

int main (int argc, char **argv)
{
typedef double SBS[2][3][SCALE_BLOCK][SBLIMIT];
SBS *sb_sample;
typedef double JSBS[3][SCALE_BLOCK][SBLIMIT];
JSBS *j_sample;
typedef double IN[2][HAN_SIZE];
IN *win_que;
typedef unsigned int SUB[2][3][SCALE_BLOCK][SBLIMIT];
SUB *subband;

//add sentence
FILE* outfile = NULL;
unsigned char* outtxt = NULL;
outfile = fopen("output2.txt", "w");
//add finish

frame_info frame;
frame_header header;
char original_file_name[MAX_NAME_SIZE];
char encoded_file_name[MAX_NAME_SIZE];
short **win_buf;
static short buffer[2][1152];
static unsigned int bit_alloc[2][SBLIMIT], scfsi[2][SBLIMIT];
static unsigned int scalar[2][3][SBLIMIT], j_scale[3][SBLIMIT];
static double smr[2][SBLIMIT], lgmin[2][SBLIMIT], max_sc[2][SBLIMIT];
// FLOAT snr32[32];
short sam[2][1344];       /* was [1056]; */
int model, nch, error_protection;
static unsigned int crc;
int sb, ch, adb;
unsigned long frameBits, sentBits = 0;
unsigned long num_samples;
int lg_frame;
int i;

/* Used to keep the SNR values for the fast/quick psy models */
static FLOAT smrdef[2][32];

static int psycount = 0;
extern int minimum;

time_t start_time, end_time;
int total_time;

sb_sample = (SBS *) mem_alloc (sizeof (SBS), "sb_sample");
j_sample = (JSBS *) mem_alloc (sizeof (JSBS), "j_sample");
win_que = (IN *) mem_alloc (sizeof (IN), "Win_que");
subband = (SUB *) mem_alloc (sizeof (SUB), "subband");
win_buf = (short **) mem_alloc (sizeof (short *) * 2, "win_buf");

/* clear buffers */
memset ((char *) buffer, 0, sizeof (buffer));
memset ((char *) bit_alloc, 0, sizeof (bit_alloc));
memset ((char *) scalar, 0, sizeof (scalar));
memset ((char *) j_scale, 0, sizeof (j_scale));
memset ((char *) scfsi, 0, sizeof (scfsi));
memset ((char *) smr, 0, sizeof (smr));
memset ((char *) lgmin, 0, sizeof (lgmin));
memset ((char *) max_sc, 0, sizeof (max_sc));
//memset ((char *) snr32, 0, sizeof (snr32));
memset ((char *) sam, 0, sizeof (sam));

global_init ();

header.extension = 0;
frame.header = &header;
frame.tab_num = -1;       /* no table loaded */
frame.alloc = NULL;
header.version = MPEG_AUDIO_ID;   /* Default: MPEG-1 */

total_time = 0;

time(&start_time);

programName = argv[0];
if (argc == 1)       /* no command-line args */
short_usage ();
else
parse_args (argc, argv, &frame, &model, &num_samples, original_file_name,
       encoded_file_name);
print_config (&frame, &model, original_file_name, encoded_file_name);

/* this will load the alloc tables and do some other stuff */
hdr_to_frps (&frame);
nch = frame.nch;
error_protection = header.error_protection;

while (get_audio (musicin, buffer, num_samples, nch, &header) > 0) {
if (glopts.verbosity > 1)
if (++frameNum % 10 == 0)
   fprintf (stderr, "[%4u]\r", frameNum);
fflush (stderr);
win_buf[0] = &buffer[0][0];
win_buf[1] = &buffer[1][0];

adb = available_bits (&header, &glopts);
lg_frame = adb / 8;
if (header.dab_extension) {
/* in 24 kHz we always have 4 bytes */
if (header.sampling_frequency == 1)
   header.dab_extension = 4;
/* You must have one frame in memory if you are in DAB mode */
/* in conformity of the norme ETS 300 401 http://www.etsi.org */
/* see bitstream.c */
if (frameNum == 1)
   minimum = lg_frame + MINIMUM;
adb -= header.dab_extension * 8 + header.dab_length * 8 + 16;
}

{
int gr, bl, ch;
/* New polyphase filter
   Combines windowing and filtering. Ricardo Feb'03 */
for( gr = 0; gr < 3; gr++ )
   for ( bl = 0; bl < 12; bl++ )
   for ( ch = 0; ch < nch; ch++ )
   WindowFilterSubband( &buffer[ch][gr * 12 * 32 + 32 * bl], ch,
               &(*sb_sample)[ch][gr][bl][0] );
}

#ifdef REFERENCECODE
{
/* Old code. left here for reference */
int gr, bl, ch;
for (gr = 0; gr < 3; gr++)
   for (bl = 0; bl < SCALE_BLOCK; bl++)
   for (ch = 0; ch < nch; ch++) {
   window_subband (&win_buf[ch], &(*win_que)[ch][0], ch);
   filter_subband (&(*win_que)[ch][0], &(*sb_sample)[ch][gr][bl][0]);
   }
}
#endif

#ifdef NEWENCODE
scalefactor_calc_new(*sb_sample, scalar, nch, frame.sblimit);
find_sf_max (scalar, &frame, max_sc);
if (frame.actual_mode == MPG_MD_JOINT_STEREO) {
/* this way we calculate more mono than we need */
/* but it is cheap */
combine_LR_new (*sb_sample, *j_sample, frame.sblimit);
scalefactor_calc_new (j_sample, &j_scale, 1, frame.sblimit);
}
#else
scale_factor_calc (*sb_sample, scalar, nch, frame.sblimit);
pick_scale (scalar, &frame, max_sc);
if (frame.actual_mode == MPG_MD_JOINT_STEREO) {
/* this way we calculate more mono than we need */
/* but it is cheap */
combine_LR (*sb_sample, *j_sample, frame.sblimit);
scale_factor_calc (j_sample, &j_scale, 1, frame.sblimit);
}
#endif

if ((glopts.quickmode == TRUE) && (++psycount % glopts.quickcount != 0)) {
/* We're using quick mode, so we're only calculating the model every
'quickcount' frames. Otherwise, just copy the old ones across */
for (ch = 0; ch < nch; ch++) {
   for (sb = 0; sb < SBLIMIT; sb++)
   smr[ch][sb] = smrdef[ch][sb];
}
} else {
/* calculate the psymodel */
switch (model) {
case -1:
   psycho_n1 (smr, nch);
   break;
case 0:   /* Psy Model A */
   psycho_0 (smr, nch, scalar, (FLOAT) s_freq[header.version][header.sampling_frequency] * 1000);
   break;
case 1:
   psycho_1 (buffer, max_sc, smr, &frame);
   break;
case 2:
   for (ch = 0; ch < nch; ch++) {
   psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   }
   break;
case 3:
   /* Modified psy model 1 */
   psycho_3 (buffer, max_sc, smr, &frame, &glopts);
   break;
case 4:
   /* Modified Psycho Model 2 */
   for (ch = 0; ch < nch; ch++) {
   psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   }
   break;
case 5:
   /* Model 5 comparse model 1 and 3 */
   psycho_1 (buffer, max_sc, smr, &frame);
   fprintf(stdout,"1 ");
   smr_dump(smr,nch);
   psycho_3 (buffer, max_sc, smr, &frame, &glopts);
   fprintf(stdout,"3 ");
   smr_dump(smr,nch);
   break;
case 6:
   /* Model 6 compares model 2 and 4 */
   for (ch = 0; ch < nch; ch++)
   psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   fprintf(stdout,"2 ");
   smr_dump(smr,nch);
   for (ch = 0; ch < nch; ch++)
   psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   fprintf(stdout,"4 ");
   smr_dump(smr,nch);
   break;
case 7:
   fprintf(stdout,"Frame: %i\n",frameNum);
   /* Dump the SMRs for all models */
   psycho_1 (buffer, max_sc, smr, &frame);
   fprintf(stdout,"1");
   smr_dump(smr, nch);
   psycho_3 (buffer, max_sc, smr, &frame, &glopts);
   fprintf(stdout,"3");
   smr_dump(smr,nch);
   for (ch = 0; ch < nch; ch++)
   psycho_2 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], //snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   fprintf(stdout,"2");
   smr_dump(smr,nch);
   for (ch = 0; ch < nch; ch++)
   psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   fprintf(stdout,"4");
   smr_dump(smr,nch);
   break;
case 8:
   /* Compare 0 and 4 */
   psycho_n1 (smr, nch);
   fprintf(stdout,"0");
   smr_dump(smr,nch);

   for (ch = 0; ch < nch; ch++)
   psycho_4 (&buffer[ch][0], &sam[ch][0], ch, &smr[ch][0], // snr32,
       (FLOAT) s_freq[header.version][header.sampling_frequency] *
       1000, &glopts);
   fprintf(stdout,"4");
   smr_dump(smr,nch);
   break;
default:
   fprintf (stderr, "Invalid psy model specification: %i\n", model);
   exit (0);
}

if (glopts.quickmode == TRUE)
   /* copy the smr values and reuse them later */
   for (ch = 0; ch < nch; ch++) {
   for (sb = 0; sb < SBLIMIT; sb++)
   smrdef[ch][sb] = smr[ch][sb];
   }

if (glopts.verbosity > 4)
   smr_dump(smr, nch);



}

#ifdef NEWENCODE
sf_transmission_pattern (scalar, scfsi, &frame);
main_bit_allocation_new (smr, scfsi, bit_alloc, &adb, &frame, &glopts);
//main_bit_allocation (smr, scfsi, bit_alloc, &adb, &frame, &glopts);

if (error_protection)
CRC_calc (&frame, bit_alloc, scfsi, &crc);

write_header (&frame, &bs);
//encode_info (&frame, &bs);
if (error_protection)
putbits (&bs, crc, 16);
write_bit_alloc (bit_alloc, &frame, &bs);
//encode_bit_alloc (bit_alloc, &frame, &bs);
write_scalefactors(bit_alloc, scfsi, scalar, &frame, &bs);
//encode_scale (bit_alloc, scfsi, scalar, &frame, &bs);
subband_quantization_new (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
           *subband, &frame);
//subband_quantization (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
//   *subband, &frame);
write_samples_new(*subband, bit_alloc, &frame, &bs);
//sample_encoding (*subband, bit_alloc, &frame, &bs);
#else
transmission_pattern (scalar, scfsi, &frame);
main_bit_allocation (smr, scfsi, bit_alloc, &adb, &frame, &glopts);
if (error_protection)
CRC_calc (&frame, bit_alloc, scfsi, &crc);
encode_info (&frame, &bs);
if (error_protection)
encode_CRC (crc, &bs);
encode_bit_alloc (bit_alloc, &frame, &bs);
encode_scale (bit_alloc, scfsi, scalar, &frame, &bs);
subband_quantization (scalar, *sb_sample, j_scale, *j_sample, bit_alloc,
           *subband, &frame);
sample_encoding (*subband, bit_alloc, &frame, &bs);
#endif

/* If not all the bits were used, write out a stack of zeros */
for (i = 0; i < adb; i++)
put1bit (&bs, 0);
if (header.dab_extension) {
/* Reserve some bytes for X-PAD in DAB mode */
putbits (&bs, 0, header.dab_length * 8);

for (i = header.dab_extension - 1; i >= 0; i--) {
   CRC_calcDAB (&frame, bit_alloc, scfsi, scalar, &crc, i);
   /* this crc is for the previous frame in DAB mode */
   if (bs.buf_byte_idx + lg_frame < bs.buf_size)
   bs.buf[bs.buf_byte_idx + lg_frame] = crc;
   /* reserved 2 bytes for F-PAD in DAB mode */
   putbits (&bs, crc, 8);
}
putbits (&bs, 0, 16);
}

frameBits = sstell (&bs) - sentBits;

if (frameBits % 8) {   /* a program failure */
fprintf (stderr, "Sent %ld bits = %ld slots plus %ld\n", frameBits,
   frameBits / 8, frameBits % 8);
fprintf (stderr, "If you are reading this, the program is broken\n");
fprintf (stderr, "email [mfc at NOTplanckenerg.com] without the NOT\n");
fprintf (stderr, "with the command line arguments and other info\n");
exit (0);
}

sentBits += frameBits;
}

close_bit_stream_w (&bs);

if ((glopts.verbosity > 1) && (glopts.vbr == TRUE)) {
int i;
#ifdef NEWENCODE
extern int vbrstats_new[15];
#else
extern int vbrstats[15];
#endif
fprintf (stdout, "VBR stats:\n");
for (i = 1; i < 15; i++)
fprintf (stdout, "%4i ", bitrate[header.version][i]);
fprintf (stdout, "\n");
for (i = 1; i < 15; i++)
#ifdef NEWENCODE
fprintf (stdout,"%4i ",vbrstats_new[i]);
#else
fprintf (stdout, "%4i ", vbrstats[i]);
#endif
fprintf (stdout, "\n");
}

fprintf (stderr,
   "Avg slots/frame = %.3f; b/smp = %.2f; bitrate = %.3f kbps\n",
   (FLOAT) sentBits / (frameNum * 8),
   (FLOAT) sentBits / (frameNum * 1152),
   (FLOAT) sentBits / (frameNum * 1152) *
   s_freq[header.version][header.sampling_frequency]);

if (fclose (musicin) != 0) {
fprintf (stderr, "Could not close \"%s\".\n", original_file_name);
exit (2);
}

fprintf (stderr, "\nDone\n");

time(&end_time);
total_time = end_time - start_time;
printf("total time is %d\n", total_time);

//add new sentence
fprintf(outfile, "音频声道数：%d\n", nch);
fprintf(outfile, "观测第%d帧\n", frameNum);
fprintf(outfile, "本帧所分配比特：%d bits\n", adb);
fprintf(outfile, "该帧比例因子和比特分配结果如下：\n");
for (ch = 0; ch < nch; ch++)
{
   fprintf(outfile, "声道%2d\n", ch + 1);
   for (sb = 0; sb < frame.sblimit; sb++)
   {
       fprintf(outfile, "子带[%2d]比例因子：\t", sb + 1);
       for (int gr = 0; gr < 3; gr++)
       {
           fprintf(outfile, "%2d\t", scalar[ch][gr][sb]);
       }
       fprintf(outfile, "\n");
       fprintf(outfile, "子带[%2d]比特分配表：\t%2d\n", sb + 1, bit_alloc[ch][sb]);
       fprintf(outfile, "\n");
   }
}
//add finish

exit (0);
}
四、实验结果