音频分帧-重叠overlap

语音是实时变化的,在不重叠的帧之间插入一些帧来提取特征参数
分帧效果


%% audio enframe
clc;
clear all;
close all;
[audiodata,fs]=audioread("test.wav");
 % we can divided into fn enframe:fn=(N-overlap)/inc
 % startIndex= 1, inc, 2*inc....
 audiolen=length(audiodata);
 audiot=audiolen/fs;
 time= (0:audiolen-1)/fs;
 wlen=fs*0.5;%there should be  0.05 but for figure
 inc=0.5*wlen;
 fn=floor((audiolen-(wlen-inc))/inc);
 enframeaudio=zeros(wlen,fn);
 figure(1);
 subplot(fn+1,1,1);
 plot(time,audiodata);
 title("原始音频时域图形");
 for i=1:fn
     startindex=inc*(i-1)+1;
     enframeaudio(:,i)=audiodata(startindex:startindex+wlen-1);
     enframetime=(startindex:startindex+wlen-1)/fs;
     subplot(fn+1,1,i+1);
     plot(enframetime,enframeaudio(:,i));
     xlim([0,audiot]);
     title(sprintf("第%d帧音频波形",i));
 end

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的C语言程序,用于对WAV音频进行分帧加窗、谱减法处理和反变换: ```c #include <stdio.h> #include <stdlib.h> #include <math.h> #define PI 3.14159265358979323846 #define FRAME_SIZE 256 #define FRAME_SHIFT 128 #define WINDOW_TYPE 0 // 0: Hamming window, 1: Hanning window, 2: Rectangular window int main(int argc, char **argv) { FILE *fp_in, *fp_out; short int inbuf[FRAME_SIZE]; double window[FRAME_SIZE]; double mag[FRAME_SIZE]; double noise[FRAME_SIZE]; double phase[FRAME_SIZE]; double re[FRAME_SIZE], im[FRAME_SIZE]; double spec_re[FRAME_SIZE], spec_im[FRAME_SIZE]; double noise_spec_re[FRAME_SIZE], noise_spec_im[FRAME_SIZE]; double noise_mean[FRAME_SIZE], noise_var[FRAME_SIZE]; int i, j, k, nframes; double frame_sum; if (argc != 3) { fprintf(stderr, "Usage: %s <input file> <output file>\n", argv[0]); exit(1); } fp_in = fopen(argv[1], "rb"); if (fp_in == NULL) { fprintf(stderr, "Cannot open input file %s\n", argv[1]); exit(1); } fp_out = fopen(argv[2], "wb"); if (fp_out == NULL) { fprintf(stderr, "Cannot open output file %s\n", argv[2]); exit(1); } // Initialize window switch (WINDOW_TYPE) { case 0: // Hamming window for (i = 0; i < FRAME_SIZE; i++) { window[i] = 0.54 - 0.46 * cos(2 * PI * i / (FRAME_SIZE - 1)); } break; case 1: // Hanning window for (i = 0; i < FRAME_SIZE; i++) { window[i] = 0.5 - 0.5 * cos(2 * PI * i / (FRAME_SIZE - 1)); } break; case 2: // Rectangular window for (i = 0; i < FRAME_SIZE; i++) { window[i] = 1.0; } break; } // Initialize noise mean and variance for (i = 0; i < FRAME_SIZE; i++) { noise_mean[i] = 0.0; noise_var[i] = 0.0; } // Process each frame nframes = 0; while (fread(inbuf, sizeof(short int), FRAME_SHIFT, fp_in) == FRAME_SHIFT) { // Apply window for (i = 0; i < FRAME_SIZE; i++) { re[i] = inbuf[i] * window[i]; im[i] = 0.0; } // Compute FFT for (i = 0; i < FRAME_SIZE; i++) { spec_re[i] = 0.0; spec_im[i] = 0.0; for (j = 0; j < FRAME_SIZE; j++) { spec_re[i] += re[j] * cos(2 * PI * i * j / FRAME_SIZE); spec_im[i] -= re[j] * sin(2 * PI * i * j / FRAME_SIZE); } mag[i] = sqrt(spec_re[i] * spec_re[i] + spec_im[i] * spec_im[i]); phase[i] = atan2(spec_im[i], spec_re[i]); } // Compute noise spectrum mean and variance if (nframes < 10) { // Use first 10 frames as noise estimate for (i = 0; i < FRAME_SIZE; i++) { noise_spec_re[i] = spec_re[i]; noise_spec_im[i] = spec_im[i]; } } else { for (i = 0; i < FRAME_SIZE; i++) { noise_spec_re[i] = 0.9 * noise_spec_re[i] + 0.1 * spec_re[i]; noise_spec_im[i] = 0.9 * noise_spec_im[i] + 0.1 * spec_im[i]; } } // Compute noise mean and variance for (i = 0; i < FRAME_SIZE; i++) { noise_mean[i] = 0.95 * noise_mean[i] + 0.05 * mag[i]; noise_var[i] = 0.95 * noise_var[i] + 0.05 * (mag[i] - noise_mean[i]) * (mag[i] - noise_mean[i]); noise[i] = noise_mean[i] + 3 * sqrt(noise_var[i]); } // Perform spectral subtraction for (i = 0; i < FRAME_SIZE; i++) { mag[i] = mag[i] - noise[i]; if (mag[i] < 0) { mag[i] = 0; } spec_re[i] = mag[i] * cos(phase[i]); spec_im[i] = mag[i] * sin(phase[i]); } // Compute IFFT for (i = 0; i < FRAME_SIZE; i++) { re[i] = 0.0; im[i] = 0.0; for (j = 0; j < FRAME_SIZE; j++) { re[i] += spec_re[j] * cos(2 * PI * i * j / FRAME_SIZE) - spec_im[j] * sin(2 * PI * i * j / FRAME_SIZE); im[i] += spec_re[j] * sin(2 * PI * i * j / FRAME_SIZE) + spec_im[j] * cos(2 * PI * i * j / FRAME_SIZE); } re[i] /= FRAME_SIZE; im[i] /= FRAME_SIZE; } // Overlap-add and write output frame_sum = 0.0; for (i = 0; i < FRAME_SIZE; i++) { frame_sum += window[i] * window[i]; inbuf[i] = (short int)(re[i] / window[i]); } fwrite(inbuf, sizeof(short int), FRAME_SHIFT, fp_out); nframes++; } fclose(fp_in); fclose(fp_out); return 0; } ``` 该程序使用Hamming窗口对音频进行分帧,然后进行谱减法处理。谱减法使用估计的噪声能量谱对原始信号的能量谱进行减法处理,以去除噪声。在每个帧中,程序估计噪声能量谱的均值和方差,然后使用这些统计量来计算每个频率上的噪声能量值。对于低于估计噪声能量的频率,程序将能量值设置为0,以避免误差。程序在谱减法处理后,使用逆FFT将处理后的频谱转换回时域信号,然后进行重叠相加以合成输出信号。 请注意,该程序只是一个简单的实现示例,并且可能需要进行修改以适应特定的应用场景。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值