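% Speech is time-varying, so extra frames are inserted between the non-overlapping frames (i.e. adjacent frames overlap) when extracting feature parameters.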
%% audio enframe
% Split an audio signal into overlapping frames and plot every frame in its
% own subplot underneath the original waveform.
clc;
clear;      % workspace variables only; "clear all" is not needed here
close all;

[audiodata,fs]=audioread("test.wav");
% audioread returns one column per channel. Keep only the first channel so
% that the waveform plotted on top is the same signal the frames are cut from.
audiodata=audiodata(:,1);

% Number of frames: fn = floor((N-overlap)/inc), where overlap = wlen-inc.
% Frame i starts at sample (i-1)*inc+1, i.e. 1, inc+1, 2*inc+1, ...
audiolen=length(audiodata);
audiot=audiolen/fs;         % total duration in seconds
time=(0:audiolen-1)/fs;     % time stamp of every sample, first sample at 0 s

% Frame length is 0.5 s here (rather than the 0.05 s that would normally be
% used) so that the number of frames stays small enough to plot.
% Both sizes are rounded because they are used as array sizes and indices.
wlen=round(fs*0.5);
inc=max(1,round(0.5*wlen)); % hop size: half a frame, never less than 1 sample

if audiolen<wlen
    error("enframe:audioTooShort", ...
        "Audio has %d samples but one frame needs %d samples.",audiolen,wlen);
end

fn=floor((audiolen-(wlen-inc))/inc);
enframeaudio=zeros(wlen,fn);    % one frame per column

figure(1);
subplot(fn+1,1,1);
plot(time,audiodata);
title("原始音频时域图形");

for i=1:fn
    startindex=inc*(i-1)+1;
    stopindex=startindex+wlen-1;
    enframeaudio(:,i)=audiodata(startindex:stopindex);
    % Reuse the global time axis so each frame lines up exactly with the
    % original waveform (sample k is at (k-1)/fs).
    enframetime=time(startindex:stopindex);
    subplot(fn+1,1,i+1);
    plot(enframetime,enframeaudio(:,i));
    xlim([0,audiot]);           % same x range as the original for comparison
    title(sprintf("第%d帧音频波形",i));
end