数字信号处理-钢琴音识别

闻柯生

已于 2024-07-29 23:09:14 修改

阅读量238

点赞数 8

分类专栏：实验报告开源文章标签：信号处理 matlab 算法开源

于 2024-07-20 21:04:30 首次发布

本文链接：https://blog.csdn.net/fpbnb/article/details/140577190

版权

实验报告开源专栏收录该内容

2 篇文章 0 订阅

订阅专栏

前言

能自己尽量就自己做，万老师讲的非常好，但是我当时花了一个周末才做出来的，如果你要答辩的话你必须要整点新活出来，因为我这个版本是基础，谁都能做出来的那种，如果你做的没我好或者和我差不多，那我劝你最好省省吧。 我的这个识别小星星的正确率有0.94，没有用到什么算法，~~没用算法都有0.94的正确率bushi~~。 唯一有参考性的地方就是前面提取峰值的那些滤波参数，直接看代码。~~如果你真的啥也不会，直接复制粘贴也能写实验报告。~~

代码

clc;
% clear;
format compact;
[x,Fs]=audioread('D:\lab\2023年秋季数字信号处理实验报告\音频文件\star_69.m4a');%这里改成音频文件的存储路径
music=x(:,1)./max(x(:,1)); %归一化
Frame_Num = 100; %帧长参数如何选择请自行研究对比
a=floor(length(music)/Frame_Num);
envelope1=zeros(1,a);
for i=1:a % i是以Frame_Num个点分帧后的帧号
    %每Frame_Num点是一个临时帧
    temp = music((i-1)*Frame_Num+1:i*Frame_Num);
    %取出当前帧的峰峰值作为该帧的帧峰值
    %并保存在envelope包络中
    envelope1(:,i)=max(temp)-min(temp);
end
Beat_Min=69;%60秒多少拍
envelope3 = smooth(envelope1,30);
if(Beat_Min==69)%69拍做220次20点滑动平均
    for i=1:220
        envelope3 = smooth(envelope3,20);
    end
elseif(Beat_Min==92)%92拍做17次30点滑动平均且帧长要改为300
    for i=1:17
        envelope3 = smooth(envelope3,20);
    end
elseif(Beat_Min==115)%115拍做145次20点滑动平均
    for i=1:145
        envelope3 = smooth(envelope3,20);
    end
else%138拍做70次20点滑动平均
    for i=1:70
        envelope3 = smooth(envelope3,20);
    end
end
envelope4 = envelope3/max(envelope3);
n=1:Frame_Num:a*Frame_Num;
Min_sub_beat=4;%最小节拍数
Min_Gap=floor(60/Beat_Min*Fs/Min_sub_beat/Frame_Num);%最少多少点一个音符
[peaks,locs] = findpeaks(envelope4,'minpeakheight',0.2,'MinPeakDistance',Min_Gap);%寻找帧峰
locs=locs-30;%修正起始点位置
F0=0.1; %观察的谱间隔暂定为0.1Hz
N=Fs/F0; %FFT点数
Amp_MS=zeros(N,length(locs));Amp_MS_standard=zeros(N,length(locs));
%% 1. 取音程段
for i=1:length(locs)
    if i<length(locs) 
        %若不是最后一段则取当前段与后一段的前2/3的信号
        %X_temp=music(locs(i)*Frame_Num:locs(i)*Frame_Num+5000);
        X_temp = music(locs(i)*Frame_Num:floor(locs(i)+(locs(i+1)-locs(i))*1/2)*Frame_Num );
    else
        %若是最后一段，则按照前一段的长度取（因为最后一段后面没有下一段的标记了）
        %X_temp=music(locs(i)*Frame_Num:locs(i)*Frame_Num+5000);
        X_temp = music(locs(i)*Frame_Num:floor(locs(i)+(locs(i)-locs(i-1))*1/2)*Frame_Num );
    end
    X_temp = X_temp.*hamming(length(X_temp)); % 2. 乘以窗函数（暂定汉宁窗）
    Amp_temp = fft(X_temp,N); % 3. 做FFT（N比X_temp的长度要长，相当于添零了）
    Amp_MS(:,i)= abs(Amp_temp); %保存本段音程的谱分析结果  
    Amp_MS_standard(:,i)=Amp_MS(:,i)/max(Amp_MS(:,i));
end
% [m,p]=max(Amp_MS_standard(:,4));
% figure(1);%无用
% subplot(211);plot(1:length(music),music);xlabel("n");ylabel("幅度");title("star69.m4a的时域图和包络线");
% hold on;plot(n,envelope1);
% subplot(212);plot(n,envelope1);xlabel("n");ylabel("幅度");title("star69.m4a的包络线");
% figure(2);%无用
% subplot(411);plot(n,envelope1);xlabel("n");ylabel("幅度");title("对star69.m4a中值滤波");
% subplot(412);plot(n,envelope2);xlabel("n");ylabel("幅度");title("对star69.m4a中值滤波");axis([0,2000000,0,2]);
% subplot(413);plot(n,envelope3);xlabel("n");ylabel("幅度");title("对star69.m4a均值滤波");axis([0,2000000,0,2]);
% subplot(414);plot(n,envelope4);xlabel("n");ylabel("幅度");title("对star69.m4a均值滤波后归一化");axis([0,2000000,0,2]);
figure(3);clf;%有用
subplot(311);plot(1:length(music),music);hold on;plot(n,envelope4);
subplot(312);plot(1:length(music),music);hold on;plot(n,envelope4);stem((locs-1)*Frame_Num+1,peaks);
% figure(4);clf;%无用
% subplot(211);plot(1:length(music),music);hold on;plot(n,envelope4);stem((locs2-1)*100+1,peaks2);
% subplot(212);plot(1:length(music),music);hold on;plot(n,envelope4);stem((locs2-1)*100+1,music2);
figure(4);
k=0:N-1;
for i=1:6
    for j=1:7
        if((i-1)*7+j<=length(locs)) %小于现有音程的段数
            subplot(6,7,(i-1)*7+j);
            plot(k*Fs/N, Amp_MS(:,(i-1)*7+j)/max(Amp_MS(:,(i-1)*7+j)));title((i-1)*7+j);
            axis([0,2000,0,1]);
        end
    end
end
test_Num=4000; %画谱图的临时帧长（非前面计算包络的帧长Frame_Num） F0=1; %观察谱间隔暂定1 Hz
frame_N=floor(length(music)/test_Num); %帧数
frame_t=[0:frame_N-1]*test_Num/Fs; %谱图横坐标单位调为时间(s)
fft_N=Fs; %每一帧FFT的点数，暂设为44100点
frame_f=[0:floor((fft_N-1))]*F0; %谱图纵坐标单位为Hz
frame_A=zeros(fft_N,frame_N); % frame_A记录某一帧的某一频率处的值，后面用颜色显示
for i=1:frame_N-1
    %每一帧400点，均乘400点哈明窗后，做44100点FFT
    frame_A(:,i)= abs(fft(music((i-1)*test_Num+1:i*test_Num).*hamming(test_Num),fft_N));
end
figure(5);
subplot(211);
imagesc(frame_t,frame_f,frame_A);
axis xy; % axis xy ：笛卡尔轴模式，原点在左下角，y轴是竖直的，由底至顶标数，x轴是水平的，从左往右标数
axis([0,ceil(length(music)/44100),0,200]); % M_T是音乐总的时间，M_T = ceil(length(音频点数)/44100);
% %% 图6
% figure(6);
% starti=locs(6)*Frame_Num;
% % 时域长度为400点
% test_N = 400;
% i=starti:starti+test_N;
% test_music=music(i);
% subplot(331);
% plot(test_music);title('实际长度：400点');
% % 矩形窗
% test_music_FFT=fft(test_music,44100);
% subplot(334);
% stem(abs(test_music_FFT)/max(abs(test_music_FFT)),'.');
% title('矩形窗谱图，44100点FFT');axis([0,1000,0,1]);
% % 汉明窗
% test_music_FFT=fft(test_music.*hamming(length(test_music)),44100);
% subplot(337);
% stem(abs(test_music_FFT)/max(abs(test_music_FFT)),'.');
% title('哈明窗谱图，44100点FFT');axis([0,1000,0,1]);
% % 时域长度为4000点
% test_N = 4000;
% i=starti:starti+test_N;
% test_music=music(i);
% subplot(332);
% plot(test_music);title('实际长度：4000点');
% % 矩形窗
% test_music_FFT=fft(test_music,44100);
% subplot(335);
% stem(abs(test_music_FFT)/max(abs(test_music_FFT)),'.');
% title('矩形窗谱图，44100点FFT');axis([0,1000,0,1]);
% % 汉明窗
% test_music_FFT=fft(test_music.*hamming(length(test_music)),44100);
% subplot(338);
% stem(abs(test_music_FFT)/max(abs(test_music_FFT)),'.');
% title('哈明窗谱图，44100点FFT');axis([0,1000,0,1]);
% % 时域长度为2/3
% test_N=round((locs(2)*Frame_Num-locs(1)*Frame_Num+1)*2/3);
% i=starti:starti+test_N;
% test_music=music(i);
% subplot(333);
% plot(test_music);title('实际长度：一个音程*1/2');
% % 矩形窗
% test_music_FFT=fft(test_music,44100);
% subplot(336);
% stem(abs(test_music_FFT)/max(abs(test_music_FFT)),'.');
% title('矩形窗谱图，44100点FFT');axis([0,1000,0,1]);
% % 汉明窗
% test_music_FFT=fft(test_music.*hamming(length(test_music)),44100);
% subplot(339);
% stem(abs(test_music_FFT)/max(abs(test_music_FFT)),'.');
% title('哈明窗谱图，44100点FFT');axis([0,1000,0,1]);
%% 整首曲目识别
Piano_F=xlsread('D:\fpbworld\lab\2023年秋季数字信号处理实验报告\Piano_Key_F.xlsx');
Piano_F=Piano_F(1:88);
key=1:88;
delter_fq=zeros(1,length(Piano_F)-1);
fdown=zeros(1,length(Piano_F));
fup=zeros(1,length(Piano_F));
for i=2:length(Piano_F)
    delter_fq(1,i-1)=Piano_F(i)-Piano_F(i-1);
end
for i=1:length(Piano_F)
    if(i==1)
        fdown(:,i)=Piano_F(i,:)-delter_fq(:,i)/2;
        fup(:,i)=Piano_F(i,:)+delter_fq(:,i)/2;
    elseif(i==length(Piano_F))
        fdown(:,i)=Piano_F(i,:)-delter_fq(:,i-1)/2;
        fup(:,i)=Piano_F(i,:)+delter_fq(:,i-1)/2;
    else
        fdown(:,i)=Piano_F(i,:)-delter_fq(:,i-1)/2;
        fup(:,i)=Piano_F(i,:)+delter_fq(:,i)/2;
    end
end
KeyEnergy_MS=zeros(88,length(locs));%能量谱
KeyEnergy_MS_kl=zeros(88,length(locs));%傀儡变量
Key_Rec=zeros(2,length(locs));%我识别出来的
rightnum=0;%正确数量
Keyright=[60,60,67,67,69,69,67, 65,65,64,64,62,62,60, 67,67,65,65,64,64,62, 67,67,65,65,64,64,62, 60,60,67,67,69,69,67, 65,65,64,64,62,62,60;
          48,20,48,20,53,20,48, 53,20,48,20,55,20,48, 48,20,53,20,48,20,55, 48,20,53,20,48,20,55, 48,20,55,20,53,20,48, 53,20,48,20,55,20,48];
Key_Rec(3,:)=Keyright(1,:);
Key_Rec(4,:)=Keyright(2,:);
for i=1:length(locs)
    for j=1:88
        KeyEnergy_MS(j,i) = mean(Amp_MS(round((fdown(:,j)/F0):round(fup(:,j)/F0)),i));        
    end
    KeyEnergy_MS(:,i)=KeyEnergy_MS(:,i)/max(KeyEnergy_MS(:,i));
    [m,Key_Rec(1,i)]=max(KeyEnergy_MS(:,i));
end
%识别策略
for i=1:6
    for j=1:7
        m=KeyEnergy_MS(40,(i-1)*7+j);
        p1=40;
        for k=40:49
            if(m<KeyEnergy_MS(k,(i-1)*7+j))
                m=KeyEnergy_MS(k,(i-1)*7+j);
                p1=k;
            end
        end
        Key_Rec(1,(i-1)*7+j)=p1;
        m=KeyEnergy_MS(28,(i-1)*7+j);
        p1=28;
        for k=28:36
            if(m<KeyEnergy_MS(k,(i-1)*7+j))
                m=KeyEnergy_MS(k,(i-1)*7+j);
                p1=k;
            end
        end
        Key_Rec(2,(i-1)*7+j)=p1;
        if(mod(j,2)==0)
            Key_Rec(2,(i-1)*7+j)=0;
        end
        Key_Rec(1,(i-1)*7+j)=Key_Rec(1,(i-1)*7+j)+20;
        Key_Rec(2,(i-1)*7+j)=Key_Rec(2,(i-1)*7+j)+20;
        if(Key_Rec(1,(i-1)*7+j)==Keyright(1,(i-1)*7+j))
            rightnum=rightnum+1;
        end
        if(Key_Rec(2,(i-1)*7+j)==Keyright(2,(i-1)*7+j))
            rightnum=rightnum+1;
        end
    end
end
figure(7);
for i=1:6
    for j=1:7
        if((i-1)*7+j<=length(locs))%有42个音程
            subplot(6,7,(i-1)*7+j);
            plot(key+20,KeyEnergy_MS(:,(i-1)*7+j));title((i-1)*7+j);
            if(mod(j,2)==0)
                text(Key_Rec(1,(i-1)*7+j),KeyEnergy_MS(Key_Rec(1,(i-1)*7+j)-20,(i-1)*7+j),num2str(Key_Rec(1,(i-1)*7+j)));
            else
                text(Key_Rec(1,(i-1)*7+j),KeyEnergy_MS(Key_Rec(1,(i-1)*7+j)-20,(i-1)*7+j),num2str(Key_Rec(1,(i-1)*7+j)));
                text(Key_Rec(2,(i-1)*7+j),KeyEnergy_MS(Key_Rec(2,(i-1)*7+j)-20,(i-1)*7+j),num2str(Key_Rec(2,(i-1)*7+j)));
            end
            axis([20,108,0,1.2]);
        end
    end
end 
figure(8);
Key_Rec_show=zeros(88,length(locs));
Key_Rec_show2=zeros(88,length(locs));
for i=1:length(locs)
    Key_Rec_show(Key_Rec(1,i)-20,i)=1;
end
for i=1:6
    for j=1:7
        if(mod(j,2)==1)
            Key_Rec_show2(Key_Rec(2,(i-1)*7+j)-20,(i-1)*7+j)=1;
        end
    end
end  
imagesc(1:length(locs),21:108,Key_Rec_show);
axis xy;
figure(9);
for i=1:6
    for j=1:7
        if((i-1)*7+j<=length(locs))%有42个音程
            subplot(6,7,(i-1)*7+j);
            stem(key+20,Key_Rec_show(:,(i-1)*7+j));text(Key_Rec(1,(i-1)*7+j),1,num2str(Key_Rec(1,(i-1)*7+j)));hold on;
            if(mod(j,2)~=0)
                stem(key+20,Key_Rec_show2(:,(i-1)*7+j));text(Key_Rec(2,(i-1)*7+j),1,num2str(Key_Rec(2,(i-1)*7+j)));
            end
            title((i-1)*7+j);
            axis([0,110,0,1.2]);
        end
    end
end  
acc=rightnum/length(locs)/2
%% 节奏识别
% ideal_Beat_T为标准节拍
ideal_Beat_T = [1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 1 1 1 1 1];
% Real_Beat_T保存实际节拍
Real_Beat_T = zeros(1,length(locs)-1);
% Err_Beat_T保存节拍误差值
Err_Beat_T = zeros(1,length(locs)-1);
sumerr=0;%平均误差
for i=1:length(locs)-1
    Real_Beat_T(1,i)=(locs(i+1)-locs(i))*Frame_Num/Fs*Beat_Min/60;
    Err_Beat_T(1,i)=Real_Beat_T(1,i)-ideal_Beat_T(1,i);
    sumerr=abs(Err_Beat_T(1,i))+sumerr;
end
sumerr=sumerr/41
figure(11);
subplot(211);
plot(1:length(locs)-1,Real_Beat_T,'bo');hold on;title(Beat_Min+"拍真实节拍与标准节拍的对比");
plot(1:length(locs)-1,ideal_Beat_T,'r*');grid on;
axis([1,length(locs)-1,0,2.2]);
subplot(212);
plot(1:length(locs)-1,Err_Beat_T,'bx-');grid on;title(Beat_Min+"真实节拍与标准节拍的差值");
axis([1,length(locs)-1,-0.5,0.5]);

闻柯生

关注

8
点赞
踩
5

收藏

觉得还不错? 一键收藏
0
评论
数字信号处理-钢琴音识别

能自己尽量就自己做，万老师讲的非常好，但是我当时花了一个周末才做出来的，如果你要答辩的话你必须要整点新活出来，因为我这个版本是基础，谁都能做出来的那种，如果你做的没我好或者和我差不多，那我劝你最好省省吧。我的这个识别小星星的正确率有0.94，没有用到什么算法，没用算法都有0.94的正确率bushi。唯一有参考性的地方就是前面提取峰值的那些滤波参数，直接看代码。如果你真的啥也不会，直接复制粘贴也能写实验报告。
复制链接

扫一扫