function audio_pitch_correction
% AUDIO_PITCH_CORRECTION  Build the main window of the pitch-correction tool.
%
% Creates the import / record / process / play / save buttons, the
% recording status label and three waveform axes, then stores the shared
% application state (audio buffers, sample rate, widget handles and pitch
% tracks) in fig.UserData so every callback can reach it via the figure.

% Main application window
fig = uifigure('Name', '音频音准矫正系统', 'Position', [100 100 900 700]);

% Callbacks, bound to this figure up front to keep the widget code compact
on_import_source    = @(btn,event) select_audio(fig, 'source');
on_record           = @(btn,event) record_audio(fig);
on_import_reference = @(btn,event) select_audio(fig, 'reference');
on_process          = @(btn,event) process_audio(fig);
on_play_source      = @(btn,event) play_audio(fig, 'source');
on_play_corrected   = @(btn,event) play_audio(fig, 'corrected');
on_save             = @(btn,event) save_audio(fig);

% Source-audio section: heading plus a group holding import/record buttons
uilabel(fig, 'Position', [50 680 300 20], 'Text', '待矫正音频来源:', 'FontWeight', 'bold');
source_btn_group = uibuttongroup(fig, 'Position', [50 630 300 40], 'Title', '');
uibutton(source_btn_group, 'Position', [10 10 130 30], ...
    'Text', '导入音频文件', 'ButtonPushedFcn', on_import_source);
uibutton(source_btn_group, 'Position', [160 10 130 30], ...
    'Text', '录制音频', 'ButtonPushedFcn', on_record);

% Reference-audio section
uilabel(fig, 'Position', [400 680 300 20], 'Text', '参考音频来源:', 'FontWeight', 'bold');
uibutton(fig, 'Position', [400 630 150 30], ...
    'Text', '导入参考音频', 'ButtonPushedFcn', on_import_reference);

% "Start correction" button; created disabled
process_btn = uibutton(fig, 'Position', [600 630 150 30], ...
    'Text', '开始矫正', 'Enable', 'off', 'ButtonPushedFcn', on_process);

% Playback and save buttons
uibutton(fig, 'Position', [50 580 150 30], ...
    'Text', '播放原始音频', 'ButtonPushedFcn', on_play_source);
uibutton(fig, 'Position', [250 580 150 30], ...
    'Text', '播放矫正音频', 'ButtonPushedFcn', on_play_corrected);
uibutton(fig, 'Position', [450 580 150 30], ...
    'Text', '保存矫正音频', 'ButtonPushedFcn', on_save);

% Recording status label
recording_label = uilabel(fig, 'Position', [650 580 200 30], ...
    'Text', '准备录音', 'FontColor', [0 0.5 0]);

% Waveform axes, top to bottom: source, reference, corrected
ax_source = uiaxes(fig, 'Position', [50 350 800 150]);
title(ax_source, '待矫正音频波形');
ax_reference = uiaxes(fig, 'Position', [50 180 800 150]);
title(ax_reference, '参考音频波形');
ax_corrected = uiaxes(fig, 'Position', [50 10 800 150]);
title(ax_corrected, '矫正后音频波形');

% Shared state, assembled locally and attached to the figure in one step
state = struct();
state.source_audio = [];
state.reference_audio = [];
state.corrected_audio = [];
state.fs = 44100;                  % default sample rate
state.process_btn = process_btn;
state.axes = struct('source', ax_source, 'reference', ax_reference, 'corrected', ax_corrected);
state.recording_label = recording_label;
state.recorder = [];               % audiorecorder object
state.timer = [];                  % countdown timer object
state.f0_source = [];              % pitch track of the source audio
state.f0_ref = [];                 % pitch track of the reference audio
state.f0_corrected = [];           % pitch track of the corrected audio
fig.UserData = state;
end
function select_audio(fig, audio_type)
% SELECT_AUDIO  Import an audio file as the 'source' or 'reference' track.
%
%   fig        - main uifigure whose UserData holds the application state
%   audio_type - 'source' or 'reference'; selects the UserData field
%                ([audio_type '_audio']) and the axes to draw into
%
% The file is mixed down to mono and truncated to its first 20 seconds.
% Both tracks share the single sample rate stored in fig.UserData.fs, so
% when the other track is already loaded at a different rate the newly
% imported audio is resampled to that rate instead of overwriting it.

[file, path] = uigetfile({'*.wav;*.mp3;*.ogg;*.flac', ...
    '音频文件 (*.wav,*.mp3,*.ogg,*.flac)'});
if isequal(file, 0)
    return; % user cancelled the dialog
end
filename = fullfile(path, file);

% Report unreadable or unsupported files instead of failing in the callback
try
    [audio, fs] = audioread(filename);
catch ME
    errordlg(['音频读取失败: ' ME.message], '导入错误');
    return;
end
if isempty(audio)
    errordlg('音频文件为空!', '导入错误');
    return;
end

% Stereo / multichannel: average the channels down to mono
if size(audio, 2) > 1
    audio = mean(audio, 2);
end

% Keep only the first 20 seconds
max_samples = min(20*fs, length(audio));
audio = audio(1:max_samples);

% Keep one common sample rate for both tracks
if strcmp(audio_type, 'source')
    other_field = 'reference_audio';
else
    other_field = 'source_audio';
end
other_loaded = isfield(fig.UserData, other_field) && ~isempty(fig.UserData.(other_field));
if other_loaded && fs ~= fig.UserData.fs
    target_fs = fig.UserData.fs;
    [p, q] = rat(target_fs / fs);   % integer resampling factors
    audio = resample(audio, p, q);
    fs = target_fs;
end

% Store the data
fig.UserData.([audio_type '_audio']) = audio;
fig.UserData.fs = fs;

% Refresh the waveform plot
ax = fig.UserData.axes.(audio_type);
plot(ax, (1:length(audio))/fs, audio);
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Enable processing once both tracks are present
if ~isempty(fig.UserData.source_audio) && ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function record_audio(fig)
% RECORD_AUDIO  Open the recording dialog for the source track.
%
%   fig - main uifigure; forwarded to start_recording / stop_recording
%
% The dialog offers a duration field (1-30 s, default 5), a sample-rate
% drop-down (default 44100) and start / stop / close buttons. The duration
% and sample rate are read from the widgets when "start" is pressed.

dlg = uifigure('Name', '音频录制', 'Position', [300 300 400 200]);

% Recording length in seconds
uilabel(dlg, 'Position', [50 150 100 20], 'Text', '录音时长 (秒):');
duration_field = uieditfield(dlg, 'numeric', ...
    'Position', [160 150 100 20], 'Value', 5, 'Limits', [1 30]);

% Sample rate selection
uilabel(dlg, 'Position', [50 120 100 20], 'Text', '采样率:');
rate_menu = uidropdown(dlg, ...
    'Position', [160 120 100 20], ...
    'Items', {'8000', '16000', '44100', '48000'}, ...
    'Value', '44100');

% Button callbacks; widget values are evaluated at click time
on_start = @(btn,event) start_recording(fig, duration_field.Value, str2double(rate_menu.Value));
on_stop  = @(btn,event) stop_recording(fig);
on_close = @(btn,event) close(dlg);

% Control buttons: start, stop, close
uibutton(dlg, 'Position', [50 70 100 30], 'Text', '开始录音', 'ButtonPushedFcn', on_start);
uibutton(dlg, 'Position', [160 70 100 30], 'Text', '停止录音', 'ButtonPushedFcn', on_stop);
uibutton(dlg, 'Position', [270 70 100 30], 'Text', '关闭', 'ButtonPushedFcn', on_close);
end
function start_recording(fig, duration, fs)
% START_RECORDING  Begin a timed mono 16-bit recording with a countdown.
%
%   fig      - main uifigure holding the application state in UserData
%   duration - recording length in seconds
%   fs       - sample rate in Hz
%
% The recorder is stored in fig.UserData.recorder and the countdown timer
% in fig.UserData.timer, which is the field stop_recording inspects.

% Discard a countdown timer left over from a previous recording
if isfield(fig.UserData, 'timer') && ~isempty(fig.UserData.timer) ...
        && isvalid(fig.UserData.timer)
    stop(fig.UserData.timer);
    delete(fig.UserData.timer);
end
fig.UserData.timer = [];

% Stop a recorder that is still running before replacing it
if ~isempty(fig.UserData.recorder) && isrecording(fig.UserData.recorder)
    stop(fig.UserData.recorder);
end

% Update the status label
fig.UserData.recording_label.Text = '录音中...';
fig.UserData.recording_label.FontColor = [1 0 0];
drawnow;

% Create the recorder: 16-bit, mono
recorder = audiorecorder(fs, 16, 1);
fig.UserData.recorder = recorder;
fig.UserData.fs = fs;

% Start the recording; it stops by itself after 'duration' seconds
record(recorder, duration);

% Countdown timer. The first tick is delayed by one period so that tick k
% fires k seconds after the start; without the delay the last tick (which
% triggers stop_recording) would fire one second before the recording ends.
% TasksToExecute is rounded up so the final tick always sees remaining <= 0.
n_ticks = max(1, ceil(duration));
countdown = timer('ExecutionMode', 'fixedRate', 'Period', 1, ...
    'StartDelay', 1, ...
    'TasksToExecute', n_ticks, ...
    'TimerFcn', @(t,~) update_recording_timer(fig, t, duration));

% Store the handle before starting so stop_recording can always find it
fig.UserData.timer = countdown;
start(countdown);
end
function update_recording_timer(fig, t, total_duration)
% UPDATE_RECORDING_TIMER  Timer callback: show remaining recording time.
%
%   fig            - main uifigure holding the status label in UserData
%   t              - the timer object that fired
%   total_duration - requested recording length in seconds
%
% Calls stop_recording once the remaining time reaches zero.

% The main window may have been closed while the timer was still running;
% stop ticking instead of raising an error on every period.
if ~isvalid(fig)
    stop(t);
    return;
end

remaining = max(0, total_duration - t.TasksExecuted);
% Round up so a fractional remainder is printed as a whole number of seconds
fig.UserData.recording_label.Text = sprintf('录音中: %d秒', ceil(remaining));

% Finish automatically when the time is up
if remaining <= 0
    stop_recording(fig);
end
end
function stop_recording(fig)
% STOP_RECORDING  Stop the recorder and countdown, then store the audio.
%
%   fig - main uifigure holding the application state in UserData
%
% The recorded samples become the source track, the source waveform is
% redrawn and the process button is enabled when a reference track exists.
% If nothing has been recorded yet, an error dialog is shown and the
% application state is left untouched.

recorder = fig.UserData.recorder;
if ~isempty(recorder) && isrecording(recorder)
    stop(recorder);
end

% Stop and delete the countdown timer. The handle may be stored under
% either field name, so both are checked.
timer_fields = {'timer', 't'};
for k = 1:numel(timer_fields)
    name = timer_fields{k};
    if isfield(fig.UserData, name)
        tm = fig.UserData.(name);
        if ~isempty(tm) && isa(tm, 'timer') && isvalid(tm)
            stop(tm);
            delete(tm);
        end
        fig.UserData.(name) = [];
    end
end

% "Stop" pressed before any recording was started
if isempty(recorder)
    errordlg('尚未开始录音!', '录音错误');
    return;
end

% Fetch the samples; getaudiodata throws when the recorder holds no data
try
    audio = getaudiodata(recorder);
catch ME
    fig.UserData.recording_label.Text = '准备录音';
    fig.UserData.recording_label.FontColor = [0 0.5 0];
    errordlg(['获取录音数据失败: ' ME.message], '录音错误');
    return;
end
fs = fig.UserData.fs;

% Update the status label
fig.UserData.recording_label.Text = '录音完成!';
fig.UserData.recording_label.FontColor = [0 0.5 0];

% Store as the audio to be corrected
fig.UserData.source_audio = audio;

% Refresh the waveform plot
ax = fig.UserData.axes.source;
plot(ax, (1:length(audio))/fs, audio);
title(ax, '录制音频波形');
xlabel(ax, '时间 (s)'); ylabel(ax, '幅度');

% Enable processing when a reference track is available
if ~isempty(fig.UserData.reference_audio)
    fig.UserData.process_btn.Enable = 'on';
end
end
function process_audio(fig)
% PROCESS_AUDIO  Run the correction pipeline and show the preview controls.
%
%   fig - main uifigure holding the application state in UserData
%
% Steps: align source and reference, extract both pitch tracks, correct the
% source pitch, plot the result, open the pitch comparison figure and
% create the preview parameter controls. Each pipeline step reports its
% failure in an error dialog and aborts.

% The figure must be checked before its UserData is touched
if ~isvalid(fig)
    errordlg('主窗口已关闭,无法处理音频!', '处理错误');
    return;
end
source = fig.UserData.source_audio;
reference = fig.UserData.reference_audio;
fs = fig.UserData.fs;

% Progress dialog for the whole pipeline
h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音频对齐...', 'Indeterminate', 'on');

% Step 1: alignment
try
    [aligned_source, aligned_ref] = improved_align_audio(source, reference, fs);
catch ME
    close(h);
    errordlg(['音频对齐失败: ' ME.message], '处理错误');
    return;
end

% Step 2: fundamental-frequency extraction
h.Message = '提取音高...';
try
    [f0_source, time_source] = extract_pitch(aligned_source, fs);
    [f0_ref, time_ref] = extract_pitch(aligned_ref, fs);
catch ME
    close(h);
    errordlg(['音高提取失败: ' ME.message], '处理错误');
    return;
end

% Keep the pitch data for the comparison plots and the preview
fig.UserData.f0_source = f0_source;
fig.UserData.f0_ref = f0_ref;
fig.UserData.time_source = time_source;
fig.UserData.time_ref = time_ref;
fig.UserData.aligned_source = aligned_source;
fig.UserData.fs = fs;

% Step 3: initial pitch correction
h.Message = '初始矫正音调...';
try
    corrected = correct_pitch(fig, aligned_source, fs, f0_source, f0_ref, time_source, time_ref);
catch ME
    close(h);
    errordlg(['音高校正失败: ' ME.message], '处理错误');
    return;
end
close(h);

% Store the initial result
fig.UserData.corrected_audio = corrected;
fig.UserData.initial_corrected = corrected;

% A preview cached by an earlier run no longer matches this result
stale = intersect({'temp_corrected', 'f0_preview', 'time_preview'}, ...
    fieldnames(fig.UserData));
if ~isempty(stale)
    fig.UserData = rmfield(fig.UserData, stale);
end

% Refresh the corrected waveform
ax = fig.UserData.axes.corrected;
plot(ax, (1:length(corrected))/fs, corrected);
xlabel(ax, '时间 (s)');
ylabel(ax, '幅度');
title(ax, '矫正后音频波形');

% Pitch track of the corrected audio
[f0_corrected, time_corrected] = extract_pitch(corrected, fs);
fig.UserData.f0_corrected = f0_corrected;
fig.UserData.time_corrected = time_corrected;

% Full pitch comparison figure
plot_pitch_comparison(fig);

% Preview controls, plus the parameters used for the initial correction
create_preview_window(fig);
fig.UserData.current_smooth = 5;
fig.UserData.current_max_ratio = 2.0;
end

function create_preview_window(fig)
% CREATE_PREVIEW_WINDOW  Build the preview-parameter controls.
%
% uifigure containers lay their children out in pixels, and the main
% window has no free area large enough for the panel, so the controls live
% in a small companion window placed to the right of the main window. A
% window left over from an earlier run is replaced.

if isfield(fig.UserData, 'preview_window') && ~isempty(fig.UserData.preview_window) ...
        && isvalid(fig.UserData.preview_window)
    delete(fig.UserData.preview_window);
end

main_pos = fig.Position;
preview_window = uifigure('Name', '实时预览参数调整', ...
    'Position', [main_pos(1) + main_pos(3) + 10, main_pos(2), 320, 270]);
preview_panel = uipanel(preview_window, 'Title', '实时预览参数调整', ...
    'Position', [10 10 300 250], ...
    'BackgroundColor', [0.95 0.95 0.95]);

% Status label
preview_status = uilabel(preview_panel, ...
    'Position', [20 200 260 20], ...
    'Text', '当前参数: 平滑强度=5, 最大比例=2.0', ...
    'FontColor', [0 0.5 0], 'FontWeight', 'bold');

% Smoothing-strength slider
uilabel(preview_panel, 'Position', [20 170 100 20], 'Text', '平滑强度:', 'FontWeight', 'bold');
smooth_slider = uislider(preview_panel, ...
    'Position', [20 160 250 3], ...
    'Limits', [1 15], 'Value', 5);

% Maximum-ratio slider
uilabel(preview_panel, 'Position', [20 100 100 20], 'Text', '最大比例:', 'FontWeight', 'bold');
max_ratio_slider = uislider(preview_panel, ...
    'Position', [20 90 250 3], ...
    'Limits', [1.5 3.0], 'Value', 2.0);

% The callbacks reference both sliders, so they are attached only after
% both handles exist; an anonymous function captures variables when it is
% created and cannot see a slider that is defined later.
smooth_slider.ValueChangedFcn = ...
    @(sld,event) update_preview(fig, event.Value, max_ratio_slider.Value);
max_ratio_slider.ValueChangedFcn = ...
    @(sld,event) update_preview(fig, smooth_slider.Value, event.Value);

% Apply / reset buttons
apply_btn = uibutton(preview_panel, ...
    'Position', [100 10 100 30], ...
    'Text', '应用参数', ...
    'ButtonPushedFcn', @(btn,event) apply_parameters(fig));
reset_btn = uibutton(preview_panel, ...
    'Position', [210 10 70 30], ...
    'Text', '重置', ...
    'ButtonPushedFcn', @(btn,event) reset_preview(fig));

% Keep the handles for the preview callbacks
fig.UserData.preview_window = preview_window;
fig.UserData.preview_controls = struct(...
    'smooth_slider', smooth_slider, ...
    'max_ratio_slider', max_ratio_slider, ...
    'preview_status', preview_status, ...
    'apply_btn', apply_btn, ...
    'reset_btn', reset_btn);
end
% ============== Live-preview helper functions ==============
function update_preview(fig, smooth_strength, max_ratio)
% UPDATE_PREVIEW  Recompute the correction with new preview parameters.
%
%   fig             - main uifigure holding the application state
%   smooth_strength - smoothing window (in frames) taken from the slider
%   max_ratio       - upper limit for the per-frame pitch ratio
%
% The result is stored as a temporary preview (temp_corrected, f0_preview,
% time_preview) and drawn in the corrected axes; the committed result in
% corrected_audio is not modified.

% The slider delivers continuous values while the smoothing window is a
% frame count; round it so the status text (printed with %d) and the
% computation agree.
smooth_strength = round(smooth_strength);

% Show the parameters in use
fig.UserData.preview_controls.preview_status.Text = ...
    sprintf('当前参数: 平滑强度=%d, 最大比例=%.1f', smooth_strength, max_ratio);

% Progress dialog; indeterminate because no progress value is reported
h = uiprogressdlg(fig, 'Title', '实时预览', 'Message', '重新计算矫正音频...', ...
    'Indeterminate', 'on');

% Stored inputs of the correction
aligned_source = fig.UserData.aligned_source;
fs = fig.UserData.fs;
f0_source = fig.UserData.f0_source;
f0_ref = fig.UserData.f0_ref;
time_source = fig.UserData.time_source;
time_ref = fig.UserData.time_ref;

try
    % Pass the new parameters in a plain struct copy so the committed
    % parameters in fig.UserData stay untouched
    temp_fig = struct();
    temp_fig.UserData = fig.UserData;
    temp_fig.UserData.current_smooth = smooth_strength;
    temp_fig.UserData.current_max_ratio = max_ratio;

    % Recompute the correction
    recorrected = recalc_correction(temp_fig, aligned_source, fs, f0_source, f0_ref, time_source, time_ref);

    % Refresh the plot
    ax = fig.UserData.axes.corrected;
    plot(ax, (1:length(recorrected))/fs, recorrected);
    title(ax, '矫正后音频波形 (预览)');
    xlabel(ax, '时间 (s)');
    ylabel(ax, '幅度');

    % Cache the preview and its pitch track
    fig.UserData.temp_corrected = recorrected;
    [f0_preview, time_preview] = extract_pitch(recorrected, fs);
    fig.UserData.f0_preview = f0_preview;
    fig.UserData.time_preview = time_preview;
catch ME
    errordlg(['预览更新失败: ' ME.message], '预览错误');
end

% Close the progress dialog
close(h);
end
function recorrected = recalc_correction(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% RECALC_CORRECTION  Recompute the pitch correction with preview parameters.
%
%   fig      - object or struct whose UserData provides current_smooth
%              (smoothing window, in frames) and current_max_ratio
%   audio    - aligned source signal
%   fs       - sample rate in Hz
%   f0_src   - pitch track of the source
%   f0_ref   - pitch track of the reference
%   time_src - time stamps of f0_src
%   time_ref - time stamps of f0_ref
%
% Returns the corrected signal, peak-normalised and as long as 'audio'.
% 'audio' is returned unchanged when a pitch track is empty or the signal
% is shorter than one frame.

% Tunable parameters come from UserData; the rest is fixed
smooth_span = fig.UserData.current_smooth;
ratio_cap = fig.UserData.current_max_ratio;
ratio_floor = 0.5;

% 50 ms frames with 50 % hop
frame_len = round(0.05 * fs);
hop_size = round(0.5 * frame_len);

n_frames = floor((length(audio) - frame_len) / hop_size) + 1;
if isempty(f0_src) || isempty(f0_ref) || n_frames < 1
    recorrected = audio;
    return;
end

recorrected = zeros(size(audio));
win = hann(frame_len, 'periodic');

% Time stamps as row vectors
time_src = time_src(:)';
time_ref = time_ref(:)';

% Per-frame target ratio taken from the pitch estimates nearest to each
% frame centre; frames without a positive pitch pair keep a ratio of 1
raw_ratio = ones(1, n_frames);
has_pitch = false(1, n_frames);
half_frame_sec = frame_len / (2*fs);
for k = 1:n_frames
    t_center = (k-1)*hop_size/fs + half_frame_sec;
    [~, src_k] = min(abs(time_src - t_center));
    [~, ref_k] = min(abs(time_ref - t_center));
    if ref_k <= length(f0_ref) && src_k <= length(f0_src) && ...
            f0_src(src_k) > 0 && f0_ref(ref_k) > 0
        raw_ratio(k) = f0_ref(ref_k) / f0_src(src_k);
        has_pitch(k) = true;
    end
end

% Median-smooth the ratios over the valid neighbours of each valid frame
half_span = floor(smooth_span / 2);
smoothed = raw_ratio;
for k = find(has_pitch)
    lo = max(1, k - half_span);
    hi = min(n_frames, k + half_span);
    neighbours = raw_ratio(lo:hi);
    neighbours = neighbours(has_pitch(lo:hi));
    if ~isempty(neighbours)
        smoothed(k) = median(neighbours);
    end
end

% Clamp to the allowed ratio range
smoothed = min(ratio_cap, max(ratio_floor, smoothed));

% Process frame by frame, window and overlap-add
for k = 1:n_frames
    first = (k-1)*hop_size + 1;
    last = first + frame_len - 1;
    if last > length(audio)
        break;
    end
    span = first:last;
    piece = stable_phase_vocoder(audio(span), smoothed(k), fs, frame_len);
    recorrected(span) = recorrected(span) + piece .* win;
end

% Peak normalisation
peak = max(abs(recorrected));
if peak > 0
    recorrected = recorrected / peak;
end

% Trim to the original length
recorrected = recorrected(1:length(audio));
end
function apply_parameters(fig)
% APPLY_PARAMETERS  Commit the current preview as the final result.
%
%   fig - main uifigure holding the application state in UserData
%
% Copies the cached preview audio and its pitch track into the committed
% fields, redraws the corrected waveform and reopens the pitch comparison.
% Shows an error dialog when no preview has been computed yet.

if ~isfield(fig.UserData, 'temp_corrected')
    errordlg('没有可应用的预览结果!', '应用错误');
    return;
end

% Promote the preview to the committed result
fig.UserData.corrected_audio = fig.UserData.temp_corrected;
fig.UserData.f0_corrected = fig.UserData.f0_preview;
fig.UserData.time_corrected = fig.UserData.time_preview;

% Redraw the waveform
committed = fig.UserData.corrected_audio;
ax = fig.UserData.axes.corrected;
plot(ax, (1:length(committed))/fig.UserData.fs, committed);
title(ax, '矫正后音频波形 (已应用新参数)');

% Refresh the pitch comparison and confirm
plot_pitch_comparison(fig);
msgbox('新参数已应用到最终结果!', '参数应用成功');
end
function reset_preview(fig)
% RESET_PREVIEW  Restore the initial correction and the default parameters.
%
%   fig - main uifigure holding the application state in UserData
%
% Besides restoring corrected_audio, the preview cache (temp_corrected,
% f0_preview, time_preview) is reset to the initial result as well.
% Otherwise pressing "apply" after "reset" would bring back the stale
% preview although the sliders show the default values.

if ~isfield(fig.UserData, 'initial_corrected')
    errordlg('没有初始矫正结果可重置!', '重置错误');
    return;
end

initial = fig.UserData.initial_corrected;
fs = fig.UserData.fs;

% Restore the committed result and its pitch track
fig.UserData.corrected_audio = initial;
[f0_corrected, time_corrected] = extract_pitch(initial, fs);
fig.UserData.f0_corrected = f0_corrected;
fig.UserData.time_corrected = time_corrected;

% Bring the preview cache back in line with the restored result
fig.UserData.temp_corrected = initial;
fig.UserData.f0_preview = f0_corrected;
fig.UserData.time_preview = time_corrected;

% Redraw the waveform
ax = fig.UserData.axes.corrected;
plot(ax, (1:length(initial))/fs, initial);
title(ax, '矫正后音频波形 (已重置)');

% Refresh the pitch comparison
plot_pitch_comparison(fig);

% Reset the sliders and the status text; the handles are skipped when the
% controls no longer exist
ctrl = fig.UserData.preview_controls;
if isvalid(ctrl.smooth_slider)
    ctrl.smooth_slider.Value = 5;
end
if isvalid(ctrl.max_ratio_slider)
    ctrl.max_ratio_slider.Value = 2.0;
end
if isvalid(ctrl.preview_status)
    ctrl.preview_status.Text = '当前参数: 平滑强度=5, 最大比例=2.0 (已重置)';
end
msgbox('已重置到初始矫正结果!', '重置成功');
end
function plot_pitch_comparison(fig)
% PLOT_PITCH_COMPARISON  Show the source/reference/corrected pitch tracks.
%
%   fig - main uifigure; reads f0_source, f0_ref, f0_corrected and the
%         matching time_* vectors from fig.UserData
%
% Draws three stacked plots (source vs reference, corrected vs reference,
% source vs corrected) and a button that opens the deviation analysis.
% The comparison figure is reused between calls instead of opening a new
% window each time, and the y-limits are only set when the data allows it.

data = fig.UserData;

% Reuse the existing comparison figure when there is one
pitch_fig = findobj(groot, 'Type', 'figure', 'Tag', 'pitch_comparison_fig');
if isempty(pitch_fig)
    pitch_fig = figure('Name', '音高对比分析', 'Position', [100 100 800 800], ...
        'Tag', 'pitch_comparison_fig');
else
    pitch_fig = pitch_fig(1);
    figure(pitch_fig);
    clf(pitch_fig);
end

% Plot 1: source pitch vs reference pitch
ax1 = subplot(3, 1, 1, 'Parent', pitch_fig);
plot(ax1, data.time_source, data.f0_source, 'b', ...
    data.time_ref, data.f0_ref, 'r');
title(ax1, '原始音高 vs 参考音高');
legend(ax1, '原始音高', '参考音高', 'Location', 'best');
xlabel(ax1, '时间 (s)'); ylabel(ax1, '频率 (Hz)');
grid(ax1, 'on');
apply_pitch_ylim(ax1, data.f0_source, data.f0_ref);

% Plot 2: corrected pitch vs reference pitch
ax2 = subplot(3, 1, 2, 'Parent', pitch_fig);
plot(ax2, data.time_corrected, data.f0_corrected, 'g', ...
    data.time_ref, data.f0_ref, 'r');
title(ax2, '矫正后音高 vs 参考音高');
legend(ax2, '矫正后音高', '参考音高', 'Location', 'best');
xlabel(ax2, '时间 (s)'); ylabel(ax2, '频率 (Hz)');
grid(ax2, 'on');
apply_pitch_ylim(ax2, data.f0_corrected, data.f0_ref);

% Plot 3: source pitch vs corrected pitch
ax3 = subplot(3, 1, 3, 'Parent', pitch_fig);
plot(ax3, data.time_source, data.f0_source, 'b:', ...
    data.time_corrected, data.f0_corrected, 'g-');
title(ax3, '原始音高 vs 矫正后音高');
legend(ax3, '原始音高', '矫正后音高', 'Location', 'best');
xlabel(ax3, '时间 (s)'); ylabel(ax3, '频率 (Hz)');
grid(ax3, 'on');
apply_pitch_ylim(ax3, data.f0_source, data.f0_corrected);

% Button opening the deviation analysis
uicontrol(pitch_fig, 'Style', 'pushbutton', 'String', '显示偏差分析', ...
    'Position', [350 20 100 30], ...
    'Callback', @(src,event) show_deviation_analysis(fig));
end

function apply_pitch_ylim(ax, f0_a, f0_b)
% APPLY_PITCH_YLIM  Set y-limits 20 % beyond the range of two pitch tracks.
% Leaves the automatic limits in place when there is no finite data or the
% range is degenerate, because ylim requires increasing limits.
vals = [f0_a(:); f0_b(:)];
vals = vals(isfinite(vals));
if isempty(vals)
    return;
end
lo = min(vals) * 0.8;
hi = max(vals) * 1.2;
if hi > lo
    ylim(ax, [lo hi]);
end
end
function show_deviation_analysis(fig)
% SHOW_DEVIATION_ANALYSIS  Plot pitch deviation from the reference.
%
%   fig - main uifigure; reads f0_source, f0_ref, f0_corrected and the
%         matching time_* vectors from fig.UserData
%
% For every source / corrected pitch estimate the nearest reference
% estimate in time is looked up; pairs further apart than 0.1 are ignored.
% Each deviation is plotted at its own time stamp, so gaps left by ignored
% points do not shift the remaining points along the time axis.

data = fig.UserData;

% Make sure all three pitch tracks are present
if isempty(data.f0_source) || isempty(data.f0_ref) || isempty(data.f0_corrected)
    errordlg('音高数据不足,无法进行偏差分析!', '分析错误');
    return;
end

MAX_GAP = 0.1; % largest accepted distance between matched time stamps

% Deviation of the source and of the corrected pitch (NaN = no match)
orig_dev = nearest_pitch_deviation(data.time_source, data.f0_source, ...
    data.time_ref, data.f0_ref, MAX_GAP);
corr_dev = nearest_pitch_deviation(data.time_corrected, data.f0_corrected, ...
    data.time_ref, data.f0_ref, MAX_GAP);

% Keep the time stamps that belong to the valid points
orig_ok = ~isnan(orig_dev);
corr_ok = ~isnan(corr_dev);
if ~any(orig_ok) || ~any(corr_ok)
    errordlg('没有足够匹配的时间点进行偏差分析!', '分析错误');
    return;
end
orig_t = data.time_source(orig_ok);
corr_t = data.time_corrected(corr_ok);
orig_dev = orig_dev(orig_ok);
corr_dev = corr_dev(corr_ok);

% Analysis figure
analysis_fig = figure('Name', '音高偏差分析', 'Position', [150 150 800 600], 'NumberTitle', 'off');

% Source deviation
ax1 = subplot(2, 1, 1, 'Parent', analysis_fig);
plot(ax1, orig_t, orig_dev, 'b');
title(ax1, '原始音高偏差');
xlabel(ax1, '时间 (s)'); ylabel(ax1, '偏差 (Hz)');
grid(ax1, 'on');
hold(ax1, 'on');
line(ax1, [min(data.time_source), max(data.time_source)], [0, 0], 'Color', 'r', 'LineStyle', '--');
ylim(ax1, [-100, 100]); % fixed range so both plots are comparable

% Corrected deviation
ax2 = subplot(2, 1, 2, 'Parent', analysis_fig);
plot(ax2, corr_t, corr_dev, 'g');
title(ax2, '矫正后音高偏差');
xlabel(ax2, '时间 (s)'); ylabel(ax2, '偏差 (Hz)');
grid(ax2, 'on');
hold(ax2, 'on');
line(ax2, [min(data.time_corrected), max(data.time_corrected)], [0, 0], 'Color', 'r', 'LineStyle', '--');
ylim(ax2, [-100, 100]); % fixed range so both plots are comparable

% Summary statistics
uicontrol(analysis_fig, 'Style', 'text', ...
    'Position', [50 30 300 40], ...
    'String', sprintf('原始平均绝对偏差: %.2f Hz\n矫正后平均绝对偏差: %.2f Hz', ...
    mean(abs(orig_dev)), mean(abs(corr_dev))), ...
    'FontSize', 10, 'BackgroundColor', [1 1 1]);

% Close button; bound to this figure rather than to whatever gcf is later
uicontrol(analysis_fig, 'Style', 'pushbutton', 'String', '关闭', ...
    'Position', [700 30 80 30], ...
    'Callback', @(src,event) close(analysis_fig));

% Bring the window to the front
figure(analysis_fig);
end

function dev = nearest_pitch_deviation(t_query, f0_query, t_ref, f0_ref, max_gap)
% NEAREST_PITCH_DEVIATION  f0_query minus the time-nearest f0_ref value.
% Entries whose nearest reference time stamp is max_gap or further away
% (or that have no time stamp at all) are returned as NaN.
dev = nan(size(f0_query));
n = min(numel(f0_query), numel(t_query));
for i = 1:n
    [gap, idx] = min(abs(t_ref - t_query(i)));
    if ~isempty(idx) && idx <= numel(f0_ref) && gap < max_gap
        dev(i) = f0_query(i) - f0_ref(idx);
    end
end
end
function [aligned_src, aligned_ref] = improved_align_audio(src, ref, fs)
% IMPROVED_ALIGN_AUDIO  Time-align two recordings and trim to equal length.
%
%   src, ref - source and reference signals (vectors)
%   fs       - common sample rate in Hz
%
% Returns column vectors of equal length, a whole number of 30 ms frames.
% Raises an error when an input is shorter than one frame or when nothing
% is left after alignment.
%
% The offset is estimated by cross-correlating the mean-removed
% spectral-magnitude envelopes of both signals over a range of lags.
% Comparing only frames with the same index can never reveal an offset,
% because both spectrograms share the same time axis.

% Frame parameters (same as the pitch extraction)
FRAME_LEN = 0.03;   % 30 ms frames
HOP_RATIO = 0.2;    % hop = 20 % of a frame
frame_len = round(FRAME_LEN * fs);
hop_size = max(1, round(HOP_RATIO * frame_len));

src = src(:);
ref = ref(:);
if length(src) < frame_len || length(ref) < frame_len
    error('improved_align_audio:tooShort', '音频长度不足一帧,无法对齐');
end

% Spectrograms of both signals
S_src = spectrogram(src, frame_len, frame_len-hop_size, [], fs);
S_ref = spectrogram(ref, frame_len, frame_len-hop_size, [], fs);

% Per-frame spectral magnitude envelopes, mean removed
env_src = sum(abs(S_src), 1);
env_ref = sum(abs(S_ref), 1);
env_src = env_src - mean(env_src);
env_ref = env_ref - mean(env_ref);

% Search lags up to half of the shorter envelope so that the two signals
% always overlap substantially
max_lag = floor(min(numel(env_src), numel(env_ref)) / 2);
lag_frames = 0;
if max_lag >= 1
    [xc, lags] = xcorr(env_src, env_ref, max_lag);
    [peak, k] = max(xc);
    if peak > 0            % flat envelopes give no usable peak
        lag_frames = lags(k);
    end
end
sample_diff = lag_frames * hop_size;

% A positive lag means the source is delayed relative to the reference,
% so the leading part of the source is dropped (and vice versa)
if sample_diff > 0
    aligned_src = src(sample_diff+1:end);
    aligned_ref = ref;
else
    aligned_src = src;
    aligned_ref = ref(-sample_diff+1:end);
end

% Equal length, truncated to a whole number of frames
min_len = min(length(aligned_src), length(aligned_ref));
min_len = floor(min_len / frame_len) * frame_len;
if min_len == 0
    error('improved_align_audio:noOverlap', '对齐后没有可用的重叠音频');
end
aligned_src = aligned_src(1:min_len);
aligned_ref = aligned_ref(1:min_len);
end
function [f0, time] = extract_pitch(audio, fs)
% EXTRACT_PITCH  Estimate the fundamental frequency track of a signal.
%
%   audio - mono signal
%   fs    - sample rate in Hz
%
%   f0    - column vector of pitch estimates in Hz (search range 50-1000)
%   time  - column vector with the time stamp of each estimate, in seconds
%
% Both outputs are empty when the signal is shorter than one 30 ms frame.

% Frame parameters
FRAME_LEN = 0.03;   % 30 ms frames
HOP_RATIO = 0.2;    % hop = 20 % of a frame
frame_size = round(FRAME_LEN * fs);
hop_size = round(HOP_RATIO * frame_size);

% At least one full frame is required
n_frames = floor((length(audio)-frame_size)/hop_size) + 1;
if n_frames < 1
    f0 = [];
    time = [];
    return;
end

% Pitch estimation (normalized correlation function)
[f0, loc] = pitch(audio, fs, ...
    'WindowLength', frame_size, ...
    'OverlapLength', frame_size - hop_size, ...
    'Range', [50, 1000], ...
    'Method', 'NCF');

% pitch() reports the location of each estimate as a sample index; the
% rest of the program compares and plots these values in seconds.
f0 = f0(:);
time = loc(:) / fs;
end
function corrected = correct_pitch(fig, audio, fs, f0_src, f0_ref, time_src, time_ref)
% CORRECT_PITCH  Move the source pitch toward the reference, frame by frame.
%
%   fig      - main uifigure, used as parent of the progress dialog
%   audio    - aligned source signal
%   fs       - sample rate in Hz
%   f0_src   - pitch track of the source
%   f0_ref   - pitch track of the reference
%   time_src - time stamps of f0_src
%   time_ref - time stamps of f0_ref
%
% Returns the corrected signal, peak-normalised and as long as 'audio'.
% 'audio' is returned unchanged when a pitch track is empty or the signal
% is shorter than one frame. The progress dialog is closed on every exit
% path, including errors raised while processing frames.

% Parameters
FRAME_LEN = 0.05;   % 50 ms frames
HOP_RATIO = 0.5;    % hop = 50 % of a frame
MIN_RATIO = 0.5;    % lower limit of the pitch ratio
MAX_RATIO = 2.0;    % upper limit of the pitch ratio
SMOOTH_WIN = 5;     % median smoothing window, in frames

h = uiprogressdlg(fig, 'Title', '处理中', 'Message', '音高校正...');
try
    frame_len = round(FRAME_LEN * fs);
    hop_size = round(HOP_RATIO * frame_len);
    n_frames = floor((length(audio)-frame_len)/hop_size) + 1;
    if isempty(f0_src) || isempty(f0_ref) || n_frames < 1
        corrected = audio;
        close(h);
        return;
    end
    corrected = zeros(size(audio));
    win = hann(frame_len, 'periodic');

    % Time stamps as row vectors
    time_src = time_src(:)';
    time_ref = time_ref(:)';

    % Per-frame target ratio from the pitch estimates nearest to the
    % frame centre; frames without a positive pitch pair keep a ratio of 1
    target_ratios = ones(1, n_frames);
    valid_frames = false(1, n_frames);
    for i = 1:n_frames
        t_frame = (i-1)*hop_size/fs + frame_len/(2*fs);
        [~, idx_src] = min(abs(time_src - t_frame));
        [~, idx_ref] = min(abs(time_ref - t_frame));
        if idx_ref <= length(f0_ref) && idx_src <= length(f0_src) && ...
                f0_src(idx_src) > 0 && f0_ref(idx_ref) > 0
            target_ratios(i) = f0_ref(idx_ref) / f0_src(idx_src);
            valid_frames(i) = true;
        end
    end

    % Median-smooth the ratios over the valid neighbours of each frame
    target_ratios_smoothed = target_ratios;
    half_win = floor(SMOOTH_WIN/2);
    for i = 1:n_frames
        if valid_frames(i)
            win_start = max(1, i - half_win);
            win_end = min(n_frames, i + half_win);
            win_vals = target_ratios(win_start:win_end);
            win_vals = win_vals(valid_frames(win_start:win_end));
            if ~isempty(win_vals)
                target_ratios_smoothed(i) = median(win_vals);
            end
        end
    end

    % Clamp to the allowed ratio range
    target_ratios_smoothed = min(MAX_RATIO, max(MIN_RATIO, target_ratios_smoothed));

    % Refreshing the dialog on every frame is slow; update it roughly
    % one hundred times over the whole run instead
    update_step = max(1, round(n_frames / 100));

    % Process frame by frame, window and overlap-add
    for i = 1:n_frames
        start_idx = (i-1)*hop_size + 1;
        end_idx = start_idx + frame_len - 1;
        if end_idx > length(audio)
            break;
        end
        frame = audio(start_idx:end_idx);
        target_ratio = target_ratios_smoothed(i);
        corrected_frame = stable_phase_vocoder(frame, target_ratio, fs, frame_len);
        corrected_frame = corrected_frame .* win;
        corrected(start_idx:end_idx) = corrected(start_idx:end_idx) + corrected_frame;
        if mod(i, update_step) == 0 || i == n_frames
            h.Value = i/n_frames;
            h.Message = sprintf('处理进度: %d/%d 帧 (比例: %.2f)', i, n_frames, target_ratio);
        end
    end
catch ME
    % Do not leave the dialog open when a frame fails
    close(h);
    rethrow(ME);
end
close(h);

% Peak normalisation
max_amp = max(abs(corrected));
if max_amp > 0
    corrected = corrected / max_amp;
end

% Trim to the original length
corrected = corrected(1:length(audio));
end
% ============== Per-frame pitch shifter ==============
function y = stable_phase_vocoder(x, ratio, fs, frame_len) %#ok<INUSD>
% STABLE_PHASE_VOCODER  Shift the pitch of one frame by 'ratio'.
%
%   x         - one frame of audio (vector)
%   ratio     - pitch ratio target/source; values > 1 raise the pitch
%   fs        - sample rate in Hz (unused, kept for the callers)
%   frame_len - nominal frame length (unused, kept for the callers)
%
%   y         - pitch-shifted frame with the same size as x
%
% The frame is resampled around its centre: output sample k reads the
% input at centre + (k - centre) * ratio, which multiplies every frequency
% in the frame by 'ratio' while the frame keeps its length. Read positions
% falling outside the frame (ratio > 1) are mirrored back into it.
%
% The previous implementation took an STFT whose window was as long as the
% frame, which yields a single spectral column; rotating the phases of
% that column leaves the magnitude spectrum, and therefore the pitch,
% unchanged. It also rescaled every frame to unit peak, which removed the
% loudness contour of the recording. Neither happens here: the frame's
% amplitude is kept as is.
%
% NOTE(review): frames are shifted independently, so the phase at frame
% boundaries is not matched to the neighbouring frames; a pitch-synchronous
% method would be needed to remove the remaining overlap artefacts.

n = numel(x);

% Nothing to do for degenerate input or a ratio of (almost) one
if n < 2 || ~isfinite(ratio) || ratio <= 0 || abs(ratio - 1) < 1e-3
    y = x;
    return;
end

samples = x(:);
centre = (n + 1) / 2;
read_pos = centre + ((1:n)' - centre) * ratio;

% Mirror positions outside [1, n] back into the frame (triangular fold)
span = n - 1;
folded = mod(read_pos - 1, 2*span);
beyond = folded > span;
folded(beyond) = 2*span - folded(beyond);

% Linear interpolation at the folded read positions
y = interp1((1:n)', samples, folded + 1, 'linear');
y(isnan(y)) = 0;
y = reshape(y, size(x));
end
function Y = basic_phase_vocoder(X, hop_in, hop_out, fs) %#ok<INUSD>
% BASIC_PHASE_VOCODER  Re-synthesise STFT phases for a new hop size.
%
%   X       - STFT matrix, bins x frames; the bins are taken to span
%             0 .. Nyquist (one-sided spectrum)
%   hop_in  - analysis hop in samples
%   hop_out - synthesis hop in samples
%   fs      - sample rate in Hz (unused, kept for the callers)
%
%   Y       - STFT matrix with the magnitudes of X and phases advanced by
%             hop_out per frame at each bin's instantaneous frequency
%
% The bin frequencies are expressed in radians per sample because both
% hops are given in samples; mixing rad/s with sample hops makes the
% expected phase advance meaningless. The first frame keeps its phase, so
% with hop_in == hop_out the output equals the input.
%
% NOTE(review): stft() returns a centred two-sided spectrum by default;
% confirm that callers pass a one-sided spectrum.

[n_bins, n_frames] = size(X);
Y = zeros(size(X));
if n_bins == 0 || n_frames == 0
    return;
end

% Bin centre frequencies in rad/sample, 0 .. pi
omega = pi * (0:n_bins-1)' / max(n_bins - 1, 1);

% The first frame is copied unchanged and seeds the phase accumulator
prev_phase = angle(X(:,1));
phase_accum = prev_phase;
Y(:,1) = X(:,1);

for i = 2:n_frames
    mag = abs(X(:,i));
    phase = angle(X(:,i));

    % Deviation from the expected phase advance, wrapped to [-pi, pi)
    delta_phase = phase - prev_phase - hop_in * omega;
    delta_phase = mod(delta_phase + pi, 2*pi) - pi;

    % Instantaneous frequency in rad/sample
    inst_freq = omega + delta_phase / hop_in;

    % Advance the synthesis phase by one output hop
    phase_accum = phase_accum + hop_out * inst_freq;
    prev_phase = phase;

    Y(:,i) = mag .* exp(1j * phase_accum);
end
end
function play_audio(fig, audio_type)
% PLAY_AUDIO  Open a small player window for the source or corrected audio.
%
%   fig        - main uifigure holding the application state in UserData
%   audio_type - 'source' or 'corrected'; any other value is ignored
%
% The window offers play / pause / stop buttons, a progress line and an
% elapsed / total time label. Playback starts when "play" is pressed.

if ~isvalid(fig)
    errordlg('主窗口无效!', '播放错误');
    return;
end
switch audio_type
    case 'source'
        audio = fig.UserData.source_audio;
        title_text = '播放原始音频';
        if isempty(audio)
            errordlg('未找到原始音频数据!', '播放错误');
            return;
        end
    case 'corrected'
        audio = fig.UserData.corrected_audio;
        title_text = '播放矫正音频';
        if isempty(audio)
            errordlg('请先完成音高校正!', '播放错误');
            return;
        end
    otherwise
        return;
end
fs = fig.UserData.fs;
player = audioplayer(audio, fs);

% Player window
play_fig = uifigure('Name', title_text, 'Position', [500 500 300 150]);

% Progress line drawn in a bare axes
ax = uiaxes(play_fig, 'Position', [50 100 200 20]);
prog_line = line(ax, [0 0], [0 0], 'Color', 'b', 'LineWidth', 2);
xlim(ax, [0 1]);
ylim(ax, [0 1]);
set(ax, 'XTick', [], 'YTick', []);

% Elapsed / total time label
time_label = uilabel(play_fig, 'Position', [50 80 200 20], ...
    'Text', '00:00 / 00:00', 'HorizontalAlignment', 'center');

% Control buttons
uibutton(play_fig, 'Position', [50 30 60 30], 'Text', '播放', ...
    'ButtonPushedFcn', @(btn,event) play(player));
uibutton(play_fig, 'Position', [120 30 60 30], 'Text', '暂停', ...
    'ButtonPushedFcn', @(btn,event) pause(player));
uibutton(play_fig, 'Position', [190 30 60 30], 'Text', '停止', ...
    'ButtonPushedFcn', @(btn,event) stop(player));

% Total duration as mm:ss. Whole seconds are taken with floor: rounding
% the remainder can reach 60 and print e.g. "00:60".
total_secs = floor(length(audio)/fs);
total_str = sprintf('%02d:%02d', floor(total_secs/60), mod(total_secs, 60));

% Progress updates during playback and cleanup when playback stops
player.TimerFcn = {@update_playback, play_fig, time_label, total_str, prog_line, length(audio)};
player.StopFcn = {@stop_playback, play_fig};
end
function update_playback(player, ~, play_fig, time_label, total_str, prog_line, total_samples)
% UPDATE_PLAYBACK  Player timer callback: refresh time label and progress.
%
%   player        - audioplayer that fired the callback
%   play_fig      - player window; nothing is done once it is closed
%   time_label    - label showing "elapsed / total"
%   total_str     - preformatted total duration (mm:ss)
%   prog_line     - line object used as progress bar (x range 0..1)
%   total_samples - number of samples in the played signal

if ~isvalid(play_fig) || player.CurrentSample <= 0
    return;
end

% Elapsed time as mm:ss. Whole seconds are taken with floor: rounding the
% remainder can reach 60 and print e.g. "00:60".
elapsed_secs = floor(player.CurrentSample / player.SampleRate);
current_str = sprintf('%02d:%02d', floor(elapsed_secs/60), mod(elapsed_secs, 60));
time_label.Text = [current_str ' / ' total_str];

% Progress bar, kept inside the 0..1 axis range
progress = min(1, player.CurrentSample / total_samples);
prog_line.XData = [0 progress];
prog_line.YData = [0.5 0.5];
end
function stop_playback(player, ~, play_fig) %#ok<INUSL>
% STOP_PLAYBACK  Player stop callback: close the player window.
%
%   player   - audioplayer that fired the callback (not used)
%   play_fig - player window; left alone when it is already gone

if ~isvalid(play_fig)
    return;
end
close(play_fig);
end
function save_audio(fig)
% SAVE_AUDIO  Write the corrected audio to a WAV file chosen by the user.
%
%   fig - main uifigure holding corrected_audio and fs in UserData
%
% Shows an error dialog when there is nothing to save or when writing the
% file fails; the success message appears only after the file is written.

if ~isvalid(fig) || isempty(fig.UserData.corrected_audio)
    errordlg('无有效音频数据可保存!', '保存错误');
    return;
end
[file, path] = uiputfile('*.wav', '保存矫正音频');
if isequal(file, 0)
    return; % user cancelled the dialog
end

% Report write failures (read-only folder, invalid name, ...) to the user
try
    audiowrite(fullfile(path, file), fig.UserData.corrected_audio, fig.UserData.fs);
catch ME
    errordlg(['音频保存失败: ' ME.message], '保存错误');
    return;
end
msgbox('音频保存成功!', '完成');
end
% 实时调整模块的位置是不是不太对?它没有显示出来。另外,调整似乎过度了:我只需要根据每一帧的音高值,将待修改音频的音高调整到尽量与参考音频一致即可。
% 最新发布