使用faster-whisper音频转asr字幕（亲测可行）

批量小王子

已于 2025-05-06 01:00:20 修改

阅读量563

点赞数 9

分类专栏：05_python库

文章标签：whisper 音视频 python

于 2025-04-11 23:12:29 首次发布

本文链接：https://blog.csdn.net/m0_58149406/article/details/147155963

版权

05_python库专栏收录该内容

35 篇文章

订阅专栏

在这里插入图片描述

重点说明：下载模型时需要科学上网，或者改用国内的 Hugging Face 镜像网站下载。

from faster_whisper import WhisperModel
from pydub import AudioSegment
from zhconv import convert  # 简繁转换库
import os
import datetime
import torch


def format_time(seconds):
    """Render a duration given in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    delta = datetime.timedelta(seconds=seconds)
    whole_seconds = int(delta.total_seconds())
    hh, leftover = divmod(whole_seconds, 3600)
    mm, ss = divmod(leftover, 60)
    # timedelta.microseconds is always in [0, 999999], so floor division
    # yields the truncated millisecond component.
    ms = delta.microseconds // 1000
    return "%02d:%02d:%02d,%03d" % (hh, mm, ss, ms)


def audio_to_srt_fast(audio_path, output_srt_path, model_size="small", device="cpu"):
    """
    Transcribe an audio file locally and write it as a Simplified-Chinese SRT file.

    :param audio_path: path of the input audio file
    :param output_srt_path: path of the SRT file to write
    :param model_size: model size (tiny, base, small, medium, large); loaded by
        name when the local model directory below does not exist
    :param device: device to run on ("cpu" or "cuda")
    :return: True once the SRT file has been written, False if the model
        could not be loaded
    """
    import tempfile  # function-scope import: only this function needs it

    # Use a unique temp file rather than a fixed name in the working
    # directory, so parallel runs cannot overwrite each other's audio.
    fd, temp_wav = tempfile.mkstemp(prefix="temp_16k_", suffix=".wav")
    os.close(fd)

    try:
        # Convert the input to 16 kHz mono WAV.
        print("正在预处理音频...")
        audio = AudioSegment.from_file(audio_path)
        audio = audio.set_frame_rate(16000).set_channels(1)
        # export() returns the open output file object; close it explicitly
        # so the temp file can be deleted on platforms that lock open files.
        audio.export(temp_wav, format="wav").close()

        # Load the Whisper model.
        print(f"正在加载Whisper模型({model_size})...")
        # Pre-downloaded local model directory; replace with your actual path.
        model_path = "C:/whisper_models/faster-whisper-small"
        # Honour model_size when the local directory is absent instead of
        # failing on a path that only exists on the author's machine.
        model_source = model_path if os.path.isdir(model_path) else model_size
        try:
            model = WhisperModel(model_source, device=device, compute_type="int8")
        except Exception as e:
            print(f"模型加载失败: {str(e)}")
            return False

        # Transcribe, forcing Chinese; the prompt nudges the model towards
        # Simplified characters.
        print("开始语音识别...")
        segments, info = model.transcribe(
            temp_wav,
            beam_size=5,
            language="zh",
            initial_prompt="请用简体中文转写以下内容",
        )

        print(f"检测到语言: {info.language}, 概率: {info.language_probability:.2f}")

        # Write the SRT file. `segments` is consumed inside this loop, so the
        # temp WAV must stay on disk until the loop has finished.
        with open(output_srt_path, 'w', encoding='utf-8') as srt_file:
            for i, segment in enumerate(segments, 1):
                start_time = format_time(segment.start)
                end_time = format_time(segment.end)
                # Convert to Simplified Chinese regardless of model output.
                text = convert(segment.text.strip(), 'zh-cn')

                srt_file.write(f"{i}\n")
                srt_file.write(f"{start_time} --> {end_time}\n")
                srt_file.write(f"{text}\n\n")
    finally:
        # Remove the temp file on every exit path, including the early
        # return above and any exception raised while transcribing.
        if os.path.exists(temp_wav):
            os.remove(temp_wav)

    print(f"简体中文SRT生成完成: {output_srt_path}")
    return True


if __name__ == "__main__":
    import sys

    # Test configuration
    audio_file = "c.mp3"  # replace with your audio file
    output_srt = "c.srt"

    # Install the Simplified/Traditional conversion library if it is missing.
    # NOTE(review): the module-level `from zhconv import convert` runs before
    # this block, so a missing zhconv raises ImportError there first; this
    # fallback only helps if that import is made lazy.
    try:
        import zhconv  # noqa: F401
    except ImportError:
        print("正在安装简繁转换库...")
        import subprocess

        # Run pip through the current interpreter so the package is installed
        # into the environment that is actually executing this script, and
        # fail loudly if the installation does not succeed.
        subprocess.run([sys.executable, "-m", "pip", "install", "zhconv"], check=True)
        import zhconv  # noqa: F401

    # Choose the model size ("small" or larger is recommended for Chinese)
    model_size = "small"

    # Choose the device
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"使用设备: {device}")

    print("=== 开始生成简体中文字幕 ===")
    # The function returns False when the model cannot be loaded; reflect
    # that in the process exit status instead of silently exiting with 0.
    if audio_to_srt_fast(audio_file, output_srt, model_size, device) is False:
        sys.exit(1)