适配过程
# 使用寒武纪官网下载的pytorch镜像起容器,进入容器后
# git代码
git clone https://github.com/SYSTRAN/faster-whisper.git
# 模型转换
cd /whisper
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i faster-whisper/
# 获得faster-whisper_mlu代码文件夹
下载asteroid-filterbanks代码
git clone https://github.com/asteroid-team/asteroid-filterbanks.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i asteroid-filterbanks/
cd asteroid-filterbanks_mlu
# 注释掉requirements.txt中的 torch>=1.8.0 这一行
pip install -e .
下载与torch对应版本的lightning
# 对应版本可在 lightning 官网文档的 Versioning Policy 页面（PyTorch Lightning documentation）中查看
pip install pytorch-lightning==2.3.0
下载pytorch_metric_learning代码
git clone -b v2.5.0 https://gitclone.com/github.com/KevinMusgrave/pytorch-metric-learning.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i pytorch-metric-learning/
cd pytorch-metric-learning_mlu
# 注释掉requirements.txt中的 torch>=1.8.0 这一行
pip install -e .
安装torchaudio
# 因为使用的torch镜像为2.3的,安装指定的torchaudio版本
pip install torchaudio==2.3.0
下载speechbrain代码
git clone -b v0.5.14 https://gitclone.com/github.com/speechbrain/speechbrain.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i speechbrain/
cd speechbrain_mlu/
# 注释掉requirements.txt中的 torch>=1.8.0 和 torchaudio>=0.9.0,<2.0 这两行
pip install -e .
下载pyannote-audio代码
git clone https://github.com/pyannote/pyannote-audio.git
python /torch/src/catch/tools/torch_gpu2mlu/torch_gpu2mlu.py -i pyannote-audio/
cd pyannote-audio_mlu
安装faster_whisper的依赖
cd faster-whisper_mlu
pip install -e .
下载模型
# 这里以faster-whisper-small为例
export HF_ENDPOINT=https://hf-mirror.com
huggingface-cli download --resume-download guillaumekln/faster-whisper-small --local-dir faster-whisper-small
编写测试脚本
from faster_whisper import WhisperModel

# Path to the locally downloaded faster-whisper-small model
# (downloaded via huggingface-cli, see steps above).
path = r"/workspace_faster_whisper/model/faster-whisper-small/"

# Run on MLU with FP32. local_files_only=True prevents any network access
# so the model is loaded strictly from `path`.
model = WhisperModel(model_size_or_path=path, compute_type="float32", local_files_only=True)
# or run on GPU with INT8:
# model = WhisperModel(model_size_or_path=path, device="cuda", compute_type="int8_float16")
# or run on CPU with INT8:
# model = WhisperModel(model_size_or_path=path, device="cpu", compute_type="int8")

# Transcribe a Chinese audio file; VAD filtering drops silence segments
# longer than 1000 ms before decoding.
segments, info = model.transcribe(
    "/workspace_faster_whisper/audio/audio.wav",
    beam_size=5,
    language="zh",
    vad_filter=True,
    vad_parameters=dict(min_silence_duration_ms=1000),
)

print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
# `segments` is a generator: decoding happens lazily while iterating.
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
注意
务必注意 faster-whisper 与 torch 以及各个依赖之间的版本对应关系
测试实时转录asr_voice_api.py
安装依赖
pip install fastapi
pip install opencc
# 使用的模型是faster-whisper-small
本地端口映射
# 本地cmd输入,之后不要关闭cmd窗口
ssh -L 本地端口:localhost:远程端口 user@服务器ip地址
适配过程中遇到的error
Traceback (most recent call last):
File "/workspace_faster_whisper/test.py", line 16, in <module>
segments, info = model.transcribe("/workspace_faster_whisper/audio/audio.wav")
File "/workspace_faster_whisper/faster-whisper_mlu/faster_whisper/transcribe.py", line 928, in transcribe
encoder_output = self.encode(segment)
File "/workspace_faster_whisper/faster-whisper_mlu/faster_whisper/transcribe.py", line 1423, in encode
features = get_ctranslate2_storage(features)
File "/workspace_faster_whisper/faster-whisper_mlu/faster_whisper/transcribe.py", line 2099, in get_ctranslate2_storage
segment = ctranslate2.StorageView.from_array(
ValueError: Object does not implement the array interface
# 格式不对
# 打开./faster-whisper_mlu/faster_whisper/transcribe.py
# 修改2100行为
segment.cpu().numpy() if segment.is_mlu else segment.numpy()