WavLM
参考代码:
transformers中已经有封装:
https://huggingface.co/docs/transformers/model_doc/wavlm#transformers.WavLMForXVector
一个示例:
"""Example: build a WavLM model from config, then run a pretrained checkpoint.

Adapted from the transformers WavLM documentation. Requires `transformers`,
`datasets`, and `torch`; downloads the model/dataset from the HF Hub on first run.
"""
from transformers import AutoProcessor, WavLMConfig, WavLMModel
import torch
from datasets import load_dataset

# --- Part 1: instantiate a WavLM model with random weights from a default config ---
# NOTE(review): the original comment said "facebook/wavlm-base-960h", but WavLM
# checkpoints live under the `microsoft/` namespace (e.g. microsoft/wavlm-base).
configuration = WavLMConfig()
model = WavLMModel(configuration)
# The live configuration can be read back from the model
configuration = model.config

# --- Part 2: run a pretrained checkpoint on one LibriSpeech demo clip ---
dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
dataset = dataset.sort("id")
sampling_rate = dataset.features["audio"].sampling_rate

processor = AutoProcessor.from_pretrained("patrickvonplaten/wavlm-libri-clean-100h-base-plus")
model = WavLMModel.from_pretrained("patrickvonplaten/wavlm-libri-clean-100h-base-plus")

# The audio file is decoded on the fly by `datasets`
inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
with torch.no_grad():
    # FIX: this call must be indented inside the no_grad() context — the original
    # left it at top level, which is an IndentationError (and would otherwise
    # run the forward pass with gradient tracking enabled).
    outputs = model(**inputs)

# Shape: [batch, sequence_length, hidden_size]
last_hidden_states = outputs.last_hidden_state
list(last_hidden_states.shape)
预训练模型下载地址:
https://huggingface.co/models?other=wavlm