0. 下载数据
使用多线程下载器下载
# Refresh the package index, then install the aria2 downloader without prompting.
apt-get -y update
apt-get -y install aria2
开始下载数据(-s 16表示每个文件最多拆分为16个连接并行下载,-x 16表示对每个服务器最多建立16个连接,-c表示支持断点续传),下载到/root/autodl-tmp/3D-Speaker/egs/cnceleb/sv-cam++/data/download_data目录中。
# Download each archive in turn with the same aria2c options
# (-s 16 -x 16 for parallel connections, -c to resume a partial download).
for url in \
  https://openslr.elda.org/resources/17/musan.tar.gz \
  https://us.openslr.org/resources/28/rirs_noises.zip \
  https://www.openslr.org/resources/82/cn-celeb_v2.tar.gz \
  https://www.openslr.org/resources/82/cn-celeb2_v2.tar.gzaa \
  https://www.openslr.org/resources/82/cn-celeb2_v2.tar.gzab \
  https://www.openslr.org/resources/82/cn-celeb2_v2.tar.gzac
do
  aria2c -s 16 -x 16 -c "$url"
done
注意后面三个是分块下载的,下载完成之后需要合并在一起。
# Concatenate the split parts (cn-celeb2_v2.tar.gzaa, ...ab, ...ac, matched by
# the glob in sorted order) into a single cn-celeb2_v2.tar.gz archive.
cat cn-celeb2_v2.tar.gza* >cn-celeb2_v2.tar.gz
如果你使用的python版本是3.12,那么安装依赖的时候会报下面的错误。这是因为python版本过高,请使用python>=3.8且python<=3.11的版本。
root@autodl-container-b3e54da89e-80ab5eb3:~/autodl-tmp/3D-Speaker# pip install -r requirements.txt
Looking in indexes: http://mirrors.aliyun.com/pypi/simple
Requirement already satisfied: tqdm>=4.42.0 in /root/miniconda3/lib/python3.12/site-packages (from -r requirements.txt (line 1)) (4.66.2)
Collecting scipy>=1.7.0 (from -r requirements.txt (line 2))
Downloading http://mirrors.aliyun.com/pypi/packages/c0/53/eaada1a414c026673eb983f8b4a55fe5eb172725d33d62c1b21f63ff6ca4/scipy-1.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.3 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 37.3/37.3 MB 10.3 MB/s eta 0:00:00
Collecting numpy<1.24,>=1.20.0 (from -r requirements.txt (line 3))
Using cached http://mirrors.aliyun.com/pypi/packages/42/38/775b43da55fa7473015eddc9a819571517d9a271a9f8134f68fb9be2f212/numpy-1.23.5.tar.gz (10.7 MB)
Installing build dependencies ... done
Getting requirements to build wheel ... error
error: subprocess-exited-with-error
× Getting requirements to build wheel did not run successfully.
│ exit code: 1
╰─> [33 lines of output]
Traceback (most recent call last):
File "/root/miniconda3/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 353, in <module>
main()
File "/root/miniconda3/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 335, in main
json_out['return_val'] = hook(**hook_input['kwargs'])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 112, in get_requires_for_build_wheel
backend = _build_backend()
^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 77, in _build_backend
obj = import_module(mod_path)
^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/lib/python3.12/importlib/__init__.py", line 90, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1310, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "<frozen importlib._bootstrap>", line 1387, in _gcd_import
File "<frozen importlib._bootstrap>", line 1360, in _find_and_load
File "<frozen importlib._bootstrap>", line 1331, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 935, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 995, in exec_module
File "<frozen importlib._bootstrap>", line 488, in _call_with_frames_removed
File "/tmp/pip-build-env-vkyrguzl/overlay/lib/python3.12/site-packages/setuptools/__init__.py", line 16, in <module>
import setuptools.version
File "/tmp/pip-build-env-vkyrguzl/overlay/lib/python3.12/site-packages/setuptools/version.py", line 1, in <module>
import pkg_resources
File "/tmp/pip-build-env-vkyrguzl/overlay/lib/python3.12/site-packages/pkg_resources/__init__.py", line 2172, in <module>
register_finder(pkgutil.ImpImporter, find_on_path)
^^^^^^^^^^^^^^^^^^^
AttributeError: module 'pkgutil' has no attribute 'ImpImporter'. Did you mean: 'zipimporter'?
[end of output]
note: This error originates from a subprocess, and is likely not a problem with pip.
error: subprocess-exited-with-error
× Getting requirements to build wheel did not run successfully.
│ exit code: 1
╰─> See above for output.
note: This error originates from a subprocess, and is likely not a problem with pip.
1. 创建环境
# Clone the repository and enter it (cd runs only if the clone succeeded).
git clone https://github.com/modelscope/3D-Speaker.git \
  && cd 3D-Speaker
# Create and activate a Python 3.8 conda environment named 3D-Speaker.
conda create --name 3D-Speaker python=3.8
conda activate 3D-Speaker
# Install the dependencies listed in requirements.txt.
pip install --requirement requirements.txt
注意这里我把requirements.txt中的torch和torchaudio删了,需要执行下面的命令单独安装。
# Install the pinned torch / torchvision / torchaudio versions, using the
# Aliyun cu118 wheel page as an additional place to look for packages.
pip3 install \
  torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 \
  --find-links https://mirrors.aliyun.com/pytorch-wheels/cu118
pip install scipy
2. 修改配置
根据自己租的显卡数量来修改run.sh脚本,并且修改conf/cam++.yaml配置文件。
下面是run.sh脚本。
#!/bin/bash
# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)

# Recipe driver: runs every stage N with stage <= N <= stop_stage, in order.
#   1: prepare the raw CN-Celeb data      2: build the training index files
#   3: train the speaker embedding model  4: extract test-set embeddings
#   5: compute score metrics
# The defaults below are assigned before utils/parse_options.sh is sourced;
# presumably that script lets command-line options such as --stage or --gpus
# override them (confirm in utils/parse_options.sh).

set -e
. ./path.sh || exit 1

stage=1
stop_stage=5

data=data
exp=exp
exp_name=cam++
# Space-separated GPU ids; adjust to the GPUs actually available.
gpus="0 1 2 3 4 5"

. utils/parse_options.sh || exit 1

exp_dir=$exp/$exp_name

# Split the GPU list once so it can be counted and passed on as separate
# arguments without relying on unquoted word splitting.
read -r -a gpu_ids <<< "$gpus"

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  # In this stage we prepare the raw datasets, including CNCeleb1 and CNCeleb2.
  echo "Stage1: Preparing CN-Celeb dataset..."
  ./local/prepare_data_cncb.sh --stage 1 --stop_stage 4 --data "${data}"
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  # In this stage we prepare the data index files for training.
  echo "Stage2: Preparing training data index files..."
  python local/prepare_data_csv.py --data_dir "$data/cnceleb_train"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  # Train the speaker embedding model, one torchrun process per listed GPU.
  echo "Stage3: Training the speaker model..."
  num_gpu=${#gpu_ids[@]}
  torchrun --nproc_per_node="$num_gpu" --master_port=29501 speakerlab/bin/train.py \
    --config conf/cam++.yaml --gpu "${gpu_ids[@]}" \
    --data "$data/cnceleb_train/train.csv" --noise "$data/musan/wav.scp" \
    --reverb "$data/rirs/wav.scp" --exp_dir "$exp_dir"
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  # Extract embeddings of test datasets.
  echo "Stage4: Extracting speaker embeddings..."
  # Number of extraction processes started by torchrun; it is set
  # independently of the GPU list.
  # NOTE(review): how the processes are mapped onto GPUs is decided inside
  # extract.py - verify before setting nj above the GPU count.
  nj=12
  torchrun --nproc_per_node="$nj" --master_port=29501 speakerlab/bin/extract.py \
    --exp_dir "$exp_dir" \
    --data "$data/cnceleb_test/wav.scp" --use_gpu --gpu "${gpu_ids[@]}"
fi

if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
  # Output score metrics.
  echo "Stage5: Computing score metrics..."
  trials="$data/cnceleb_test/trials"
  # Enrolment and test embeddings are both read from the same directory.
  python speakerlab/bin/compute_score_metrics.py \
    --enrol_data "$exp_dir/embeddings" --test_data "$exp_dir/embeddings" \
    --scores_dir "$exp_dir/scores" --trials "$trials"
fi
下面是conf/cam++.yaml配置文件。
# Training config
# NOTE(review): values written as <name> appear to reference other keys of this
# file, and the empty keys below (data, noise, reverb, exp_dir) are presumably
# filled from the --data/--noise/--reverb/--exp_dir options that run.sh passes
# to train.py - confirm against the speakerlab config loader.

# inputs
data:
noise:
reverb:

# outputs
exp_dir:

# basic
num_epoch: 75
save_epoch_freq: 5
log_batch_freq: 100

wav_len: 3.0 # duration(s) for each training sample.
sample_rate: 16000
aug_prob: 0.8
speed_pertub: True
lr: 0.2
min_lr: 0.00005

# dataloader
batch_size: 1024
num_workers: 16

# model
fbank_dim: 80
embedding_size: 512
num_classes: 2793

wav_reader:
  obj: speakerlab.process.processor.WavReader
  args:
    duration: <wav_len>
    sample_rate: <sample_rate>
    speed_pertub: <speed_pertub>

label_encoder:
  obj: speakerlab.process.processor.SpkLabelEncoder
  args:
    data_file: <data>

feature_extractor:
  obj: speakerlab.process.processor.FBank
  args:
    n_mels: <fbank_dim>
    sample_rate: <sample_rate>
    mean_nor: True

augmentations:
  obj: speakerlab.process.processor.SpkVeriAug
  args:
    aug_prob: <aug_prob>
    noise_file: <noise>
    reverb_file: <reverb>

preprocessor:
  wav_reader: <wav_reader>
  label_encoder: <label_encoder>
  augmentations: <augmentations>
  feature_extractor: <feature_extractor>

epoch_counter:
  obj: speakerlab.utils.epoch.EpochCounter
  args:
    limit: <num_epoch>

dataset:
  obj: speakerlab.dataset.dataset.WavSVDataset
  args:
    data_file: <data>
    preprocessor: <preprocessor>

dataloader:
  obj: torch.utils.data.DataLoader
  args:
    dataset: <dataset>
    batch_size: <batch_size>
    num_workers: <num_workers>
    pin_memory: True
    drop_last: True

embedding_model:
  obj: speakerlab.models.campplus.DTDNN.CAMPPlus
  args:
    feat_dim: <fbank_dim>
    embedding_size: <embedding_size>

classifier:
  obj: speakerlab.models.campplus.classifier.CosineClassifier
  args:
    input_dim: <embedding_size>
    out_neurons: <num_classes>

optimizer:
  obj: torch.optim.SGD
  args:
    params:
    lr: <lr>
    momentum: 0.9
    nesterov: True
    weight_decay: 0.0001

lr_scheduler:
  obj: speakerlab.process.scheduler.WarmupCosineScheduler
  args:
    optimizer: <optimizer>
    min_lr: <min_lr>
    max_lr: <lr>
    warmup_epoch: 5
    fix_epoch: <num_epoch>
    step_per_epoch:

loss:
  obj: speakerlab.loss.margin_loss.ArcMarginLoss
  args:
    scale: 32.0
    margin: 0.3
    easy_margin: False

margin_scheduler:
  obj: speakerlab.process.scheduler.MarginScheduler
  args:
    criterion: <loss>
    initial_margin: 0.0
    final_margin: 0.3
    increase_start_epoch: 20
    fix_epoch: 50
    step_per_epoch:

checkpointer:
  obj: speakerlab.utils.checkpoint.Checkpointer
  args:
    checkpoints_dir: <exp_dir>/models
    recoverables:
      embedding_model: <embedding_model>
      classifier: <classifier>
      epoch_counter: <epoch_counter>
3. 训练
训练之前需要安装sox。
# Install sox and its development library without prompting.
sudo apt-get -y install sox libsox-dev
执行./run.sh。
4. 其它
点击这里访问我的博客。
如需有偿微调欢迎联系我。