1.安装。deepsignal基于Python3构建,建议在独立的conda虚拟环境中安装。
conda create -n deepsignalenv python=3.6 #创建名为deepsignalenv、Python版本为3.6的虚拟环境
conda activate deepsignalenv #进入虚拟环境
git clone https://github.com/bioinfomaticsCSU/deepsignal.git #从GitHub上下载deepsignal
cd deepsignal
pip install deepsignal #通过pip安装deepsignal;上一步克隆的仓库用于提供scripts/目录下的脚本(如示例中用到的call_modification_frequency.py)
pip install ont-tombo #安装tombo
conda deactivate #退出虚拟环境(之后运行deepsignal或tombo命令前,需先执行conda activate deepsignalenv重新进入)
2.软件参数。
deepsignal -h
usage: deepsignal [-h] {extract,call_mods,train} ...
detecting base modifications from Nanopore sequencing reads, deepsignal contains three modules:
deepsignal extract: extract features from corrected (tombo) fast5s for training or testing
deepsignal call_mods: call modifications
deepsignal train: train a model, need two independent datasets for training and validating
optional arguments:
-h, --help show this help message and exit
modules:
{extract,call_mods,train}
deepsignal modules, use -h/--help for help
3. 示例。
1.re-squiggle
# cmd: tombo resquiggle $fast5_dir $reference_fa
tombo resquiggle fast5s.al GCF_000146045.2_R64_genomic.fna --processes 10 --corrected-group RawGenomeCorrected_001 --basecall-group Basecall_1D_000 --overwrite
2.extract features
deepsignal extract --fast5_dir fast5s.al/ --reference_path GCF_000146045.2_R64_genomic.fna --write_path fast5s.al.CpG.signal_features.17bases.rawsignals_360.tsv --corrected_group RawGenomeCorrected_001 --nproc 10
3.call modifications
# the CpGs are called by using the CpG model of HX1 R9.4 1D
# option A: use the feature file written by "deepsignal extract" as input
deepsignal call_mods --input_path fast5s.al.CpG.signal_features.17bases.rawsignals_360.tsv --model_path model.CpG.R9.4_1D.human_hx1.bn17.sn360/bn_17.sn_360.epoch_7.ckpt --result_file fast5s.al.CpG.call_mods.tsv --nproc 10 --is_gpu no
# option B: use the fast5 directory directly as input, passing --reference_path and --corrected_group; run either A or B, since both write to the same result file
deepsignal call_mods --input_path fast5s.al/ --model_path model.CpG.R9.4_1D.human_hx1.bn17.sn360/bn_17.sn_360.epoch_7.ckpt --result_file fast5s.al.CpG.call_mods.tsv --reference_path GCF_000146045.2_R64_genomic.fna --corrected_group RawGenomeCorrected_001 --nproc 10 --is_gpu no
# calculate modification frequencies from the call_mods result file
python /path/to/deepsignal/scripts/call_modification_frequency.py --input_path fast5s.al.CpG.call_mods.tsv --result_file fast5s.al.CpG.call_mods.frequency.tsv --prob_cf 0
4. train
# need two independent datasets for training and validating
# use deepsignal train -h/--help for more details
deepsignal train --train_file /path/to/train_data/file --valid_file /path/to/valid_data/file --model_dir /dir/to/save/the/new/model