AI生物信息分析策略与项目实战@2024——Day4【T2T基因组组装分析流程】

利用之前已经封装好环境的bioi_t2t_v1.tar镜像进行下列分析

数据准备

数据存放在:
/home/ATCG03/day3_t2t/T2T/data

请添加图片描述

0.0 Kmer分析

  1. 使用jellyfish对二代/HiFi测序数据进行kmer分析
# Working directory for results and directory holding the raw sequencing data
workdir=/work/T2T/myt2t
datadir=/work/T2T/data
# -p creates missing parent directories and does not fail when the step is re-run
mkdir -p "$workdir/0.kmer"
cd "$workdir/0.kmer"

以二代测序数据为例,生成reads解压脚本

# Write one "gzip -dc <file>" command per Illumina read file; jellyfish runs
# these as generator commands (-g) to stream the decompressed reads.
# The shell glob is expanded directly instead of parsing the output of ls.
printf 'gzip -dc %s\n' "$datadir"/illu_*.gz > generate.file
# Count 21-mers from the generator commands listed in generate.file
jellyfish count -t 4 -C -m 21 -s 1G -g generate.file -G 2 -o Kmer_21
# Summarise the k-mer counts
jellyfish stats -o Kmer_21.stat Kmer_21
# Produce the k-mer frequency histogram table
jellyfish histo -v -t 4 -h 10000000 -o Kmer_21.histo Kmer_21

  2. 基因组大小估计
# Fit the GenomeScope model to the 21-mer histogram; results go to genoscope_out
genomescope.R \
  -i Kmer_21.histo \
  -o genoscope_out \
  -k 21 \
  -p 2 \
  -m 10000000

0.1 ONT和HiFi数据组装合并/混合组装

方法一:ONT和HiFi数据分别组装后合并
1.HiFi基因组组装

# Path settings
workdir=/work/T2T/myt2t
datadir=/work/T2T/data
# HiFi genome assembly
mkdir -p "$workdir/1.1_hifi_assembly_hifiasm"
cd "$workdir/1.1_hifi_assembly_hifiasm"
# Assemble with hifiasm; only if it succeeds, convert the primary-contig GFA
# to FASTA: for every line starting with "S", print field 2 as the header
# and field 3 as the sequence. The regex must be closed (/^S/), otherwise awk
# aborts with a syntax error and hifi.fa ends up empty.
hifiasm -o hifi.asm -t 8 "$datadir/hifi.fastq.gz" \
  && awk '/^S/{print ">"$2; print $3}' hifi.asm.bp.p_ctg.gfa > hifi.fa
#结果统计(注意:下方输出全部为0,说明hifi.fa为空;请检查上一步awk命令中的正则是否写为 /^S/ ,修正后重新生成hifi.fa再统计)
[root@b7395b3b4b89  18:55:50 /work/T2T/myt2t/1.1_hifi_assembly_hifiasm]# assembly-stats hifi.fa
stats for hifi.fa
sum = 0, n = 0, ave = 0.00, largest = 0
N50 = 0, n = 0
N60 = 0, n = 0
N70 = 0, n = 0
N80 = 0, n = 0
N90 = 0, n = 0
N100 = 0, n = 0
N_count = 0
Gaps = 0
  2. 使用NextDenovo软件组装ONT基因组
mkdir -p "$workdir/1.2_ONT_assembly_nextdenovo"
cd "$workdir/1.2_ONT_assembly_nextdenovo"
# Write the path of the ONT read file into input.fofn (referenced by input_fofn in run.cfg)
find "$datadir" -name ont.fastq.gz > input.fofn

配置.cfg文件,命名为run.cfg

[General]
job_type = local
# one of: local, slurm, sge, pbs, lsf
job_prefix = nextDenovo
task = all
# one of: all, correct, assemble
rewrite = yes
# yes/no
deltmp = yes
# delete temporary files
parallel_jobs = 2
# number of parallel jobs (default 10)
input_type = raw
# one of: raw, corrected
read_type = ont
# one of: clr, ont, hifi
input_fofn = input.fofn
# file listing the paths of the read files
workdir = out
# output directory

[correct_option]
read_cutoff = 5k
# only reads longer than 5k are used for the assembly
genome_size = 146m
# estimated genome size
pa_correction = 4
# number of parallel tasks in the correction step (affects memory use)
sort_options = -m 2g -t 5
minimap2_options_raw = -t 5
correction_options = -p 5

[assemble_option]
minimap2_options_cns = -t 5
nextgraph_options = -a 1
# Run NextDenovo using the settings in run.cfg
nextDenovo run.cfg
  3. 使用NextPolish对ONT组装结果进行polish
mkdir -p "$workdir/1.3_ONT_polish_nextpolish"
cd "$workdir/1.3_ONT_polish_nextpolish"
# Symlink the NextDenovo assembly as contig.fa (-f replaces a stale link on re-run)
ln -sf ../1.2_ONT_assembly_nextdenovo/out/03.ctg_graph/nd.asm.fasta contig.fa

# Polish with HiFi, ONT and Illumina reads: write one file list per data type
find "$datadir" -name hifi.fastq.gz > ./hifi.fofn
find "$datadir" -name ont.fastq.gz > ./lgs.fofn
# find's output order is unspecified; sort so the Illumina files are listed
# in a stable, name-sorted order
find "$datadir" -name 'illu_*' | LC_ALL=C sort > ./sgs.fofn
#运行nextPolish

配置.cfg文件,命名为run.cfg

[General]
job_type = local
# execution backend, e.g. local, slurm, sge, pbs
job_prefix = nextPolish
task = best
rewrite = yes
rerun = 3
parallel_jobs = 6
# number of parallel jobs (default 10)
multithread_jobs = 5
genome = contig.fa
genome_size = auto
workdir = ./output
# output directory
polish_options = -p {multithread_jobs}

[sgs_option]
sgs_fofn = ./sgs.fofn
sgs_options = -max_depth 100 -bwa

[lgs_option]
lgs_fofn = ./lgs.fofn
lgs_options = -min_read_len 1k -max_depth 100
lgs_minimap2_options = -x map-ont -t {multithread_jobs}

[hifi_option]
hifi_fofn = ./hifi.fofn
hifi_options = -min_read_len 1k -max_depth 100
hifi_minimap2_options = -x map-hifi
# Run NextPolish using the settings in run.cfg
nextPolish run.cfg
  4. Polish后的ONT组装结果与HiFi组装结果合并

未完待续!!!

方法二:ONT和HiFi原始数据混合组装

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值