人类和小鼠转录组上游分析

基础软件

conda install cutadapt, trimmomatic, samtools, hisat2, subread, deeptools -y

人类转录组上游分析

# 样本名称
sample_name=sample

# 线程
threads=4

# 双端测序原始fastq1和fastq2路径
fastq1_path=/path/${sample_name}_1.fq.gz
fastq2_path=/path/${sample_name}_2.fq.gz

# 双端测序clean fastq1和fastq2路径
clean_fastq1_path=/path/${sample_name}_clean_1.fq.gz
clean_fastq2_path=/path/${sample_name}_clean_2.fq.gz

# 双端测序clean filter fastq1和fastq2路径
filter_fastq_path=/path/${sample_name}_filter.fq.gz
filter_fastq1_path=/path/${sample_name}_filter_1.fq.gz
filter_fastq2_path=/path/${sample_name}_filter_2.fq.gz

# 输出SAM文件路径
sam_path=/path/${sample_name}.sam

# 输出BAM文件路径
bam_path=/path/${sample_name}.bam
bam_sorted_path=/path/${sample_name}.sorted.bam

# 输出结果文件
bam_sorted_bw_path=/path/${sample_name}.sorted.bam.bw
count_path=/path/${sample_name}.count

###########################################
# 去接头
cutadapt --pair-filter=any --minimum-length 15 --max-n 8 \
-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
-o $clean_fastq1_path -p $clean_fastq2_path \
$fastq1_path $fastq2_path 

# 去除低质量碱基
trimmomatic PE -threads $threads -phred33 \
$clean_fastq1_path $clean_fastq2_path \
-baseout $filter_fastq_path AVGQUAL:20 SLIDINGWINDOW:4:15 MINLEN:15

# 比对到参考基因组
hisat2 --threads $threads -x /reference/hisat2/hg19 \
-1 $filter_fastq1_path -2 $filter_fastq1_path \
-S $sam_path

# SAM转换为BAM
samtools view -bS $sam_path -@ $threads| \
samtools sort $bam_path -o $bam_sorted_path -@ $threads

# 排序和索引BAM文件
samtools index $bam_path -@ $threads

# featureCounts
featureCounts -T 30 -t exon -g gene_id \
-a /reference/hg19/Homo_sapiens.GRCh37.75.gtf \
-o $count_path $bam_sorted_path

# 生成bamCoverage
bamCoverage --bam $bam_sorted_path -o $bam_sorted_bw_path \
--binSize 10 -p $threads

小鼠转录组上游分析

参考基因组和GTF注释文件替换为小鼠mm10。

# 样本名称
sample_name=sample

# 线程
threads=4

# 双端测序原始fastq1和fastq2路径
fastq1_path=/path/${sample_name}_1.fq.gz
fastq2_path=/path/${sample_name}_2.fq.gz

# 双端测序clean fastq1和fastq2路径
clean_fastq1_path=/path/${sample_name}_clean_1.fq.gz
clean_fastq2_path=/path/${sample_name}_clean_2.fq.gz

# 双端测序clean filter fastq1和fastq2路径
filter_fastq_path=/path/${sample_name}_filter.fq.gz
filter_fastq1_path=/path/${sample_name}_filter_1.fq.gz
filter_fastq2_path=/path/${sample_name}_filter_2.fq.gz

# 输出SAM文件路径
sam_path=/path/${sample_name}.sam

# 输出BAM文件路径
bam_path=/path/${sample_name}.bam
bam_sorted_path=/path/${sample_name}.sorted.bam

# 输出结果文件
bam_sorted_bw_path=/path/${sample_name}.sorted.bam.bw
count_path=/path/${sample_name}.count

###########################################
# 去接头
cutadapt --pair-filter=any --minimum-length 15 --max-n 8 \
-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
-o $clean_fastq1_path -p $clean_fastq2_path \
$fastq1_path $fastq2_path 

# 去除低质量碱基
trimmomatic PE -threads $threads -phred33 \
$clean_fastq1_path $clean_fastq2_path \
-baseout $filter_fastq_path AVGQUAL:20 SLIDINGWINDOW:4:15 MINLEN:15

# 比对到参考基因组
hisat2 --threads $threads -x /reference/hisat2/mm10 \
-1 $filter_fastq1_path -2 $filter_fastq1_path \
-S $sam_path

# SAM转换为BAM
samtools view -bS $sam_path -@ $threads| \
samtools sort $bam_path -o $bam_sorted_path -@ $threads

# 排序和索引BAM文件
samtools index $bam_path -@ $threads

# featureCounts
featureCounts -T 30 -t exon -g gene_id \
-a /reference/mm10/Mus_musculus.GRCm38.102.gtf  \
-o $count_path $bam_sorted_path

# 生成bamCoverage
bamCoverage --bam $bam_sorted_path -o $bam_sorted_bw_path \
--binSize 10 -p $threads
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

生信与基因组学

每一份鼓励是我坚持下去动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值