参考基因组下载和建立索引

参考基因组下载和建立索引

下载小鼠基因组 mm10:
# Download the UCSC mm10 per-chromosome FASTA archive and merge it into mm10.fa.
#
# The original backgrounded wget and then ran tar/cat/rm immediately, i.e.
# against an archive that was missing or still being written. The whole chain
# now runs as ONE background job (still hang-up safe via nohup), and each step
# only starts once the previous one has succeeded.
# Progress and errors are written to mm10.download.log.
cd ~/bioreference
mkdir -p genome/mm10 && cd genome/mm10
# -c lets an interrupted download resume.
# chr*.fa (the same glob the cleanup uses) keeps an mm10.fa left over from a
# previous run out of cat's input.
nohup sh -c '
  wget -c http://hgdownload.cse.ucsc.edu/goldenPath/mm10/bigZips/chromFa.tar.gz &&
  tar zxvf chromFa.tar.gz &&
  cat chr*.fa > mm10.fa &&
  rm chr*.fa
' > mm10.download.log 2>&1 &

下载hg19:
# Download the UCSC hg19 per-chromosome FASTA archive and merge it into hg19.fa.
#
# The original backgrounded wget and then ran tar/cat/rm immediately, i.e.
# against an archive that was missing or still being written. The whole chain
# now runs as ONE background job (still hang-up safe via nohup), and each step
# only starts once the previous one has succeeded.
# Progress and errors are written to hg19.download.log.
cd ~/bioreference
mkdir -p genome/hg19 && cd genome/hg19
# -c lets an interrupted download resume.
# chr*.fa (the same glob the cleanup uses) keeps an hg19.fa left over from a
# previous run out of cat's input.
nohup sh -c '
  wget -c http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz &&
  tar zxvf chromFa.tar.gz &&
  cat chr*.fa > hg19.fa &&
  rm chr*.fa
' > hg19.download.log 2>&1 &

下载hg38
# Download the UCSC hg38 genome and decompress it to hg38.fa.
#
# The original only fetched hg38.fa.gz, while the index-building commands in
# this file read genome/hg38/hg38.fa. Decompression is now chained after a
# successful download inside one nohup'd background job.
# Progress and errors are written to hg38.download.log.
cd ~/bioreference
mkdir -p genome/hg38 && cd genome/hg38
# gunzip -c keeps hg38.fa.gz in place, so re-running `wget -c` is a no-op.
nohup sh -c '
  wget -c http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz &&
  gunzip -c hg38.fa.gz > hg38.fa
' > hg38.download.log 2>&1 &

bowtie软件建立索引文件
# Build bowtie2 indexes for hg19, hg38 and mm10, one background job per genome.
#
# The original entered ~/bioreference/index/bowtie but pointed bowtie2-build at
# ~/reference/genome/... (input) and ~/reference/index/bowtie/... (output).
# The genomes are downloaded under ~/bioreference and the output directory
# created here is under ~/bioreference, so every path now uses that root.
# Each job logs to <genome>.bowtie_index.log in the index directory; `time`
# appends the elapsed time to the same log.
cd ~/bioreference
mkdir -p index/bowtie && cd index/bowtie
for genome in hg19 hg38 mm10; do
  nohup time ~/biosoft/bowtie/bowtie2-2.2.9/bowtie2-build \
    "${HOME}/bioreference/genome/${genome}/${genome}.fa" \
    "${HOME}/bioreference/index/bowtie/${genome}" \
    1>"${genome}.bowtie_index.log" 2>&1 &
done


bwa软件建立索引文件

# Build BWA indexes (bwtsw algorithm) for hg19, hg38 and mm10.
# One nohup'd background job per genome; stdout and stderr of each job, plus
# the `time` report, go to <genome>.bwa_index.log in index/bwa.
cd ~/bioreference
mkdir -p index/bwa && cd index/bwa
for genome in hg19 hg38 mm10; do
  nohup time bwa index -a bwtsw \
    -p ~/bioreference/index/bwa/"${genome}" \
    ~/bioreference/genome/"${genome}"/"${genome}".fa \
    1>"${genome}".bwa_index.log 2>&1 &
done


hisat软件建立索引文件
# Fetch and unpack the prebuilt HISAT2 indexes (hg19, hg38, grcm38).
#
# The original started three background downloads and then ran tar straight
# away, before any archive could be complete. Each archive now gets its own
# nohup'd background job in which tar runs only after wget has succeeded, so
# the downloads still proceed in parallel.
# Per-archive output goes to <name>.download.log.
#
# NOTE(review): this section uses ~/reference while the genome and bwa sections
# use ~/bioreference — confirm which root is intended.
# mkdir -p on the full path so the cd cannot fail when ~/reference is missing.
mkdir -p ~/reference/index/hisat && cd ~/reference/index/hisat
for name in hg19 hg38 grcm38; do
  nohup sh -c "wget -c ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/${name}.tar.gz && tar zxvf ${name}.tar.gz" \
    > "${name}.download.log" 2>&1 &
done

常用注释文件

# human genome reference
# Download hg19 from UCSC onto the 14THHD volume and decompress it to hg19.fa.
# Recorded checksum — md5sum hg19.fa: 806c02398f5ac5da8ffd6da2d1d5d1a9
#
# The original only fetched hg19.fa.gz, while the bowtie2/bwa index commands
# for this volume read genome/hg19/hg19.fa. Decompression is now chained after
# a successful download inside one nohup'd background job.
# Progress and errors are written to hg19.download.log.
cd /media/yuansh/14THHD/reference
mkdir -p genome/hg19 && cd genome/hg19
# gunzip -c keeps hg19.fa.gz in place, so re-running `wget -c` is a no-op.
nohup sh -c '
  wget -c http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz &&
  gunzip -c hg19.fa.gz > hg19.fa
' > hg19.download.log 2>&1 &

 
# Download hg38 from UCSC onto the 14THHD volume and decompress it to hg38.fa.
# Recorded checksum — md5sum hg38.fa: 1c9dcaddfa41027f17cd8f7a82c7293b
#
# The original only fetched hg38.fa.gz, while the bowtie2/bwa index commands
# for this volume read genome/hg38/hg38.fa. Decompression is now chained after
# a successful download inside one nohup'd background job.
# Progress and errors are written to hg38.download.log.
cd /media/yuansh/14THHD/reference
mkdir -p genome/hg38 && cd genome/hg38
# -c resumes a partial download; gunzip -c keeps the .gz in place.
nohup sh -c '
  wget -c http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.fa.gz &&
  gunzip -c hg38.fa.gz > hg38.fa
' > hg38.download.log 2>&1 &

# bowtie2: build hg19, hg38 and mm10 indexes on the 14THHD volume.
# One nohup'd background job per genome; each job's output and `time` report
# are written to <genome>.bowtie_index.log inside index/bowtie.
cd /media/yuansh/14THHD/reference
mkdir -p index/bowtie && cd index/bowtie
for genome in hg19 hg38 mm10; do
  nohup time /media/yuansh/14THHD/biosoft/bowtie/bowtie2-2.2.9/bowtie2-build \
    "/media/yuansh/14THHD/reference/genome/${genome}/${genome}.fa" \
    "/media/yuansh/14THHD/reference/index/bowtie/${genome}" \
    1>"${genome}.bowtie_index.log" 2>&1 &
done

# bwa: build hg19, hg38 and mm10 indexes (bwtsw algorithm) on the 14THHD volume.
# One nohup'd background job per genome; each job's output and `time` report
# are written to <genome>.bwa_index.log inside index/bwa.
cd /media/yuansh/14THHD/reference
mkdir -p index/bwa && cd index/bwa
for genome in hg19 hg38 mm10; do
  nohup time /media/yuansh/14THHD/biosoft/bwa/bwa-0.7.15/bwa index -a bwtsw \
    -p "/media/yuansh/14THHD/reference/index/bwa/${genome}" \
    "/media/yuansh/14THHD/reference/genome/${genome}/${genome}.fa" \
    1>"${genome}.bwa_index.log" 2>&1 &
done

# hisat2: fetch and unpack the prebuilt hg19 and hg38 indexes on the 14THHD
# volume.
#
# The original started both downloads in the background and then ran tar
# immediately, before either archive could be complete. Each archive now gets
# its own nohup'd background job in which tar runs only after wget succeeded.
# Per-archive output goes to <name>.download.log.
cd /media/yuansh/14THHD/reference
mkdir -p index/hisat && cd index/hisat
for name in hg19 hg38; do
  nohup sh -c "wget -c ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/data/${name}.tar.gz && tar zxvf ${name}.tar.gz" \
    > "${name}.download.log" 2>&1 &
done

  
  
# ExAC release 1 site VCF (+ tabix index) and the current CNV files.
#
# The original created .../14THHD/annotation/variation/human/ExAC but then
# cd'd into .../14THHD/reference/annotation/variation/human/ExAC, a path that
# is never created, so the downloads landed in whatever directory was current.
# Both commands now use the directory that is actually created (the dbSNP and
# 1000genomes sections use the same .../14THHD/annotation/variation/human root).
exac_dir=/media/yuansh/14THHD/annotation/variation/human/ExAC
mkdir -p "${exac_dir}" && cd "${exac_dir}"
## http://exac.broadinstitute.org/
## ftp://ftp.broadinstitute.org/pub/ExAC_release/current
wget ftp://ftp.broadinstitute.org/pub/ExAC_release/release1/ExAC.r1.sites.vep.vcf.gz.tbi
# The site VCF itself is fetched in the background.
nohup wget ftp://ftp.broadinstitute.org/pub/ExAC_release/release1/ExAC.r1.sites.vep.vcf.gz &
wget ftp://ftp.broadinstitute.org/pub/ExAC_release/current/cnv/exac-final-cnv.gene.scores071316
wget ftp://ftp.broadinstitute.org/pub/ExAC_release/current/cnv/exac-final.autosome-1pct-sq60-qc-prot-coding.cnv.bed
 
 

## dbSNP "All" VCF for GRCh37 (hg19), with its .md5 and tabix .tbi companions.
## https://www.ncbi.nlm.nih.gov/projects/SNP/
## ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b147_GRCh38p2/
## ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b147_GRCh37p13/
## (the listing URLs above name build 147; the downloads below use build 151)

dbsnp37_dir=/media/yuansh/14THHD/annotation/variation/human/dbSNP/hg19_GRCH37
mkdir -p "${dbsnp37_dir}"
cd "${dbsnp37_dir}"

# md5sum vcf.gz  4f696650bbc5b34270a8a9727120a0e3
dbsnp37_vcf=ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh37p13/VCF/All_20180423.vcf.gz
# The VCF and its index download in the background; the .md5 fetch runs in
# the foreground.
nohup wget -c "${dbsnp37_vcf}" &
nohup wget -c "${dbsnp37_vcf}.md5"
nohup wget -c "${dbsnp37_vcf}.tbi" &

# dbSNP build 151 "All" VCF for GRCh38 (hg38), with its .md5 and tabix .tbi
# companions. All three files are fetched as resumable (-c) background jobs.
dbsnp38_dir=/media/yuansh/14THHD/annotation/variation/human/dbSNP/hg38_GRCH38
mkdir -p "${dbsnp38_dir}"
cd "${dbsnp38_dir}"
dbsnp38_vcf=ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/All_20180418.vcf.gz
for suffix in "" .md5 .tbi; do
  nohup wget -c "${dbsnp38_vcf}${suffix}" &
done



# Mirror the 1000 Genomes 20130502 release directory into a flat local folder.
# The stray `s` line that followed the wget command in the original (a typo
# that only produced "command not found") has been removed.
mkdir -p /media/yuansh/14THHD/annotation/variation/human/1000genomes
cd /media/yuansh/14THHD/annotation/variation/human/1000genomes
## ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/
# -c resume, -r recursive, -nd do not recreate the remote directory tree,
# -np never ascend to the parent directory; -k/-L/-p are HTML link and
# page-requisite options carried over from the original command.
nohup wget  -c -r -nd -np -k -L -p  ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502 &
 
# ESP6500 SNP/indel VCF bundle (GRCh38 liftover, per the archive name),
# downloaded in the background.
# http://evs.gs.washington.edu/EVS/
esp_dir=~/annotation/variation/human/ESP6500
mkdir -p "${esp_dir}"
cd "${esp_dir}"
nohup wget http://evs.gs.washington.edu/evs_bulk_data/ESP6500SI-V2-SSA137.GRCh38-liftover.snps_indels.vcf.tar.gz &
 
# UK10K cohort site VCF, downloaded in the background.
# http://www.uk10k.org/
uk10k_dir=~/annotation/variation/human/UK10K
mkdir -p "${uk10k_dir}"
cd "${uk10k_dir}"
nohup wget ftp://ngs.sanger.ac.uk/production/uk10k/UK10K_COHORT/REL-2012-06-02/UK10K_COHORT.20160215.sites.vcf.gz &
 
# GoNL release 5 variants: recursive, flat (-nd), resumable (-c) background
# mirror of the release directory.
## http://www.nlgenome.nl/search/
## https://molgenis26.target.rug.nl/downloads/gonl_public/variants/release5/
gonl_dir=~/annotation/variation/human/gonl
mkdir -p "${gonl_dir}"
cd "${gonl_dir}"
nohup wget -c -r -nd -np -k -L -p https://molgenis26.target.rug.nl/downloads/gonl_public/variants/release5 &
 
# Create and enter the "omin" and "GWAS" directories in turn.
# Nothing is downloaded here; the shell ends up in the GWAS directory.
# NOTE(review): "omin" may be a misspelling of OMIM — confirm before renaming.
for db in omin GWAS; do
  mkdir -p ~/annotation/variation/human/"${db}"
  cd ~/annotation/variation/human/"${db}"
done
 
# HapMap phase 3 files from NCBI plus the HapMap3-ENCODE files from BCM.
hapmap_dir=~/annotation/variation/human/hapmap
mkdir -p "${hapmap_dir}"
cd "${hapmap_dir}"
# ftp://ftp.ncbi.nlm.nih.gov/hapmap/
hapmap_ncbi=ftp://ftp.ncbi.nlm.nih.gov/hapmap/phase_3
wget "${hapmap_ncbi}/relationships_w_pops_051208.txt"
# Background recursive fetch restricted to *.gz files (-A.gz), stored flat (-nd).
nohup wget -c -r -np -k -L -p -nd -A.gz "${hapmap_ncbi}/hapmap3_reformatted" &
# ftp://ftp.hgsc.bcm.tmc.edu/pub/data/HapMap3-ENCODE/ENCODE3/ENCODE3v1/
hapmap_bcm=ftp://ftp.hgsc.bcm.tmc.edu/pub/data/HapMap3-ENCODE/ENCODE3/ENCODE3v1
wget "${hapmap_bcm}/bcm-encode3-QC.txt"
wget "${hapmap_bcm}/bcm-encode3-submission.txt.gz"
 
## 1 million single nucleotide polymorphisms (SNPs) for DNA samples from each of the three ethnic groups in Singapore – Chinese, Malays and Indians.
## The Affymetrix Genome-Wide Human SNP Array 6.0   && The Illumina Human1M single BeadChip 
## http://www.statgen.nus.edu.sg/~SGVP/
## http://www.statgen.nus.edu.sg/~SGVP/singhap/files-website/samples-information.txt
# http://www.statgen.nus.edu.sg/~SGVP/singhap/files-website/genotypes/2009-01-30/QC/
 
## Singapore Sequencing Malay Project (SSMP)
## http://www.statgen.nus.edu.sg/~SSMP/
## http://www.statgen.nus.edu.sg/~SSMP/download/vcf/2012_05
# Only the directory is prepared here; no download command is given.
ssmp_dir=~/annotation/variation/human/SSMP
mkdir -p "${ssmp_dir}"
cd "${ssmp_dir}"

## Singapore Sequencing Indian Project (SSIP)
# http://www.statgen.nus.edu.sg/~SSIP/
## http://www.statgen.nus.edu.sg/~SSIP/download/vcf/dataFreeze_Feb2013
# Only the directory is prepared here; the shell is left inside it.
ssip_dir=~/annotation/variation/human/SSIP
mkdir -p "${ssip_dir}"
cd "${ssip_dir}"
  
# Ensembl human GTF annotations: release 75 (GRCh37) and release 86 (GRCh38).
# NOTE(review): no cd precedes these downloads, so the files land in whatever
# directory is current at this point — confirm that is the intended location.
ensembl_pub=ftp://ftp.ensembl.org/pub
wget "${ensembl_pub}/release-75/gtf/homo_sapiens/Homo_sapiens.GRCh37.75.gtf.gz"
wget "${ensembl_pub}/release-86/gtf/homo_sapiens/Homo_sapiens.GRCh38.86.chr.gtf.gz"
 
# GENCODE human release 25: GRCh38 GTFs plus the GRCh37 lift-over annotation
# and metadata tables. Files are fetched one after another in the foreground.
gencode_dir=~/reference/gtf/gencode
mkdir -p "${gencode_dir}"
cd "${gencode_dir}"
gencode_r25=ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_25
## https://www.gencodegenes.org/releases/current.html
for part in 2wayconspseudos long_noncoding_RNAs polyAs annotation; do
  wget "${gencode_r25}/gencode.v25.${part}.gtf.gz"
done
## https://www.gencodegenes.org/releases/25lift37.html
for part in annotation.gtf metadata.HGNC metadata.EntrezGene metadata.RefSeq; do
  wget "${gencode_r25}/GRCh37_mapping/gencode.v25lift37.${part}.gz"
done
 
# Prepare and enter the directory for Ensembl release 86 human annotation.
# No download command is given here.
## http://asia.ensembl.org/info/data/ftp/index.html
ensembl86_dir=~/reference/gtf/ensembl/homo_sapiens_86
mkdir -p "${ensembl86_dir}"
cd "${ensembl86_dir}"
 
# GRCh38 "full analysis set plus decoy HLA" reference from the 1000 Genomes
# server: the FASTA plus its .fa.fai index and .dict sequence dictionary.
# All three downloads run in the foreground, one after another.
cd ~/reference
mkdir -p genome/GRCh38_reference_genome && cd genome/GRCh38_reference_genome
# http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/
grch38_stem=http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/GRCh38_full_analysis_set_plus_decoy_hla

nohup wget "${grch38_stem}.fa"

wget "${grch38_stem}.fa.fai"
wget "${grch38_stem}.dict"
 
## GATK resource bundle (b37)
## ftp://ftp.broadinstitute.org/bundle/b37/
#
# The original created ~/annotation/GATK but then cd'd into
# ~/annotation/variation/GATK, which was never created; the cd failed and the
# files were written to whatever directory was current. The directory that is
# entered is now the one that is created.
# NOTE(review): ~/annotation/variation/GATK (the original cd target) is kept
# as the destination — confirm it is preferred over ~/annotation/GATK.
gatk_dir=~/annotation/variation/GATK
mkdir -p "${gatk_dir}" && cd "${gatk_dir}"
bundle_b37=ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/b37
for resource in \
  1000G_phase1.snps.high_confidence.b37.vcf.gz \
  dbsnp_138.b37.vcf.gz \
  human_g1k_v37.fasta.gz \
  NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.gz \
  Mills_and_1000G_gold_standard.indels.b37.vcf.gz \
  hapmap_3.3.b37.vcf.gz \
  1000G_phase1.indels.b37.vcf.gz \
  1000G_phase1.indels.b37.vcf.idx.gz; do
  wget "${bundle_b37}/${resource}"
done
# Only the phase 1 indel VCF and its index are decompressed here.
gunzip 1000G_phase1.indels.b37.vcf.idx.gz
gunzip 1000G_phase1.indels.b37.vcf.gz
    
# Ensembl release 87 chromosome-level GTFs for human (GRCh38), mouse (GRCm38)
# and zebrafish (GRCz10), fetched one after another in the foreground.
ensembl_gtf_dir=~/institute/ENSEMBL/gtf
mkdir -p "${ensembl_gtf_dir}"
cd "${ensembl_gtf_dir}"
ensembl_r87=ftp://ftp.ensembl.org/pub/release-87/gtf
wget "${ensembl_r87}/homo_sapiens/Homo_sapiens.GRCh38.87.chr.gtf.gz"
wget "${ensembl_r87}/mus_musculus/Mus_musculus.GRCm38.87.chr.gtf.gz"
wget "${ensembl_r87}/danio_rerio/Danio_rerio.GRCz10.87.chr.gtf.gz"
  
# TCGA ACC Level 3 archives from Broad Firehose (stddata run 2016_01_28):
# copy-number segments (with and without germline CNV) and mutation calls
# (raw and Oncotated), each saved under a short local name via -O.
#
# The original cd'd into ~/institute/TCGA/firehose without creating it; when
# the directory is missing the cd fails and the archives land in whatever
# directory is current. The directory is now created first, as every other
# section of this file does.
mkdir -p ~/institute/TCGA/firehose && cd ~/institute/TCGA/firehose
## https://gdac.broadinstitute.org/
wget -O ACC.gistic.seg.tar.gz http://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/ACC/20160128/gdac.broadinstitute.org_ACC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_minus_germline_cnv_hg19__seg.Level_3.2016012800.0.0.tar.gz
wget -O ACC.raw.seg.tar.gz http://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/ACC/20160128/gdac.broadinstitute.org_ACC.Merge_snp__genome_wide_snp_6__broad_mit_edu__Level_3__segmented_scna_hg19__seg.Level_3.2016012800.0.0.tar.gz
wget -O ACC.maf.tar.gz http://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/ACC/20160128/gdac.broadinstitute.org_ACC.Mutation_Packager_Calls.Level_3.2016012800.0.0.tar.gz
wget -O ACC.maf.anno.tar.gz http://gdac.broadinstitute.org/runs/stddata__2016_01_28/data/ACC/20160128/gdac.broadinstitute.org_ACC.Mutation_Packager_Oncotated_Calls.Level_3.2016012800.0.0.tar.gz

SSH 传输文件

在linux下一般用scp这个命令来通过ssh传输文件。

# scp usage templates. Replace username, servername and the paths with real
# values before running. The original glued parenthesised annotations onto the
# path arguments (e.g. "local_dir(...)"), which the shell rejects as a syntax
# error; those annotations now live in comments.

# 1. Download a file from the server.
#    Arguments: remote file, then the local directory to copy it into.
scp username@servername:/path/filename /var/www/local_dir

# Example: scp root@192.168.0.101:/var/www/test.txt /var/www/local_dir
# copies /var/www/test.txt from 192.168.0.101 into the local /var/www/local_dir.



# 2. Upload a local file to the server.
scp /path/filename username@servername:/path

# Example: scp /Users/yuansh/Desktop/1.pdf yuansh@192.168.1.153:/mnt/c/Users/yuansh/Desktop



# 3. Download a whole directory from the server.
#    Arguments: remote directory, then the local directory.
scp -r username@servername:/var/www/remote_dir/ /var/www/local_dir

# Example: scp -r yuansh@192.168.0.153:/mnt/c/Users/yuansh/Desktop/data/U /Users/yuansh/Desktop

# 4. Upload a directory to the server.
scp -r local_dir username@servername:remote_dir
# Example: scp -r test root@192.168.0.101:/var/www/
# uploads the test directory under the current directory to /var/www/ on the server.

GATK软件安装

搜索github,自行下载最新版本

Releases · broadinstitute/gatk (github.com):https://github.com/broadinstitute/gatk/releases

![截图](../image/image-20201211204436619.png)

右键点击复制链接然后下载

# Install GATK 4.1.9.0 from the GitHub release archive into ./biosoft/gatk.
# The cd is relative, so run this from the parent directory of biosoft.
cd biosoft
proxy_on # enable the terminal proxy (presumably a user-defined alias/function; not defined in this file)
wget -c https://github.com/broadinstitute/gatk/releases/download/4.1.9.0/gatk-4.1.9.0.zip
# `x` is used because the author's shell is zsh; under bash use `unzip` instead.
# If unzip is not installed, install it first:
# sudo apt install unzip
x gatk-4.1.9.0.zip
mv gatk-4.1.9.0 gatk #rename the unpacked directory

# Add gatk to PATH. vim opens ~/.zshrc interactively; the export line below is
# presumably the line to add to that file — confirm.
vim ~/.zshrc 
export PATH=/home/yuansh/biosoft/gatk:$PATH
# Run gatk; per the original note, the installation is complete if it prints
# its usual output.
gatk

![截图](../image/image-20201211210243279.png)

  • 5
    点赞
  • 34
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值