给 hg19 参考基因组(FASTA)及其注释文件(GTF)的染色体名加上 chr 前缀
# Add the "chr" prefix to the sequence headers of hg19N.fa and write hg19.fa.
# A header such as
#   >Y dna:chromosome chromosome:GRCh37:Y:2649521:59034049:1 REF
# becomes
#   >chrY dna:chromosome chromosome:GRCh37:chrY:2649521:59034049:1 REF
# \1 is the sequence name; \2, \3 and \4 are the three numeric fields that
# follow it and are copied through unchanged. Capturing the first number
# (instead of hard-coding ":1:") lets one expression handle the Y header too,
# so no second in-place pass is needed.
# Backreferences inside an -E pattern are a GNU sed extension.
# NOTE(review): MT is renamed to chrMT, not chrM — confirm that is the name
# the downstream tools expect.
sed -E 's/^>([0-9]+|X|Y|MT) dna:chromosome chromosome:GRCh37:\1:([0-9]+):([0-9]+):([0-9]+) REF/>chr\1 dna:chromosome chromosome:GRCh37:chr\1:\2:\3:\4 REF/' hg19N.fa > hg19.fa

# Same prefix for the first column of the GTF. The trailing tab anchors the
# match to the complete sequence-name field.
sed -E 's/^([0-9]+|X|Y|MT)\t/chr&/' hg19N.gtf > hg19.gtf

# Spot checks: show the last input lines that the two patterns above select.
# FASTA headers (lines starting with ">") exist only in the .fa file.
grep -E '^>([0-9]+|X|Y|MT) ' hg19N.fa | tail
grep -P '^([0-9]+|X|Y|MT)\t' hg19N.gtf | tail
参考:Linux shell 字符串截取与拼接 https://blog.csdn.net/u013485792/article/details/70241377
加染色体号的三部曲:
加一列
# Step 1: prepend a placeholder column "chr" to the rows of each
# per-chromosome .tad file and split them into two outputs:
#   ./ontad/chr<i>-1  rows whose 3rd column is "1"
#   ./ontad/chr<i>-2  rows whose 3rd column is neither "1" nor "0"
# Input and output are tab-separated for every chromosome, X and Y included.
#
# NOTE(review): the input file name was truncated when this page was
# extracted ("OnTAD_NC_p…$i.tad"). Set tad_prefix to everything that precedes
# the chromosome label in the real file names before running.
tad_prefix='OnTAD_NC_p'

mkdir -p ./ontad
for i in {1..18} X Y; do
  awk 'BEGIN { FS = OFS = "\t" } $3 == "1" { print "chr", $0 }' \
    "${tad_prefix}${i}.tad" > "./ontad/chr${i}-1"
  awk 'BEGIN { FS = OFS = "\t" } $3 != "1" && $3 != "0" { print "chr", $0 }' \
    "${tad_prefix}${i}.tad" > "./ontad/chr${i}-2"
done
加数字
# Step 2: turn the placeholder first column "chr" into the real chromosome
# name (chr1 … chr18, chrX, chrY) in the files chr<i>-1 and chr<i>-2.
# The files are addressed relative to the current directory — presumably this
# is run from inside ./ontad; confirm before running.
#
# sed -i edits the file in place. A pipeline of the form
# `cat f | sed … > f` must not be used: the shell truncates f before cat
# reads it. The pattern only matches a bare "chr" followed by whitespace at
# the start of a line, so running the step twice does not produce "chr11".
for i in {1..18} X Y; do
  for part in 1 2; do
    sed -i -E "s/^chr([[:space:]])/chr${i}\1/" "chr${i}-${part}"
  done
done
综合起来
# Combined version of the two steps: write the final chromosome name as a new
# first column in a single awk pass (no placeholder, no sed).
#
# NOTE(review): the input file name was truncated when this page was
# extracted ("OnTAD_NC_p…$i.tad"). Set tad_prefix to everything that precedes
# the chromosome label in the real file names before running.
tad_prefix='OnTAD_NC_p'

#######################################
# Print selected rows of a tab-separated file with a label prepended as a
# new first column.
# Arguments:
#   $1 - label to prepend (e.g. chr1)
#   $2 - input file
#   $3 - row selector: "1" keeps rows whose 3rd column is "1";
#        "2" keeps rows whose 3rd column is neither "1" nor "0"
# Outputs: the selected rows, tab-separated, on stdout
#######################################
prepend_chr() {
  awk -v chrom="$1" -v want="$3" '
    BEGIN { FS = OFS = "\t" }
    (want == "1" && $3 == "1") ||
    (want == "2" && $3 != "1" && $3 != "0") { print chrom, $0 }
  ' "$2"
}

for i in {1..18} X Y; do
  tad_file="${tad_prefix}${i}.tad"
  if [[ ! -r "$tad_file" ]]; then
    printf 'skip: cannot read %s\n' "$tad_file" >&2
    continue
  fi
  mkdir -p ./ontad
  prepend_chr "chr${i}" "$tad_file" 1 > "./ontad/chr${i}-1"
  prepend_chr "chr${i}" "$tad_file" 2 > "./ontad/chr${i}-2"
done
。。。。。。。。。。。。。。。。。。。。。。。。
### 注意:`ls *` 可能造成死循环——不要把输出文件生成在正在遍历的文件夹内,或者用更具体的通配符(例如 `ls *.tad`)代替 `ls *`。
# Intersect every file in ontad/ with every file in K562_TF_chip-seq_no_rep/.
# For TAD file <i> and peak file <j> the result is written to ./ou/<i>/<i>_<j>.
# Sorted copies go to ./ontadsort and ./K562_TF_chip-seq_no_repsort.
#
# The inputs are listed with shell globs rather than by parsing `ls`. Every
# output goes to a different directory than the one being listed, so the
# loops never pick up their own results.
tad_dir=ontad
peak_dir=K562_TF_chip-seq_no_rep
mkdir -p ./ontadsort "./${peak_dir}sort"

# Sort each TAD file once and create its output directory.
for tad_path in "$tad_dir"/*; do
  [[ -f "$tad_path" ]] || continue   # also covers an unmatched glob
  i=${tad_path##*/}
  printf '%s\n' "$i"
  mkdir -p "./ou/$i"
  sort-bed "$tad_path" > "ontadsort/$i.sort"
done

# Convert each peak file to tab-separated columns and sort it once, instead
# of redoing this for every TAD file.
for peak_path in "$peak_dir"/*; do
  [[ -f "$peak_path" ]] || continue
  j=${peak_path##*/}
  printf '%s\n' "$j"
  # Assigning $1 to itself makes awk rebuild the record with OFS, turning
  # whitespace-separated columns into tab-separated ones.
  awk 'BEGIN { FS = " "; OFS = "\t" } { $1 = $1; print $0 }' \
    "$peak_path" > "${peak_dir}sort/$j.tmp"
  sort-bed "${peak_dir}sort/$j.tmp" > "${peak_dir}sort/$j.sort"
done

# bedops -e 1 A B: keep the elements of A that overlap an element of B by at
# least one base.
for tad_path in "$tad_dir"/*; do
  [[ -f "$tad_path" ]] || continue
  i=${tad_path##*/}
  for peak_path in "$peak_dir"/*; do
    [[ -f "$peak_path" ]] || continue
    j=${peak_path##*/}
    bedops -e 1 "ontadsort/$i.sort" "${peak_dir}sort/$j.sort" \
      > "./ou/$i/${i}_${j}"
  done
done
本文详细介绍了如何使用Linux shell脚本对hg19参考基因组进行注释和处理,包括对基因组FASTA文件和GTF文件的编辑,以及如何通过awk和sed命令添加染色体编号前缀。
185

被折叠的 条评论
为什么被折叠?



