给hg19参考基因组(fa)及其注释文件(gtf)的染色体名加上chr前缀
# Add a "chr" prefix to the primary GRCh37 sequence names (1 -> chr1,
# X -> chrX, Y -> chrY, MT -> chrMT) in the FASTA and in the GTF.
# Assumes GNU sed/grep (-E with back-references, \t in patterns, grep -P).
#
# FASTA: rewrite headers of the form
#   >NAME dna:chromosome chromosome:GRCh37:NAME:START:END:STRAND REF
# START is captured (\2) rather than hard-coded to 1, so START/END/STRAND
# are copied through unchanged and the Y record (START 2649521) is handled
# by this same expression; no separate in-place edit for Y is needed.
sed -E 's/^>([0-9]+|X|Y|MT) dna:chromosome chromosome:GRCh37:\1:([0-9]+):([0-9]+):([0-9]+) REF/>chr\1 dna:chromosome chromosome:GRCh37:chr\1:\2:\3:\4 REF/' hg19N.fa > hg19.fa

# GTF: prefix the first column only when the whole field is one of the
# primary names. Requiring the tab that ends the field keeps other sequence
# names that merely start with a digit, X, Y or MT untouched, matching the
# set of records renamed in the FASTA.
sed -E 's/^([0-9]+|X|Y|MT)\t/chr&/' hg19N.gtf > hg19.gtf

# Spot checks: print the last few matching lines of the unprefixed input.
# NOTE(review): GTF records do not start with ">", so the first check is
# expected to print nothing; it may have been meant for hg19N.fa - confirm.
grep -P '^>([0-9]+|X|MT)' hg19N.gtf | tail
grep -P '^([0-9]+|X|Y|MT)+' hg19N.gtf | tail
Linux shell字符串截取与拼接:https://blog.csdn.net/u013485792/article/details/70241377
加染色体号的三部曲:
加一列
# Step 1 of 3: split each chromosome's OnTAD .tad file by the value of
# column 3 and prepend a placeholder "chr" column (the chromosome name itself
# is filled in by the following step).
#   column 3 == "1"                 -> ./ontad/chr<i>-1
#   column 3 neither "1" nor "0"    -> ./ontad/chr<i>-2
#
# NOTE(review): the input file name is truncated in these notes
# ("OnTAD_NC_p...<i>.tad"). Set tad_prefix to everything that precedes the
# chromosome id so that the input file is "${tad_prefix}<i>.tad".
: "${tad_prefix:?set tad_prefix to the part of the OnTAD file name before the chromosome id}"

# The redirections below fail if the output directory does not exist yet.
mkdir -p ./ontad

# X and Y go through exactly the same tab-delimited command as the autosomes
# so that every output file uses the same field separator.
for i in {1..18} X Y; do
  awk 'BEGIN { FS = OFS = "\t" } $3 == "1" { print "chr", $0 }' \
    "${tad_prefix}${i}.tad" > "./ontad/chr${i}-1"
  awk 'BEGIN { FS = OFS = "\t" } $3 != "1" && $3 != "0" { print "chr", $0 }' \
    "${tad_prefix}${i}.tad" > "./ontad/chr${i}-2"
done
加数字
# Step 2 of 3: turn the placeholder "chr" at the start of each line into the
# real chromosome name (chr -> chr<i>) in the files chr<i>-1 and chr<i>-2 of
# the current directory.
#
# The files are edited with `sed -i` (GNU sed). The earlier form
# `cat f | sed ... > f` reads and writes the same file in one pipeline: the
# shell truncates f for the redirection before cat has read it, which can
# leave the file empty.
for i in {1..18} X Y; do
  for part in 1 2; do
    # No "g" flag: only the first "chr" on each line is rewritten.
    sed -i "s/chr/chr${i}/" "chr${i}-${part}"
  done
done
综合起来
# Step 3 of 3 (steps 1 and 2 combined): split each chromosome's OnTAD .tad
# file by the value of column 3 and prepend the chromosome name as a new
# first column.
#   column 3 == "1"                 -> ./ontad/chr<i>-1
#   column 3 neither "1" nor "0"    -> ./ontad/chr<i>-2
#
# NOTE(review): the input file name is truncated in these notes
# ("OnTAD_NC_p...<i>.tad"). Set tad_prefix to everything that precedes the
# chromosome id so that the input file is "${tad_prefix}<i>.tad".
: "${tad_prefix:?set tad_prefix to the part of the OnTAD file name before the chromosome id}"

# The redirections below fail if the output directory does not exist yet.
mkdir -p ./ontad

# The chromosome name is handed to awk with -v and printed directly. This
# replaces the `awk ... | cat | sed s/chr/chr<i>/g` pipeline, whose "g" flag
# would also have rewritten any other "chr" occurring later in a line.
# X and Y use the same tab-delimited command as the autosomes so all output
# files share one field separator.
for i in {1..18} X Y; do
  awk -v chrom="chr${i}" \
    'BEGIN { FS = OFS = "\t" } $3 == "1" { print chrom, $0 }' \
    "${tad_prefix}${i}.tad" > "./ontad/chr${i}-1"
  awk -v chrom="chr${i}" \
    'BEGIN { FS = OFS = "\t" } $3 != "1" && $3 != "0" { print chrom, $0 }' \
    "${tad_prefix}${i}.tad" > "./ontad/chr${i}-2"
done
。。。。。。。。。。。。。。。。。。。。。。。。
### Original note: looping over `ls *` turned into an endless loop - take
### care not to create new files inside the directory being listed, or list
### a more specific pattern (e.g. `ls **???`) instead.
### Here every output goes to a directory other than the ones being globbed,
### and the globs are expanded by the shell itself instead of parsing `ls`.

# For every TAD file in ontad/ and every ChIP-seq file in
# K562_TF_chip-seq_no_rep/, write the TAD intervals that overlap at least one
# ChIP-seq interval to ./ou/<tad>/<tad>_<chip>.
#
# Intermediate files:
#   ontadsort/<tad>.sort                       sort-bed output of the TAD file
#   K562_TF_chip-seq_no_repsort/<chip>.tmp     ChIP-seq file re-joined with tabs
#   K562_TF_chip-seq_no_repsort/<chip>.sort    sort-bed output of the .tmp file
mkdir -p ./ontadsort ./K562_TF_chip-seq_no_repsort ./ou

# Sort each TAD file once.
for tad_path in ontad/*; do
  [[ -e "$tad_path" ]] || continue   # glob matched nothing
  i=${tad_path##*/}
  sort-bed "ontad/$i" > "ontadsort/$i.sort"
done

# Normalise and sort each ChIP-seq file once. This does not depend on the
# TAD file, so it is done here rather than inside the nested loop below.
for chip_path in K562_TF_chip-seq_no_rep/*; do
  [[ -e "$chip_path" ]] || continue
  j=${chip_path##*/}
  # $1 = $1 forces awk to rebuild the record, joining the fields with tabs.
  awk 'BEGIN { FS = " "; OFS = "\t" } { $1 = $1; print $0 }' \
    "K562_TF_chip-seq_no_rep/$j" > "K562_TF_chip-seq_no_repsort/$j.tmp"
  sort-bed "K562_TF_chip-seq_no_repsort/$j.tmp" \
    > "K562_TF_chip-seq_no_repsort/$j.sort"
done

# Pairwise overlap. `bedops -e 1 A B` (--element-of) keeps the elements of A
# that overlap elements of B by at least 1 bp.
for tad_path in ontad/*; do
  [[ -e "$tad_path" ]] || continue
  i=${tad_path##*/}
  printf '%s\n' "$i"
  mkdir -p "./ou/$i"
  for chip_path in K562_TF_chip-seq_no_rep/*; do
    [[ -e "$chip_path" ]] || continue
    j=${chip_path##*/}
    printf '%s\n' "$j"
    bedops -e 1 "ontadsort/$i.sort" "K562_TF_chip-seq_no_repsort/$j.sort" \
      > "./ou/$i/${i}_${j}"
  done
done