$ cat <test.fasta
> >SOX2
> ACGAGGGACGCATCGGACGACTGCAGGACTGTC
> ACGAGGGACGCATCGGACGACTGCAGGACTGTC
> ACGAGGGACGCATCGGACGACTGCAGGAC
> >POU5F1
> CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT
> CGGAAGGTAGTCGTCAGTGCAGCGAGTCC
> >NANOG
> ACGAGGGACGCATCGGACGACTGCAGGACTGTC
> ACGAGGGACGCATCGGACGACTGCAGG
> ACGAGGGACGCATCGGACGACTGCAGGACTGTC
> ACGAGGGACGCATCGGACGACTGCAGGACTGT
> END
# 给>号开头的行的行尾加个TAB键,以便隔开名字和序列
# TAB键不可见,直接看看不大
# ()表示记录匹配的内容,1则表示()中记录的匹配的内容
# 后面我们专门讲sed
$ sed 's/^(>.*)/1/' test.fasta
>SOX2
ACGAGGGACGCATCGGACGACTGCAGGACTGTC
ACGAGGGACGCATCGGACGACTGCAGGACTGTC
ACGAGGGACGCATCGGACGACTGCAGGAC
>POU5F1
CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT
CGGAAGGTAGTCGTCAGTGCAGCGAGTCC
>NANOG
ACGAGGGACGCATCGGACGACTGCAGGACTGTC
ACGAGGGACGCATCGGACGACTGCAGG
ACGAGGGACGCATCGGACGACTGCAGGACTGTC
ACGAGGGACGCATCGGACGACTGCAGGACTGT
#使用cat -A 可以显示文件中所有的符号
# ^I 表示tab键
# $表示行尾
$ sed 's/^(>.*)/1/' test.fasta | cat -A
>SOX2^I$
ACGAGGGACGCATCGGACGACTGCAGGACTGTC$
ACGAGGGACGCATCGGACGACTGCAGGACTGTC$
ACGAGGGACGCATCGGACGACTGCAGGAC$
>POU5F1^I$
CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT$
CGGAAGGTAGTCGTCAGTGCAGCGAGTCC$
>NANOG^I$
ACGAGGGACGCATCGGACGACTGCAGGACTGTC$
ACGAGGGACGCATCGGACGACTGCAGG$
ACGAGGGACGCATCGGACGACTGCAGGACTGTC$
ACGAGGGACGCATCGGACGACTGCAGGACTGT$
# 把所有的换行符替换为空格
# 主意第二个参数,引号内为空格
$ sed 's/^(>.*)/1/' test.fasta | tr '
' ' '
>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGAC >POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT CGGAAGGTAGTCGTCAGTGCAGCGAGTCC >NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGT
# 把最后一个空格替换为换行符
$ sed 's/^(>.*)/1/' test.fasta | tr '
' ' ' | sed -e 's/ $/
/'
>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGAC >POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT CGGAAGGTAGTCGTCAGTGCAGCGAGTCC >NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGT
# 把 ' >'替换为换行符 注意被替换的是 空格+大于号
# 当连用多个替换命令时,使用-e 隔开
$ sed 's/^(>.*)/1/' test.fasta | tr '
' ' ' | sed -e 's/ $/
/' -e 's/ >/
>/g'
>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGAC
>POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT CGGAAGGTAGTCGTCAGTGCAGCGAGTCC
>NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGT
# 把所有的空格替换掉
$ sed 's/^(>.*)/1/' test.fasta | tr '
' ' ' | sed -e 's/ $/
/' -e 's/ >/
>/g' -e 's/ //g'
>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGAC
>POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGTCGGAAGGTAGTCGTCAGTGCAGCGAGTCC
>NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGT
# 把TAB键转换为换行符
$ sed 's/^(>.*)/1/' test.fasta | tr '
' ' ' | sed -e 's/ $/
/' -e 's/ >/
>/g' -e 's/ //g' -e 's//
/g'
>SOX2
ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGAC
>POU5F1
CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGTCGGAAGGTAGTCGTCAGTGCAGCGAGTCC
>NANOG
ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGT