linux 更改文件排序方式,Linux文件排序和FASTA文件操作

$ cat <<END >test.fasta

> >SOX2

> ACGAGGGACGCATCGGACGACTGCAGGACTGTC

> ACGAGGGACGCATCGGACGACTGCAGGACTGTC

> ACGAGGGACGCATCGGACGACTGCAGGAC

> >POU5F1

> CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT

> CGGAAGGTAGTCGTCAGTGCAGCGAGTCC

> >NANOG

> ACGAGGGACGCATCGGACGACTGCAGGACTGTC

> ACGAGGGACGCATCGGACGACTGCAGG

> ACGAGGGACGCATCGGACGACTGCAGGACTGTC

> ACGAGGGACGCATCGGACGACTGCAGGACTGT

> END

# 给>号开头的行的行尾加个TAB键,以便隔开名字和序列

# TAB键不可见,直接查看时看不出来

# \(\)表示记录匹配的内容,\1则表示\(\)中记录的匹配的内容

# 后面我们专门讲sed

$ sed 's/^\(>.*\)/\1\t/' test.fasta

>SOX2

ACGAGGGACGCATCGGACGACTGCAGGACTGTC

ACGAGGGACGCATCGGACGACTGCAGGACTGTC

ACGAGGGACGCATCGGACGACTGCAGGAC

>POU5F1

CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT

CGGAAGGTAGTCGTCAGTGCAGCGAGTCC

>NANOG

ACGAGGGACGCATCGGACGACTGCAGGACTGTC

ACGAGGGACGCATCGGACGACTGCAGG

ACGAGGGACGCATCGGACGACTGCAGGACTGTC

ACGAGGGACGCATCGGACGACTGCAGGACTGT

#使用cat -A 可以显示文件中所有的符号

# ^I 表示tab键

# $表示行尾

$ sed 's/^\(>.*\)/\1\t/' test.fasta | cat -A

>SOX2^I$

ACGAGGGACGCATCGGACGACTGCAGGACTGTC$

ACGAGGGACGCATCGGACGACTGCAGGACTGTC$

ACGAGGGACGCATCGGACGACTGCAGGAC$

>POU5F1^I$

CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT$

CGGAAGGTAGTCGTCAGTGCAGCGAGTCC$

>NANOG^I$

ACGAGGGACGCATCGGACGACTGCAGGACTGTC$

ACGAGGGACGCATCGGACGACTGCAGG$

ACGAGGGACGCATCGGACGACTGCAGGACTGTC$

ACGAGGGACGCATCGGACGACTGCAGGACTGT$

# 把所有的换行符替换为空格

# 注意第二个参数,引号内为空格

$ sed 's/^\(>.*\)/\1\t/' test.fasta | tr '\n' ' '

>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGAC >POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT CGGAAGGTAGTCGTCAGTGCAGCGAGTCC >NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGT

# 把最后一个空格替换为换行符

$ sed 's/^\(>.*\)/\1\t/' test.fasta | tr '\n' ' ' | sed -e 's/ $/\n/'

>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGAC >POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT CGGAAGGTAGTCGTCAGTGCAGCGAGTCC >NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGT

# 把 ' >' 替换为 换行符+'>' 注意被替换的是 空格+大于号

# 当连用多个替换命令时,使用-e 隔开

$ sed 's/^\(>.*\)/\1\t/' test.fasta | tr '\n' ' ' | sed -e 's/ $/\n/' -e 's/ >/\n>/g'

>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGAC

>POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGT CGGAAGGTAGTCGTCAGTGCAGCGAGTCC

>NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGG ACGAGGGACGCATCGGACGACTGCAGGACTGTC ACGAGGGACGCATCGGACGACTGCAGGACTGT

# 把所有的空格替换掉

$ sed 's/^\(>.*\)/\1\t/' test.fasta | tr '\n' ' ' | sed -e 's/ $/\n/' -e 's/ >/\n>/g' -e 's/ //g'

>SOX2 ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGAC

>POU5F1 CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGTCGGAAGGTAGTCGTCAGTGCAGCGAGTCC

>NANOG ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGT

# 把TAB键转换为换行符

$ sed 's/^\(>.*\)/\1\t/' test.fasta | tr '\n' ' ' | sed -e 's/ $/\n/' -e 's/ >/\n>/g' -e 's/ //g' -e 's/\t/\n/g'

>SOX2

ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGAC

>POU5F1

CGGAAGGTAGTCGTCAGTGCAGCGAGTCCGTCGGAAGGTAGTCGTCAGTGCAGCGAGTCC

>NANOG

ACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACGAGGGACGCATCGGACGACTGCAGGACTGTCACGAGGGACGCATCGGACGACTGCAGGACTGT

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值