grep搜索特定词
查看含有如下ENSG的行
# Show the line(s) that mention one specific Ensembl gene ID.
# -F treats the ID as a fixed string, so the "." before the version number is
# a literal dot instead of a regex wildcard that matches any character.
grep -F -- 'ENSG00000282253.1' Homo_sapiens.GRCh38.cdna.all.fa
# Peek at the first 7 lines of the file to see its layout.
head -n 7 Homo_sapiens.GRCh38.cdna.all.fa
awk提取特定列
第四列:
# Print the 4th whitespace-separated field of every line in the file.
awk -v col=4 '{ print $col }' Homo_sapiens.GRCh38.cdna.all.fa
第三列
# Print the 3rd whitespace-separated field of every line in the file.
awk -v col=3 '{ print $col }' Homo_sapiens.GRCh38.cdna.all.fa
去掉fasta序列,只保留含">"的标题行,保存到ridfq.txt
# Keep only the lines that contain ">" and save them to ridfq.txt; every other
# line of the input is dropped.
grep -e '>' -- Homo_sapiens.GRCh38.cdna.all.fa > ridfq.txt
只要前四列,一次完成
# One pass over the file: select the lines containing ">" and write only their
# first four whitespace-separated fields, joined by single spaces, to ridfq.txt.
awk '/>/ { print $1, $2, $3, $4 }' Homo_sapiens.GRCh38.cdna.all.fa > ridfq.txt
去除第二列
# Drop the 2nd space-delimited column of ridfq.txt: keep field 1 and fields 3
# onward, and write the result to m2.txt.
cut -d ' ' -f 1,3- -- ridfq.txt > m2.txt
去除第二列不需要的前缀
# Delete every occurrence of the "chromosome:GRCh38:" prefix from m2.txt,
# rewriting the file in place.
sed -i -e 's/chromosome:GRCh38://g' m2.txt
awk以冒号和空格分隔
# Split each line of m2.txt on either ":" or a single space and print the
# first ten resulting fields separated by spaces (missing fields print empty).
awk -F ':| ' '{
  for (i = 1; i <= 10; i++) {
    printf "%s%s", $i, (i < 10 ? OFS : ORS)
  }
}' m2.txt
# Re-read m2.txt and use its 2nd whitespace-separated column as the input.
# NOTE(review): the original note says not to strip the suffix yet because
# some unexpected lines ("???") show up; those lines are not shown here.
# That column is split on ":" and the third-from-last and second-from-last
# pieces are printed, i.e. start and end. Counting from the right keeps this
# working no matter how many ":"-separated pieces come before them.
# The result is saved to se.txt, which the line-count and paste steps below
# read; without the redirect that file was never created.
awk '{ print $2 }' m2.txt | awk -F ':' '{ print $(NF-2), $(NF-1) }' > se.txt
# Count the lines of se.txt and then m2.txt (the empty pattern matches every
# line, so grep -c reports the line count).
for counted in se.txt m2.txt; do
  grep -c '' "$counted"
done
# Observation: the two line counts are the same.
# Build col13.txt from columns 1 and 3 of m2.txt, then clean it in place:
#   - remove every "gene:" label
#   - remove every ">" marker
# Both substitutions target col13.txt. The "gene:" one used to name "col13.t",
# a file these notes never create, so the label was never actually removed.
awk '{ print $1, $3 }' m2.txt > col13.txt
sed -i -e 's/gene://g' -e 's/>//g' col13.txt
# First attempt: col13.txt and se.txt side by side.
# NOTE: this merged.txt is overwritten by the second paste below.
paste col13.txt se.txt > merged.txt
# dropdot.txt: the 1st column of col13.txt with everything from its first "."
# onward removed.
awk '{ id = $1; sub(/\..*$/, "", id); print id }' col13.txt > dropdot.txt
# merged.txt: dropdot.txt next to the full col13.txt columns.
paste dropdot.txt col13.txt > merged.txt
# fina.txt: merged.txt with the se.txt columns appended.
paste merged.txt se.txt > fina.txt
# In place, on each line: keep the text before the line's first ".", delete
# that "." together with the run of non-space characters after it, and keep
# the space that follows. Lines that do not match are left untouched.
sed -i -e 's/^\([^.]*\)\.[^ ]* /\1 /' Homo_sapiens.GRCh38.cdna.all.deldot.fa
去掉序列取第一列
# Take the lines containing ">" and keep only their first whitespace-separated
# field, writing the list to Homo_sapiens.GRCh38.cdna.first.fa.
# The input is the file edited in place by the preceding sed step. The command
# used to read "Homo_sapiens.GRCh38.cdna.dotdel.all.fa", a spelling that
# appears nowhere else in these notes.
# NOTE(review): confirm which spelling matches the file on disk.
grep ">" Homo_sapiens.GRCh38.cdna.all.deldot.fa | awk '{print $1}' > Homo_sapiens.GRCh38.cdna.first.fa
去掉>
# Remove the leading ">" from every line of the ID list, rewriting it in place.
sed -i -e 's/^>//' Homo_sapiens.GRCh38.cdna.first.fa