小程序-根据位置从gff中提取gene
1 需要的文件格式
1: 染色体 位置
2 程序
use strict;
use warnings;

# Report which annotated gene(s) contain each query position.
#
# Inputs (paths are hard-coded below):
#   * a GFF annotation file; only rows whose type column is "gene" are used;
#   * a query file holding one whitespace-separated "chromosome position"
#     pair per line.
# Output: one line "chromosome position gene_id" for every hit.
#
# Chromosome names in the query file are compared against the GFF seqid
# with a leading "chr" removed (so "chrI" in the GFF is looked up as "I").

my $gff_path   = "saccharomyces_cerevisiae_R64-1-1_20110208.gff";
my $query_path = "pos_to_find_gen.txt";
my $out_path   = "pos_with_gene.txt";

# Gene index: $gene_at{chromosome}{start}{end} = gene ID.
my %gene_at;

open my $gff, '<', $gff_path or die "cannot open $gff_path: $!\n";
while (my $line = <$gff>) {
    $line =~ s/\r?\n\z//;

    # Everything after the ##FASTA directive is sequence data, not
    # annotation rows, so stop instead of parsing it line by line.
    last if $line =~ /^##FASTA/;
    next if $line =~ /^#/;

    # GFF columns are tab-delimited; splitting on tabs keeps the whole
    # attribute column intact even when it contains spaces.
    my @fields = split /\t/, $line;
    next if @fields < 9 or $fields[2] ne 'gene';

    my ($start, $end) = @fields[3, 4];
    next unless $start =~ /^\d+$/ and $end =~ /^\d+$/;

    # Capture into a lexical and skip the row when there is no ID, rather
    # than reading $1 after a match that may have failed.
    my ($gene_id) = $fields[8] =~ /(?:^|;)ID=([^;]+)/ or next;

    (my $chr = $fields[0]) =~ s/^chr//;

    $gene_at{$chr}{$start}{$end} = $gene_id;
}
close $gff;

# Open the query file before the output file so that a missing query file
# does not truncate an existing result.
open my $query, '<', $query_path or die "cannot open $query_path: $!\n";
open my $out,   '>', $out_path   or die "failed open $out_path: $!\n";

# Numerically sorted start coordinates per chromosome, built on first use
# so the sort is not repeated for every query line.
my %starts_on;

while (my $line = <$query>) {
    # split ' ' drops leading whitespace and the trailing newline.
    my ($chr, $pos) = split ' ', $line;
    next unless defined $pos;    # blank or incomplete line

    if ($pos !~ /^\d+$/) {
        warn "$query_path line $.: position '$pos' is not a number, skipped\n";
        next;
    }

    # Plain lookup (no nested dereference) so unknown chromosomes are not
    # autovivified into the index.
    my $ends_for_start = $gene_at{$chr} or next;
    my $starts = $starts_on{$chr}
        ||= [ sort { $a <=> $b } keys %$ends_for_start ];

    for my $start (@$starts) {
        # Starts are in ascending numeric order: nothing further can match.
        last if $start > $pos;

        my $id_for_end = $ends_for_start->{$start};
        for my $end (sort { $a <=> $b } keys %$id_for_end) {
            next if $pos > $end;
            print {$out} "$chr $pos $id_for_end->{$end}\n";
            last;    # one hit per start coordinate, as before
        }
    }
}
close $query;
close $out or die "failed to write $out_path: $!\n";
Date: 2013-06-29 22:50:30 CST
HTML generated by org-mode 6.33x in emacs 23