文件对象:两个有位置信息的文件,比如polar bear 和blown bear的数据,实例如下:
数据:
ChrB01 84 0.000000 -3.517875 -9.131150 -16.843100 -25.121834 -33.764550 -42.704869 -51.924288
ChrB01 85 0.000000 -3.518447 -9.140762 -16.969798 -25.415831 -34.264817 -43.435084 -52.894483
ChrB01 86 0.000000 -3.518653 -9.144235 -17.019029 -25.538664 -34.493599 -43.804095 -53.439474
ChrB01 87 0.000000 -3.519693 -9.161941 -17.307288 -26.039671 -35.177550 -44.665470 -54.486166
ChrB01 89 0.000000 -3.519315 -9.155483 -17.197048 -25.963084 -35.175892 -44.742408 -54.620859
ChrB01 90 0.000000 -3.519878 -9.165148 -17.374057 -26.461440 -36.079650 -46.100003 -56.462372
ChrB01 91 0.000000 -3.520705 -9.179459 -17.698464 -26.903189 -36.625949 -46.841096 -57.485213
ChrB01 92 0.000000 -3.550725 -9.844786 -18.491352 -27.858983 -37.757775 -48.116431 -58.889869
ChrB01 93 0.000000 -3.550441 -9.835777 -18.361010 -27.555334 -37.260418 -47.449280 -58.057703
ChrB01 94 0.000000 -3.550945 -9.851868 -18.624373 -28.168413 -38.206045 -48.660048 -59.508482
ChrB01 95 0.000000 -3.551117 -9.857413 -18.716636 -28.409118 -38.607796 -49.213285 -60.202906
ChrB01 96 0.000000 -3.551682 -9.875809 -19.145320 -29.118007 -39.602210 -50.561270 -62.009704
ChrB01 97 0.000000 -3.521048 -9.185479 -17.878596 -27.355403 -37.462142 -48.195552 -59.591943
ChrB01 98 0.000000 -3.566567 -10.513228 -19.532988 -29.215875 -39.456631 -50.296648 -61.825081
ChrB01 99 0.000000 -3.566760 -10.525015 -19.708561 -29.545284 -39.886885 -50.744183 -62.171301
ChrB01 100 0.000000 -3.567180 -10.550923 -20.114000 -30.311736 -41.006055 -52.225258 -64.083858
ChrB01 101 0.000000 -3.578718 -11.781507 -21.478019 -32.040562 -43.226269 -55.017674 -67.515234
ChrB01 102 0.000000 -3.577940 -11.630547 -20.772247 -30.741033 -41.316284 -52.423883 -64.051384
ChrB01 103 0.000000 -3.578030 -11.646770 -20.785916 -30.745039 -41.308127 -52.398213 -63.997668
代码:
#!/usr/bin/perl -w use strict; my $pb_file = shift; my $bb_file = shift; my $n1 = 18; my $n2 = 10; open (AA, "gzip -dc $pb_file|") or die "$!"; open (BB, "gzip -dc $bb_file|") or die "$!"; open (CC, ">abfst.txt") or die "$!"; my $pb; my $bb = <BB>; while ($pb = <AA>){ my @pb = split /\t/, $pb; my @bb = split /\t/, $bb; while ($pb[1] != $bb[1]){ if ($pb[1] > $bb[1]){ $bb = <BB>; @bb = split /\t/, $bb; }else{ $pb = <AA>; @pb = split /\t/, $pb; } } my $maxpos1 = 2; my $i; for( $i=3; $i<(2*$n1+3); $i++){ $maxpos1 = $i if ( $pb[$maxpos1] < $pb[$i] ); } my $p1 = ($maxpos1-2) / (2*$n1); my $maxpos2 = 2; my $j; for( $j=3; $j<(2*$n2+3); $j++){ $maxpos2 = $j if ( $bb[$maxpos2] < $bb[$j] ); } my $p2 = ($maxpos2-2) / (2*$n2);
my $p12_2 = ($p1-$p2)*($p1-$p2); my $alpha1 = 2 * $p1 * (1 - $p1); my $alpha2 = 2 * $p2 * (1 - $p2); my $a = $p12_2 - ($n1+$n2)*($n1*$alpha1 + $n2*$alpha2) / 4*$n1*$n2*($n1+$n2-1); my $b = $p12_2 + (4*$n1*$n2-$n1-$n2)*($n1*$alpha1 + $n2*$alpha2) / 4*$n1*$n2*($n1+$n2-1); my $fst; if ($a == 0){ $fst = 0; }else{ $fst = $a / ($a + $b); } print CC "$pb[1]\t$a\t$b\t$fst\t\n"; # print CC "$pb[0]\t$pb[1]\t$a\t$b\t$fst\t\n"; }
输出:
101 0 0 0 102 0 0 0 103 0 0 0 104 0 0 0 105 0 0 0 106 0 0 0 107 0 0 0 108 0 0 0 109 0 0 0 110 -274049.987654321 6772950.01234568 -0.0421686726389265 111 0 0 0 112 0 0 0 113 0 0 0 114 0 0 0 115 0 0 0 116 0 0 0 117 0 0 0 118 0 0 0 119 0 0 0 120 0 0 0 121 -32318.9975 798741.0025 -0.0421686711617838 122 0 0 0
结果分析:待续...........