Perl数据提取

#!/usr/local/bin/perl -w
# author by luyi
use strict;
#use LWP::Simple qw(getstore); 


# Purpose: download the Dota 2 overview page with wget, extract the player
# and server counts for the PerfectWorldTelecom and PerfectWorldUnicom
# regions, and append one tab-separated row per region to $all_data (each
# row is also echoed to STDOUT).
#
# Row layout: timestamp, region, $version, then 13 counts.
#   - counts 1-10 come from the "Players" table; per the original author's
#     note the columns are: Total, In Queue, In Match, In Solo Match,
#     In Practice, In Tournament, In Coop, In Team Match, Spectating, Idle.
#   - counts 11-13 come from the "Servers" table (column meanings are not
#     recorded in this script -- TODO confirm against the page).
#
# Outcomes:
#   - download failed  -> a row of 13 zeros is written for each region;
#   - page not matched -> no row is written, a notice is appended to $info;
#   - the script exits with status 0 in all of these cases (die on an
#     unopenable file is the only non-zero exit).
use POSIX qw(strftime);
use File::Copy qw(copy);

my $obj_file     = "/home/getdata/dota2_html.txt";
my $info         = "/tmp/get_dota2_players.info";
my $all_data     = "/export/remotedata/dota2/wget_dota2_players.txt";
my $all_data_old = "/export/remotedata/dota2/wget_dota2_players_old.txt";
my $address      = 'http://www.dota2.com/overview?u=public&appid=570&internal=1';
my $dos2unix     = '/usr/bin/dos2unix';
my $version      = "20131115";

# Number of count columns in every emitted row (10 player + 3 server).
my $count_columns = 13;

# Locate wget; an empty string means it is not on PATH.
my $cmd = qx(which wget);
$cmd = '' unless defined $cmd;
chomp($cmd);

# Same format the original obtained from `date +%F' '%T`, without a subshell.
my $curtime = strftime('%Y-%m-%d %H:%M:%S', localtime);

# Keep a copy of the previous results before appending to them.
if (-s $all_data) {
    copy($all_data, $all_data_old)
        or warn "Can't back up $all_data to $all_data_old : $!\n";
}

# Drop any page left over from the previous run so stale data is never parsed.
if (-f $obj_file) {
    unlink $obj_file or warn "Can't remove $obj_file : $!\n";
}

# Fetch the page. The list form of system() bypasses the shell, so the '&'
# and '?' in the URL need no quoting. wget appends its own log to $info (-a).
my $status = -1;    # stays non-zero when wget could not be located
if ($cmd ne '') {
    system($cmd, $address, '-a', $info, '-O', $obj_file);
    $status = $?;
}
print "$cmd $address -a $info -O $obj_file\n";

open(my $info_fh, '>>', $info)     or die "Can't open $info : $!";
open(my $rst_fh,  '>>', $all_data) or die "Can't open $all_data : $!";

if ($status != 0) {
    # Download failed: record an all-zero placeholder row per region. This is
    # decided BEFORE touching $obj_file, which may not exist at this point.
    my @zeros = (0) x $count_columns;
    _emit_row($rst_fh, $curtime, 'PerfectWorldTelecom', $version, @zeros);
    _emit_row($rst_fh, $curtime, 'PerfectWorldUnicom',  $version, @zeros);
}
else {
    # Normalise line endings on disk; the pattern below expects bare "\n".
    system($dos2unix, $obj_file);

    open(my $html_fh, '<', $obj_file) or die "Can't open $obj_file : $!";
    my $html = do { local $/; <$html_fh> };    # slurp without leaking $/
    close($html_fh);
    $html = '' unless defined $html;

    # Belt and braces in case dos2unix is missing or failed.
    $html =~ s/\r\n/\n/g;

    # Captures 1-10: Telecom player counts; 11-20: Unicom player counts;
    # 21-23: Telecom server counts; 24-26: Unicom server counts.
    # NOTE(review): the pattern is kept byte-for-byte. Its '.+' runs are
    # greedy and, under /s, span lines, so which cells get captured depends
    # on the exact page markup; do not "tidy" it without a sample page.
    my $layout_re = qr/.+Players.+\<th\>PerfectWorldTelecom<\/th\>\n.+\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>.+PerfectWorldUnicom.+<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>.+Totals.+\<h2\>Servers.+\<th\>PerfectWorldTelecom<\/th\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>.+PerfectWorldUnicom.+<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>\n\s+\<td\>(\d+)\<\/td\>.+Totals/s;

    # In list context a successful match returns all 26 captures; a failed
    # match returns the empty list.
    my @cell = $html =~ $layout_re;

    if (@cell) {
        _emit_row($rst_fh, $curtime, 'PerfectWorldTelecom', $version,
                  @cell[0 .. 9, 20 .. 22]);
        _emit_row($rst_fh, $curtime, 'PerfectWorldUnicom', $version,
                  @cell[10 .. 19, 23 .. 25]);
    }
    else {
        # Layout no longer recognised: write no data row, leave a notice.
        print {$info_fh} "$curtime : The dota2 web information format have changed!\n";
        print "$curtime : The dota2 web information format have changed!\n\n";
    }
}

close($info_fh) or warn "Can't close $info : $!\n";
# Buffered write errors only surface at close, so check the results file.
close($rst_fh) or die "Can't close $all_data : $!";

# Print one tab-separated result row to STDOUT and append it to $fh.
#   $fh      - filehandle of the results file (opened for append)
#   $time    - timestamp string for the row
#   $region  - region label
#   $ver     - layout version tag
#   @counts  - the count columns, in output order
sub _emit_row {
    my ($fh, $time, $region, $ver, @counts) = @_;
    my $row = join("\t", $time, $region, $ver, @counts) . "\n";
    print $row;
    print {$fh} $row or die "Can't write to results file : $!";
    return;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
要提取TCGA临床数据,可以使用Perl脚本来实现。首先,我们需要了解TCGA临床数据的存储位置和格式。 在Perl脚本中,可以按以下步骤提取TCGA临床数据: 1. 首先下载TCGA临床数据文件,可以从TCGA官方网站或相关数据库中获取。这些文件通常以文本格式(如CSV或TSV)存储。 2. 在Perl脚本中,使用文件操作函数打开并读取所下载的临床数据文件:用open函数以读取模式打开文件句柄,再将文件内容读取到变量中。 3. 读取数据文件后,可以使用字符串处理函数和正则表达式来解析数据。根据数据的格式,可以使用split函数或正则表达式将每行数据分割成字段,并将这些字段存储到适当的变量中。 4. 根据需求,可以使用条件语句、循环和其他Perl控制结构来筛选和处理数据。例如,可以根据某个特定的临床变量或病例特征来过滤数据,并将符合条件的数据存储到新的变量或文件中。 5. 对数据进行处理和筛选后,可以根据具体需求选择合适的输出方式:可以将处理后的数据输出为文本文件,也可以将其存储到数据库中,或者进行其他进一步的分析。 综上所述,使用Perl脚本可以实现对TCGA临床数据的提取和处理。具体的脚本代码会因数据格式和需求的不同而有所差异,上述步骤仅提供了一个基本的脚本框架,需要根据具体情况进行相应的调整和改进。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值