通过解析job history日志和job config文件可以获得Job的具体运行情况,包括job的配置信息、运行状态及task、Attempt的运行状态和相应的counter。脚本如下:
parseJobHistory.pl
use File::Basename;
use XML::Simple;
use Data::Dumper;
# Parse a Hadoop job history log together with its job configuration file
# and dump the collected job, task, map-attempt and reduce-attempt records
# (including their counters) plus the job configuration to STDOUT.
#
# Usage: perl parseJobHistory.pl path-of-job-history-file
#
# The configuration file is looked up in the same directory as the history
# file; its name is built from the first five '_'-separated fields of the
# history file name followed by '_conf.xml'.
use strict;
use warnings;

# Merge all fields of one parsed history record into $store->{$id}.
# A record id can appear on several lines; later lines overwrite the
# fields they share with earlier ones and add the rest.
sub _merge_record {
    my ($store, $id, $record) = @_;
    for my $field (keys %{$record}) {
        $store->{$id}{$field} = $record->{$field};
    }
    return;
}

if (!defined($ARGV[0]) or $ARGV[0] eq '') {
    print "useage: perl parseJobHistory.pl path-of-job-history-file\n";
    exit;
}

my $filename = $ARGV[0];
my $basename = basename($filename);
my $dirname  = dirname($filename);

# Keep exactly five name fields; a missing field becomes an empty string so
# the derived path is the same one the unchecked concatenation would give.
my @name_fields = split /_/, $basename;
my $configfile  = $dirname . '/'
    . join('_', map { defined $_ ? $_ : '' } @name_fields[0 .. 4])
    . '_conf.xml';

# Hashes must start out empty. Initialising them with "{}" would store a
# stringified hash reference as a key and leak it into every record.
my %JobStatus;
my %TaskStatus;
my %MapAttemptStatus;
my %ReduceAttemptStatus;

open(my $history_fh, '<', $filename)
    or die "can't open $filename: $!\n";

while (my $line = <$history_fh>) {
    chomp $line;

    # A record is "<Type> KEY="value" KEY="value" ...": the type is
    # everything before the first space. Lines without a space carry no
    # key/value pairs and are skipped.
    my ($type, $info) = split / /, $line, 2;
    next unless defined $info;

    # Fresh buffer for every line so nothing carries over between records.
    my %record;
    while ($info =~ /(\w+)="(.*?)" /g) {
        my ($key, $value) = ($1, $2);
        if ($key =~ /COUNTERS\z/) {
            # Counter values are lists of [(name)(display name)(number)];
            # store them as name => number under the counter key.
            while ($value =~ /\[\((.*?)\)\((.*?)\)\((\d+)\)\]/g) {
                $record{$key}{$1} = $3;
            }
        }
        else {
            $record{$key} = $value;
        }
    }

    my $kind     = lc $type;
    my $task_id  = $record{'TASKID'};
    my $has_task = defined $task_id && $task_id =~ /^task_/;

    if ($kind eq 'job') {
        my $job_id = $record{'JOBID'};
        if (defined $job_id && $job_id =~ /^job_/) {
            _merge_record(\%JobStatus, $job_id, \%record);
        }
    }
    elsif ($kind eq 'task') {
        _merge_record(\%TaskStatus, $task_id, \%record) if $has_task;
    }
    elsif ($kind eq 'mapattempt' || $kind eq 'reduceattempt') {
        next unless $has_task && exists $record{'TASK_ATTEMPT_ID'};

        # Attempts are keyed by "<task id>:<attempt id>".
        my $attempt_key = $task_id . ':' . $record{'TASK_ATTEMPT_ID'};
        my $store = $kind eq 'mapattempt'
            ? \%MapAttemptStatus
            : \%ReduceAttemptStatus;
        _merge_record($store, $attempt_key, \%record);
    }
}
close($history_fh);

my $JobConfig = XMLin($configfile);

# The hashes are passed flattened (not by reference) on purpose: this keeps
# the established "$VARn" key/value output layout of the script.
print Dumper %JobStatus;
print Dumper %TaskStatus;
print Dumper %MapAttemptStatus;
print Dumper %ReduceAttemptStatus;
print Dumper %$JobConfig;