# 以下脚本是读取二进制文件并进行分析处理、对判断有错误的数据进行修改的实例:
##分析指定目录下的日志,提取有用的信息##
##此程序由perl运行##
#use Digest::MD5 qw(md5_hex);
#print "Digest is ", md5_hex("foobarbaz"), "\n";
#use Digest::MD5 qw(md5 md5_hex md5_base64);
#$digest = md5($data);
#$digest = md5_hex($data);
#$digest = md5_base64($data);
# OO style
use Digest::MD5;
#$ctx = Digest::MD5->new;
#$ctx->add($data);
#$ctx->addfile($file_handle);
#$digest = $ctx->digest;
#$digest = $ctx->hexdigest;
#$digest = $ctx->b64digest;
# Shared helper functions

# Render an integer as a little-endian (low byte first) hex string.
#   $_[0]  value to encode; a numeric string is accepted and coerced to a number
#   $_[1]  number of bytes to emit (two lowercase hex digits per byte)
# Returns the hex string; bits above the requested byte count are dropped.
# Usage: $strhex = inter2lowhex("9535",4);   # "3f250000"
#        $strhex = inter2lowhex(9535,2);     # "3f25"
sub inter2lowhex
{
    my ($value, $byte_count) = @_;
    my $remaining = $value * 1;    # force numeric interpretation of the input
    my @hex_pairs;
    my $emitted = 0;
    while ($emitted < $byte_count) {
        # Take the current lowest byte, then shift the next byte into its place.
        push @hex_pairs, sprintf("%02x", $remaining & 255);
        $remaining >>= 8;
        $emitted++;
    }
    return join("", @hex_pairs);
}
# Decode a little-endian (low byte first) hex string into an integer.
#   $_[0]  hex string
#   $_[1]  number of hex digits to consume (i.e. twice the byte count)
# Returns the decoded integer.
# Usage: $inter = lowhex2inter("9535",4);    # bytes 0x95,0x35 -> 0x3595
sub lowhex2inter
{
    my ($hex_text, $digit_count) = @_;
    $hex_text = "$hex_text";             # work on the plain string form
    my $pair_count = $digit_count / 2;   # two hex digits per byte
    my $total = 0;
    for (my $step = 0; $step < $pair_count; $step++) {
        # Start at the last pair (most significant byte) and move towards the
        # first pair, shifting the accumulated value up one byte each time.
        my $offset = ($pair_count - $step - 1) * 2;
        $total = ($total << 8) + hex(substr($hex_text, $offset, 2));
    }
    return $total;
}
# Build a string consisting of the same text repeated a given number of times.
#   $_[0]  text to repeat
#   $_[1]  repetition count
# Returns the repeated string ("" when the count is zero or negative).
# Usage: makecharnum("0",4);    # "0000"
sub makecharnum
{
    my ($piece, $times) = @_;
    # A counter running 0, 1, 2, ... while it stays below $times performs
    # ceil($times) rounds, so a fractional count is rounded up before it is
    # handed to the repetition operator (which would otherwise truncate it).
    my $rounds = int($times);
    $rounds++ if $rounds < $times;
    return $piece x $rounds;
}
# Compute a CRC-32 checksum over the bytes of a string.
#   $input       data to checksum
#   $init_value  optional starting CRC (default 0); passing the CRC of a
#                previous chunk continues the checksum across chunks
#   $polynomial  optional polynomial used to build the lookup table
#                (default 0xedb88320)
# Returns the checksum as an integer. With the defaults the string
# "123456789" yields 0xCBF43926.
# Usage: makecrc32($string,$crcvalue);
sub makecrc32
{
    my ($input, $init_value, $polynomial) = @_;
    $init_value = 0          if !defined $init_value;
    $polynomial = 0xedb88320 if !defined $polynomial;

    # One table entry per possible byte value: the byte pushed through eight
    # shift-right steps, XOR-ing in the polynomial whenever a 1 bit falls out.
    my @table = map {
        my $entry = $_;
        foreach my $bit (1 .. 8) {
            $entry = ($entry & 1) ? (($entry >> 1) ^ $polynomial) : ($entry >> 1);
        }
        $entry;
    } 0 .. 255;

    # The running value is kept inverted while bytes are folded in.
    my $crc = $init_value ^ 0xffffffff;
    foreach my $byte (unpack('C*', $input)) {
        my $slot = ($crc ^ $byte) & 0xff;
        $crc = $table[$slot] ^ (($crc >> 8) & 0xffffff);
    }
    return $crc ^ 0xffffffff;
}
# Run the analysis entry point.
analysemain();
# Main analysis routine.
# Call format: analysemain();
#
# For every regular file in ./org whose name matches /S.+/ this routine
#   1. copies the file header (type, time, version, node id, sequence number,
#      record count) to a file of the same name under ./dest and logs each
#      header field;
#   2. copies each 110-byte transaction record and logs selected fields;
#      records whose station/device id is one of the listed faulty ids get
#      their payment-method byte (offset 59) rewritten to 0x01;
#   3. appends the binary MD5 digest of everything written so far to the
#      output file and logs the same digest in hex.
# Every log message goes both to STDOUT and to ./result.log.
#
# Takes no arguments; the return value carries no information.
# Dies when a directory or file cannot be opened, read or closed.
sub analysemain
{
    # Directory holding the original files and directory receiving the copies.
    my $dirname = "./org";
    my $dirdest = "./dest";
    # Result log; opened in overwrite mode.
    my $logfilepath = "./result.log";

    # Size in bytes of one transaction record.
    my $recordlen = 110;
    # Offset of the payment-method byte inside a record.
    my $payoffset = 59;

    # Header layout, in file order (1+7+1+4+4 bytes, then the 4-byte record
    # count): [ key, log label, length in bytes, decode as little-endian int ].
    my @headerfields = (
        [ "filetype", "文件类型", 1, 0 ],
        [ "time",     "时间",     7, 0 ],
        [ "version",  "文件版本", 1, 0 ],
        [ "node",     "节点标识", 4, 0 ],
        [ "sequence", "文件序列", 4, 1 ],
        [ "salenum",  "记录数",   4, 1 ],
    );

    # Record fields that are logged: [ key, log prefix, offset, length ].
    # (transaction time, card logical number, payment method, current station)
    my @recordfields = (
        [ "time",    " 时间:",     0,  4 ],
        [ "card",    " 票卡编号:", 8,  8 ],
        [ "payment", " 支付方式:", 59, 1 ],
        [ "station", " 节点编号:", 74, 4 ],
    );

    # Station/device ids (hex of record bytes 74-77) whose records are treated
    # as erroneous, mapped to the marker appended to the log line.
    my %errmark = (
        "02370805" => " --5",
        "02370806" => " --6",
        "02370807" => " --7",
    );

    # Replacement payment-method value: hex text for the log, raw byte for the file.
    my $newpayhex  = inter2lowhex(1, 1);
    my $newpaybyte = pack("H*", $newpayhex);

    opendir(my $logdir, $dirname) || die "Error in opening dir $dirname\n";
    # Three-argument open keeps the mode separate from the path.
    open(my $filelog, '>', $logfilepath) || die "Error in opening outputfile >$logfilepath\n";

    # Echo a message to STDOUT and record it in the result log.
    my $logboth = sub {
        my ($text) = @_;
        print($text);
        print {$filelog} $text;
    };

    # defined() keeps the loop going even for an entry whose name is false ("0").
    while (defined(my $filename = readdir($logdir)))
    {
        # NOTE(review): an earlier comment described this filter as ".txt
        # suffix", but the pattern selects any name containing "S" followed by
        # at least one more character - confirm the intended filter.
        next unless $filename =~ m/S.+/;

        my $infilepath  = "$dirname/$filename";
        my $outfilepath = "$dirdest/$filename";
        # Only regular files can be parsed; skip directories and the like.
        next unless -f $infilepath;

        open(my $fileinput, '<', $infilepath) || die "can not open the file $infilepath\n";
        binmode($fileinput);
        # "+>" truncates the output and also allows reading it back for the MD5.
        open(my $fileoutput, '+>', $outfilepath) || die "can not open the file $outfilepath\n";
        binmode($fileoutput);
        $logboth->("正在处理文件:$infilepath.........\n");

        # Read up to $want bytes from the input file. Returns what was read,
        # which is shorter than requested at end of file; dies on a read error.
        my $readbytes = sub {
            my ($want) = @_;
            my $buffer = "";
            my $got = read($fileinput, $buffer, $want);
            die "can not read the file $infilepath: $!\n" unless defined $got;
            return $buffer;
        };

        # --- header: log every field and copy it through unchanged ---
        my %header;
        foreach my $field (@headerfields) {
            my ($key, $label, $length, $isnumber) = @$field;
            my $raw = $readbytes->($length);
            print {$fileoutput} $raw;
            if (length($raw) != $length) {
                # Truncated header: report it and treat the file as having no records.
                $logboth->("文件头不完整:" . $label . "\n");
                last;
            }
            my $shown = unpack("H*", $raw);
            $shown = lowhex2inter($shown, $length * 2) if $isnumber;
            $header{$key} = $shown;
            $logboth->($label . ":" . $shown . "\n");
        }
        # Number of transaction records announced by the header.
        my $salenum = defined $header{salenum} ? $header{salenum} : 0;

        # --- records ---
        my $errnum = 0;    # number of records flagged as erroneous
        for (my $i = 0; $i < $salenum; $i++) {
            my $record = $readbytes->($recordlen);
            my $got = length($record);
            if ($got != $recordlen) {
                # Fewer bytes than a full record: copy whatever is left without
                # interpreting it and stop, instead of parsing a partial record.
                $logboth->(">>>记录$i 数据不完整(应为 ${recordlen} 字节,实际 ${got} 字节),停止读取\n");
                print {$fileoutput} $record;
                last;
            }

            $logboth->(">>>记录$i ");
            my %hexof;
            foreach my $field (@recordfields) {
                my ($key, $label, $offset, $length) = @$field;
                $hexof{$key} = unpack("H*", substr($record, $offset, $length));
                $logboth->($label . $hexof{$key});
            }
            # Position in the output file at which this record starts.
            my $tellpos = tell($fileoutput);
            $logboth->(" 文件位置:$tellpos");

            my $mark = $errmark{ $hexof{station} };
            if (defined $mark) {
                $logboth->($mark);
                $errnum++;
            }
            $logboth->("\n");

            if (defined $mark) {
                # Rewrite the payment method in the buffer before it is
                # written, so no seeking back into the output is needed.
                substr($record, $payoffset, 1) = $newpaybyte;
                $logboth->("change:$newpayhex\n");
            }
            print {$fileoutput} $record;
        }
        $logboth->("数据异常数量:$errnum\n");

        # --- MD5 over everything written so far, appended to the output ---
        seek($fileoutput, 0, 0);    # rewind so the whole output is digested
        my $ctx = Digest::MD5->new;
        $ctx->addfile($fileoutput);
        # digest() resets the context, so it may be called only once; the hex
        # form for the log is derived from the same bytes.
        my $digest    = $ctx->digest;
        my $digesthex = unpack("H*", $digest);
        $logboth->("计算文件的md5:$digesthex");
        seek($fileoutput, 0, 2);    # back to the end before appending
        print {$fileoutput} $digest;
        $logboth->("\n\n");

        close($fileinput);
        # Buffered write errors surface at close time, so check it.
        close($fileoutput) || die "can not close the file $outfilepath: $!\n";
    }
    closedir($logdir);
    close($filelog) || die "can not close the file $logfilepath: $!\n";
    return;
}