Compare array

 前段时间写的upgrade,现在碰到了问题,在处理key.pl的环节上,大数据量时候效率很低,以前设计的时候没有考虑到在这个环节上能有大数据量,所以写的算法是很简单的顺序遍历,赋值也是直接赋值.结果2200条record就要花一小时才跑完,my god.
 接到user的反映后,知道这个问题得重写.趁着五一假期研究一番,现采用二叉树算法,采用引用替代直接赋值,将循环用子函数独立出来,尽量减少遍历的次数.优化后2200同样的数据,只要10秒就OK了.
   检查出一个bug:split在拆分字符串时,会直接丢弃结尾的空字段。解决方法:先在字符串末尾追加一个占位值,split之后再把这个占位值去掉(也可以给split传入负数的limit参数,例如 split /\|/, $str, -1,来保留结尾的空字段)。例如:
[@more@]
# Demonstrates the workaround for split() discarding trailing empty fields:
# append a sentinel field, split, then drop the sentinel again.
# The "|" must be escaped; an unescaped /|/ is an empty alternation and
# splits the string into single characters instead of fields.
my $record = "1|2|||aa|";
$record = $record . "|1";                      # sentinel keeps the trailing empty field alive
my @fields = split /\|/, $record;
@fields = @fields[ 0 .. ( $#fields - 1 ) ];    # remove the sentinel again
下面是原代码:
#!/usr/local/bin/perl
#
# System :
# Program ID :
# Description :
#
# Create By :
# Creation Date : 11-June-2008
use warnings;
use strict;
use File::Spec;
use POSIX;
# Positional command-line arguments. All eight are required: the last one
# is opened for writing unconditionally further down, so fail early instead
# of after all the work has been done.
die "Usage: $0 only_before only_after diff_file key_file compare_file table_name temp_path only_be_file\n"
    if @ARGV < 8;

my $only_before  = $ARGV[0];    # input file, read into @only_before
my $only_after   = $ARGV[1];    # input file, read into @only_after
my $diff_file    = $ARGV[2];    # output: full report (before / after / difference)
my $key_file     = $ARGV[3];    # input: key definitions, ":"-separated lines
my $compare_file = $ARGV[4];    # input: column-name list, ":"-separated lines
my $table_name   = $ARGV[5];    # prefix selecting lines in the key/compare files
my $temp_path    = $ARGV[6];    # not used by the code in this script
my $only_be_file = $ARGV[7];    # output: receives only the "Before upgrade" section

my $time = strftime( "%Y%m%d%H%M", localtime );
print "Start running at $time\n";

# Filled while the key and compare files are parsed.
my ( @key, @compare, %compare );
my $split_tag;
# Parse the key file. Lines that start with the table name (matched
# literally, case-insensitively) look like
#     <name>:<id>:<split_tag>:<key1,key2,...>
# and fill $key[$id] with the key column names. When the split tag is "N"
# the key list is stored in the fixed slot 999999 instead.
open( my $fh, '<', $key_file ) || die "Can't open $key_file:$!\n";
while ( my $line = <$fh> ) {
    chomp $line;
    next unless $line =~ /^\Q$table_name\E/i;

    my @field = split /:/, $line;
    my ( $id, $tag, $key_list ) = @field[ 1 .. 3 ];

    # $split_tag is file-level state read again later; the last matching
    # line wins, even if that leaves it undefined.
    $split_tag = $tag;
    next unless defined $split_tag;

    $id = 999999 if $split_tag eq "N";
    @{ $key[$id] } = split /,/, ( defined $key_list ? $key_list : '' );
}
close($fh);
# Parse the compare file. For lines that start with the table name (matched
# literally, case-insensitively) the third ":"-separated field is a
# comma-separated list of column names; the last matching line wins.
open( $fh, '<', $compare_file ) || die "Can't open $compare_file:$!\n";
while ( my $line = <$fh> ) {
    chomp $line;
    next unless $line =~ /^\Q$table_name\E/i;

    my $column_list = ( split /:/, $line )[2];
    @compare = split /,/, ( defined $column_list ? $column_list : '' );
}
close($fh);

# Array to hash: upper-cased column name => position in @compare.
for my $pos ( 0 .. $#compare ) {
    $compare{ uc $compare[$pos] } = $pos;
}
# Read both record files completely into memory, one array element per line.
open( $fh, '<', $only_after ) || die "Can't open $only_after:$!\n";
my @only_after = <$fh>;
close($fh);

open( $fh, '<', $only_before ) || die "Can't open $only_before:$!\n";
my @only_before = <$fh>;
close($fh);
# Zero-based index of the column that holds the key id. It is only
# meaningful when the split tag is numeric; otherwise use -1, which is what
# "$split_tag - 1" evaluated to for "N"/undef, but without the warning.
my $split_col =
    ( defined $split_tag && $split_tag ne "N" ) ? $split_tag - 1 : -1;

# key(), diff() and col() all dereference their arguments, so the arrays
# and the hash have to be passed by reference.
my $before = key( \@only_before, \@key, \%compare, $split_col );
my $after  = key( \@only_after,  \@key, \%compare, $split_col );
my ( $before2, $after2, $diff ) = diff( $before, $after );
$diff = col( \@compare, $diff );
# Write the reports. Both output files are removed first and then opened in
# append mode, so each run starts from an empty file.
unlink $diff_file;
unlink $only_be_file;
open( $fh, '>>', $diff_file ) || die "Can't open $diff_file:$!\n";
open( my $fh2, '>>', $only_be_file ) || die "Can't open $only_be_file:$!\n";

# Prints one titled section: header, record count, then one line per record
# with every element except the first (the key) followed by "|".
my $write_section = sub {
    my ( $out, $title, $records ) = @_;

    print {$out} "===$title===\n";
    print {$out} "Total : ", scalar @{$records}, "\n";
    for my $record ( @{$records} ) {
        my @chunk = $record ? @{$record} : ();
        shift @chunk;    # element 0 is the key, not part of the output
        print {$out} "$_|" for @chunk;
        print {$out} "\n";
    }
    print {$out} "\n";
};

# Records only present before the upgrade go to both files.
if ( @{$before2} ) {
    $write_section->( $_, 'Before upgrade', $before2 ) for ( $fh, $fh2 );
}
close($fh2) || die "Can't close $only_be_file:$!\n";

# Records only present after the upgrade go to the main report only.
if ( @{$after2} ) {
    $write_section->( $fh, 'After upgrade', $after2 );
}

# Difference records are printed from element 0 on, each preceded by a
# running number; the number is echoed to STDOUT as progress output.
if ( @{$diff} ) {
    print $fh "===difference===\n";
    print $fh "Total : ", scalar @{$diff}, "\n";
    my $seq = 0;
    for my $record ( @{$diff} ) {
        $seq++;
        print $fh $seq, "\n";
        print "[D] ", $seq, "\n";
        print $fh "$_|" for ( $record ? @{$record} : () );
        print $fh "\n";
    }
    print $fh "\n";
}
close($fh) || die "Can't close $diff_file:$!\n";
# diff(\@before, \@after)
#
# Pairs up records from two lists by their key and reports what changed.
# Every record is an array ref [ $key, $chunk1, $chunk2, ... ] where each
# chunk is a "|"-joined group of column values.
#
# Each "before" record is paired with the first not-yet-paired "after"
# record that has the same key. For a pair, one output row is built with one
# entry per chunk: the chunk unchanged when both sides are equal, otherwise
# the chunk with every differing column replaced by "diff{old,new}".
# Missing chunks or columns on either side are treated as empty strings.
#
# Returns three array refs:
#   1. "before" records without a partner (original order)
#   2. "after" records without a partner (original order)
#   3. the diff rows, in the order of the paired "before" records
#
# The input records are not modified. Undefined list entries are ignored.
# As a side effect the diff rows are echoed to STDOUT with a "[DDE]" prefix.
sub diff{
    my $before = shift;
    my $after = shift;

    my @before = grep { defined } @{$before};
    my @after  = grep { defined } @{$after};

    # key => queue of @after indices that still wait for a partner; this
    # replaces the nested scan with one hash lookup per record.
    my %waiting;
    for my $idx ( 0 .. $#after ) {
        my $k = $after[$idx][0];
        $k = '' unless defined $k;
        push @{ $waiting{$k} }, $idx;
    }

    my ( @only_before, @diff, %paired );
    for my $old (@before) {
        my $k = defined $old->[0] ? $old->[0] : '';
        my $queue = $waiting{$k};
        unless ( $queue && @{$queue} ) {
            push @only_before, $old;
            next;
        }

        my $idx = shift @{$queue};
        $paired{$idx} = 1;
        my $new = $after[$idx];

        my $last = $#{$old} > $#{$new} ? $#{$old} : $#{$new};
        my @row;
        for my $element ( 1 .. $last ) {
            my $was = defined $old->[$element] ? $old->[$element] : '';
            my $now = defined $new->[$element] ? $new->[$element] : '';
            if ( $was eq $now ) {
                push @row, $was;
                next;
            }

            # Split with a trailing separator and limit -1 so that trailing
            # empty columns survive, then drop the extra empty field.
            my @was_col = split /\|/, "$was|", -1;
            pop @was_col;
            my @now_col = split /\|/, "$now|", -1;
            pop @now_col;

            my $top = $#was_col > $#now_col ? $#was_col : $#now_col;
            for my $c ( 0 .. $top ) {
                my $x = defined $was_col[$c] ? $was_col[$c] : '';
                my $y = defined $now_col[$c] ? $now_col[$c] : '';
                $was_col[$c] = $x eq $y ? $x : "diff{$x,$y}";
            }
            push @row, join( "|", @was_col );
        }
        push @diff, \@row;
    }

    my @only_after = map { $after[$_] } grep { !$paired{$_} } 0 .. $#after;

    # Progress/debug echo of every diff row.
    for my $n ( 0 .. $#diff ) {
        print "[DDE] ", $n + 1, "\n";
        print $_ for @{ $diff[$n] };
        print "\n";
    }

    return ( \@only_before, \@only_after, \@diff );
}

# key(\@lines, \@key, \%compare, $split_col)
#
# Converts raw "|"-separated record lines into comparison records.
#
#   $file      - array ref of record lines (a trailing newline is removed)
#   $key       - array ref; $key->[$id] is an array ref of key column names
#   $compar    - hash ref: upper-cased column name => column index
#   $split_col - index of the column whose value selects $key->[$id]
#
# Also reads the file-level $split_tag: when it is defined and not "N" the
# id comes from column $split_col of each line, otherwise the fixed slot
# 999999 is used.
#
# Returns an array ref with one record per non-blank line, in input order:
#   [ $key_value, $chunk1, $chunk2, ... ]
# $key_value is the "|"-join of the key column values (a key column that
# cannot be resolved contributes an empty string). The chunks are the line's
# columns in groups of five, "|"-joined; the last chunk holds the remainder
# and is an empty string when the column count is an exact multiple of five.
sub key
{
    my $file = shift;
    my $key = shift;
    my $compar = shift;
    my $split_col = shift;

    my $chunk_size = 5;
    my $id_from_column = ( defined $split_tag ) && ( $split_tag ne "N" );
    my @change;

    for my $raw ( @{$file} ) {
        next if !defined $raw || $raw =~ /^$/;    # skip blank lines entirely
        my $line = $raw;
        chomp $line;

        # Trailing separator plus limit -1 keeps trailing empty columns; the
        # extra empty field this creates is removed again.
        my @temp = split /\|/, "$line|", -1;
        pop @temp;

        my $id = $id_from_column ? $temp[$split_col] : 999999;
        my $key_names = defined $id ? $key->[$id] : undef;

        my @key_value;
        for my $name ( @{ $key_names || [] } ) {
            my $col = $compar->{ uc $name };
            my $val = defined $col ? $temp[$col] : undef;
            push @key_value, defined $val ? $val : '';
        }
        my @record = ( join "|", @key_value );

        # Full groups of $chunk_size columns first, then the remainder.
        my $start = 0;
        while ( $start + $chunk_size - 1 <= $#temp ) {
            push @record, join "|", @temp[ $start .. $start + $chunk_size - 1 ];
            $start += $chunk_size;
        }
        push @record, join "|", @temp[ $start .. $#temp ];

        push @change, \@record;
    }
    return \@change;
}

# col(\@compare, $diff)
#
# Labels the change markers in diff rows with their column names.
#
#   $compare - array ref of column names, in column order
#   $diff    - array ref of diff rows; each row is an array ref of
#              "|"-joined chunks of five columns, chunk 0 first
#
# A column that starts with "diff{" is rewritten to "<NAME>{", where NAME is
# the column name at the same overall position (chunk index * 5 + position
# inside the chunk). If no name exists for that position the marker becomes
# a bare "{". All other text, including empty trailing columns, is kept.
#
# The rows are modified in place; the same $diff reference is returned.
sub col
{
    my $compare = shift;
    my $diff = shift;

    my $chunk_size = 5;    # columns per chunk, same grouping as in key()

    for my $row ( @{$diff} ) {
        next unless defined $row;
        for my $element ( 0 .. $#{$row} ) {
            my $chunk = $row->[$element];
            next unless defined $chunk && $chunk =~ /diff\{/;

            # Trailing separator plus limit -1 keeps trailing empty columns
            # so the chunk can be rebuilt exactly.
            my @column = split /\|/, "$chunk|", -1;
            pop @column;

            for my $pos ( 0 .. $#column ) {
                my $name = $compare->[ $element * $chunk_size + $pos ];
                $name = "" unless defined $name;
                $column[$pos] =~ s/^diff\{/${name}\{/;
            }
            $row->[$element] = join "|", @column;
        }
    }
    return $diff;
}

来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/640706/viewspace-1043926/,如需转载,请注明出处,否则将追究法律责任。

转载于:http://blog.itpub.net/640706/viewspace-1043926/

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值