# Perl / shell log-processing cheat sheet.
#
# The notes and the copy-and-paste shell snippets live in POD (read them with
# `perldoc` on this file); the only real Perl code is diffTime() and its
# helper, so the file can also be loaded with require().

use strict;
use warnings;
use Time::Local;

=encoding utf8

=head1 1. Parse a record into a hash

Records are C<key=value> pairs separated by the three characters C<< <|> >>.

=head2 Method 1: split on the separator, then split each pair

The hash is declared inside the per-line loop so that keys from an earlier
line never leak into a later one.

    echo "aip=116.234.49.96<|>ab=<|>auid=113915<|>asid=ff80808125d8b20801273e7ca0530415<|>wv=1.0<|>" |
    perl -MData::Dumper -ne '
        chomp;
        my %h;
        for my $pair (split /<\|>/) {
            $h{$1} = $2 if $pair =~ /(.*?)=(.*)/;
        }
        print Dumper(\%h);
    '

=head2 Method 2: one global match in list context

Note the two non-capturing groups: the leading one makes the separator
optional (the first pair has none in front of it), the trailing one stops each
value at the next separator or at the end of the line. Only the two capturing
groups in between end up in the hash, as alternating keys and values.

    cat /data/tongji/iphone_app/*/app*/2010/03/09/* | head -n 1 |
    perl -MData::Dumper -ne '
        chomp;
        my %h = /(?:<\|>)?(.*?)=(.*?)(?:<\|>|$)/g;
        print Dumper(\%h);
    '

=head1 2. Difference between two timestamps

C<diffTime($st1, $st2)> returns C<$st1 - $st2> in seconds. Both arguments are
strings containing C<YYYY-MM-DD hh:mm:ss>, interpreted in the local time zone.
It returns undef (an empty list in list context) when either argument does
not contain such a timestamp.

    perl -e '
        BEGIN { require("/data/shell/gmodel/inc_model/utils.pl"); }
        print diffTime("2009-12-04 16:58:38", "2009-12-04 16:58:00");
    '

=cut

# Convert a string containing "YYYY-MM-DD hh:mm:ss" into epoch seconds (local
# time). Returns nothing when the string is undefined or has no timestamp.
# Out-of-range fields (e.g. month 13) make Time::Local croak, as before.
sub _timestamp_to_epoch {
    my ($stamp) = @_;
    return unless defined $stamp;

    my ($year, $mon, $mday, $hour, $min, $sec) =
        $stamp =~ /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})/
        or return;

    # Pass the four-digit year as-is: Time::Local treats values above 999 as
    # the actual year, whereas "year - 1900" turns 19xx into a two-digit year
    # that is resolved with a rolling-century guess.
    return timelocal($sec, $min, $hour, $mday, $mon - 1, $year);
}

# Return the number of seconds from $st2 to $st1 (negative when $st1 is the
# earlier one), or nothing when either timestamp cannot be parsed.
#
# The parsing is done in a helper instead of "my $t = ... if COND": that
# construct leaves the variable in an undefined state when COND is false and
# could silently reuse the value of a previous call.
sub diffTime {
    my ($st1, $st2) = @_;

    my $t1 = _timestamp_to_epoch($st1);
    my $t2 = _timestamp_to_epoch($st2);
    return unless defined $t1 && defined $t2;

    return $t1 - $t2;
}

=head1 3. Loop over days and count users (UV) and hits (PV)

For every day from 2010-03-01 up to (not including) 2010-04-02, print the
day, the number of distinct C<auid> values and the number of log lines,
separated by tabs. The day is handed to Perl through the environment instead
of being spliced into the program text. Lines without an C<auid> still count
as a hit but no longer count as a user.

    for ((i = 0; i < 999; i++)); do
        yd=$(date -d "2010-03-01 $i day" +%Y/%m/%d)
        yy=$(date -d "2010-03-01 $i day" +%Y-%m-%d)
        if [ "$yy" = "2010-04-02" ]; then
            break
        fi
        cat /data/tongji/iphone_app/itools/*/"$yd"/ip_ringer_email.log* |
        DAY="$yy" perl -ne '
            chomp;
            my %m;
            for my $pair (split /<\|>/) {
                $m{$1} = $2 if $pair =~ /(.*?)=(.*)/;
            }
            $pv++;
            $h{ $m{auid} }++ if defined $m{auid};
            END {
                my $uv = scalar keys %h;
                print "$ENV{DAY}\t$uv\t", $pv || 0, "\n";
            }
        '
    done

=head1 Using map, grep and sort as one-liners

    perl -e 'print map { $_**2 } 1..3'
    perl -e 'print grep { $_ % 2 } 1..9'
    perl -e 'print sort { $b <=> $a } 1..9'

The first prints the squares (149), the second the odd numbers (13579) and
the third the numbers in descending order (987654321).

=head1 Users running the most processes

The first line of C<ps -ef> output is the column header, so it is skipped
rather than counted as a user.

    ps -ef | perl -ane '
        next if $. == 1;
        $ha{$F[0]}++;
        END {
            foreach $k (sort { $ha{$b} <=> $ha{$a} } keys %ha) {
                print "$k = $ha{$k} \n";
            }
        }
    '

=head1 Intersection of two sets

Fields are separated by C<< |<>| >>. Column C<$p_col> is compared against the
two product ids; the value in column C<$mob_col> (presumably the mobile
number) is collected per product, and the intersection is the set of values
seen for both. Records too short to have column C<$p_col> are ignored.

    perl -ne '
        BEGIN {
            $p1      = "600902000005416300";
            $p2      = "600902000006211983";
            $p_col   = 30;
            $mob_col = 0;
        }
        chomp;
        @lis = split /\|<>\|/;
        next unless defined $lis[$p_col];
        $buyers1{ $lis[$mob_col] }++ if $lis[$p_col] eq $p1;
        $buyers2{ $lis[$mob_col] }++ if $lis[$p_col] eq $p2;
        END {
            my @inter = grep { exists $buyers1{$_} } keys %buyers2;
            # print $p1, "=", join(",", keys %buyers1), "\n";
            # print $p2, "=", join(",", keys %buyers2), "\n";
            print "产品 $p1:", scalar(keys %buyers1), " \n";
            print "产品 $p2:", scalar(keys %buyers2), " \n";
            print "交集:", scalar(@inter), " \n";
        }
    ' BuyMusic.20090525

=head1 Top 10 keywords, printing the original lines

The first pass counts the keyword column, the second pass rewinds the same
file handle and prints every line whose keyword is among the C<$num> most
frequent ones. Uncomment the alternative C<print> to prefix each line with
the keyword's count.

    perl -e '
        my $num  = 10;
        my $col  = 1;
        my $file = shift or die "missing input file\n";
        open(my $in, "<", $file) or die "open $file: $!\n";
        my %count;
        while (<$in>) {
            my @lis = split /\|<>\|/;
            $count{ $lis[$col] }++ if defined $lis[$col];
        }
        my @top = sort { $count{$b} <=> $count{$a} } keys %count;
        splice(@top, $num) if @top > $num;
        my %is_top = map { $_ => 1 } @top;
        seek($in, 0, 0) or die "seek $file: $!\n";
        while (<$in>) {
            my @lis = split /\|<>\|/;
            next unless defined $lis[$col] && $is_top{ $lis[$col] };
            # print "$count{$lis[$col]}:$_";
            print;
        }
        close($in);
    ' qdSearch.log

=cut

1;