这里是解析日志用的两个awk脚本:
list_pv.awk脚本
list_click.awk脚本
list_pv.awk脚本
# list_pv.awk -- count PV log records per (date, hour, pid, cid, sid, posid, aid).
#
# Input  (tab-separated): date pid cid sid posid aid
#   $1  timestamp; characters 1-10 are used as the date and characters 12-13
#       as the hour (presumably "YYYY-MM-DD HH:MM:SS" -- confirm against the
#       actual log format).
#   $6  comma-separated list of aids; every element is counted separately.
#
# Output (tab-separated): date hour pid cid sid posid aid count
#   Row order is unspecified (it follows awk's "for (key in array)" order).
BEGIN {
    FS = "\t"
    OFS = "\t"
}

{
    # Part of the output row shared by every aid found on this line.
    prefix = substr($1, 1, 10) OFS substr($1, 12, 2) OFS $2 OFS $3 OFS $4 OFS $5

    n = split($6, aids, ",")
    for (i = 1; i <= n; i++) {
        # The array key is the finished output row. Date and hour are part of
        # the key so records from different hours are never merged into one
        # count, and the tab separator cannot occur inside a field (it is FS),
        # so distinct field tuples can never produce the same key.
        pv[prefix OFS aids[i]]++
    }
}

END {
    # date, hour, pid, cid, sid, posid, aid, count
    for (row in pv) {
        print row, pv[row]
    }
}
list_click.awk脚本
# list_click.awk -- count click log records per
# (date, hour, pid, cid, sid, posid, aid, spid, type).
#
# Input  (tab-separated): date pid cid sid posid aid spid type
#   $1  timestamp; characters 1-10 are used as the date and characters 12-13
#       as the hour (presumably "YYYY-MM-DD HH:MM:SS" -- confirm against the
#       actual log format).
#
# Output (tab-separated): date hour pid cid sid posid aid spid type count
#   Row order is unspecified (it follows awk's "for (key in array)" order).
BEGIN {
    FS = "\t"
    OFS = "\t"
}

{
    # The array key is the finished output row. Date and hour are part of the
    # key so clicks from different hours are never merged into one count, and
    # the tab separator cannot occur inside a field (it is FS), so distinct
    # field tuples can never produce the same key.
    row = substr($1, 1, 10) OFS substr($1, 12, 2) OFS $2 OFS $3 OFS $4 OFS $5 OFS $6 OFS $7 OFS $8
    cl[row]++
}

END {
    # date, hour, pid, cid, sid, posid, aid, spid, type, count
    for (row in cl) {
        print row, cl[row]
    }
}