##Log
#<SONG_PLAY>version:v3.0.0.00_A1|channel:TG75222|device_type:LenovoLenovo A300|play_type:online|uid:XXX|sid:XXXX|road_ids:3:1|startup_time:6|play_time:22|song_time:264|block_times:0|connect_type:GPRS|song_name:|singer:|file_name:|ip_addr:127.0.0.1|areacode:|record_time:2012-09-13 00:00:00
##
##Format
#version
#channel
#device_type
#play_type
#connect_type
#uid
#sid
#road_ids
#startup_time
#play_time
#song_time
#block_times
#song_name
#singer
#file_name
#record_time
#ip_addr
#areacode
##

脚本如下:

#!/bin/awk -f
#
# Flatten <SONG_PLAY> log records into fixed-order, "|"-separated rows.
#
# Each input record is a "|"-separated list of key:value pairs in
# arbitrary order.  For every input line, one output line holding only
# the values (in the fixed column order below) is written to
# "<input file name>.log".

BEGIN {
    FS = OFS = "|"
}

{
    # Start every record with an empty map so that a key missing from
    # this line cannot inherit the value of a previous line.
    # split("", m) is the portable way to clear an array.
    split("", m)

    for (i = 1; i <= NF; i++) {
        # Work on a copy: assigning to $i would force awk to rebuild $0.
        field = $i
        sub(/<SONG_PLAY>/, "", field)

        # Split on the FIRST colon only; values such as "3:1" or
        # "2012-09-13 00:00:00" contain colons of their own.
        pos = index(field, ":")
        if (pos == 0)
            continue
        m[substr(field, 1, pos - 1)] = substr(field, pos + 1)
    }

    # Build the target name first: an unparenthesised concatenation
    # directly after ">" is ambiguous across awk implementations.
    out = FILENAME ".log"

    print m["version"], m["channel"], m["device_type"], m["play_type"], \
          m["connect_type"], m["uid"], m["sid"], m["road_ids"], \
          m["startup_time"], m["play_time"], m["song_time"], \
          m["block_times"], m["song_name"], m["singer"], m["file_name"], \
          m["record_time"], m["ip_addr"], m["areacode"] > out
}



附一个perl的脚本:

#!/usr/bin/perl
#
# dm_play_log_preprocess.pl - flatten <SONG_PLAY> log records.
#
# Usage: perl dm_play_log_preprocess.pl logfile
#
# Every input line is a "|"-separated list of key:value pairs in arbitrary
# order.  For each line, the values of the columns listed in @FIELDS are
# written, "|"-separated and in that fixed order, to "<logfile>.log".
# A column that is absent from a line is written as an empty string.
use strict;
use warnings;

# Output column order.
my @FIELDS = qw(
    version channel device_type play_type connect_type uid sid road_ids
    startup_time play_time song_time block_times song_name singer
    file_name record_time ip_addr areacode
);

if (@ARGV != 1) {
    print "Usage:\n";
    print "perl dm_play_log_preprocess.pl logfile\n";
    exit 1;
}
my $logfile    = $ARGV[0];
my $outputfile = $logfile . ".log";

# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode, and $! reports why an open failed.
open(my $in, '<', $logfile)
    or die "can not open the file: $logfile: $!";
open(my $out, '>', $outputfile)
    or die "can not open the file: $outputfile: $!";

while (my $line = <$in>) {
    chomp $line;
    $line =~ s/<SONG_PLAY>//;

    my %value_of;
    for my $item (split /\|/, $line) {
        # Limit 2: split on the first colon only, because values such as
        # "3:1" or "2012-09-13 00:00:00" contain colons themselves.
        my ($key, $value) = split /:/, $item, 2;
        next unless defined $key && length $key;
        $value_of{$key} = defined $value ? $value : '';
    }

    my @row = map { defined $value_of{$_} ? $value_of{$_} : '' } @FIELDS;
    print {$out} join('|', @row), "\n";
}

close $in;
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close $out or die "can not close the file: $outputfile: $!";


python写法

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import re
from re import  split

# Output columns, in order. 'areacode' is included so the column set matches
# the "Format" list documented at the top of this file.
FIELDS = ['version', 'channel', 'device_type', 'play_type', 'connect_type',
          'uid', 'sid', 'road_ids', 'startup_time', 'play_time', 'song_time',
          'block_times', 'song_name', 'singer', 'file_name', 'record_time',
          'ip_addr', 'areacode']


def format_record(line):
    """Convert one raw <SONG_PLAY> log line into a comma-separated row.

    The line is a '|'-separated list of key:value pairs in arbitrary order.
    The result contains one "key:value" item per entry of FIELDS, in that
    order; a field that is absent from the line is rendered as "key:" with
    an empty value, so every row has the same number of columns.
    """
    values = {}
    for item in line.strip().split('|'):
        # partition() splits on the first colon only, so values that contain
        # colons themselves (road_ids "3:1", record_time "00:00:00") survive
        # intact, and an item without any colon yields an empty value
        # instead of raising IndexError.
        key, _, value = item.partition(':')
        values[key.replace('<SONG_PLAY>', '')] = value
    row = ['%s:%s' % (name, values.get(name, '')) for name in FIELDS]
    return ','.join(row).strip()


def main(path):
    """Print the flattened form of every line of the log file at `path`."""
    # The context manager closes the file even if a line fails to process.
    with open(path) as log:
        for line in log:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(format_record(line))


if __name__ == '__main__':
    main(sys.argv[1])