##Log
#<SONG_PLAY>version:v3.0.0.00_A1|channel:TG75222|device_type:LenovoLenovo A300|play_type:online|uid:XXX|sid:XXXX|road_ids:3:1|startup_time:6|play_time:22|song_time:264|block_times:0|connect_type:GPRS|song_name:|singer:|file_name:|ip_addr:127.0.0.1|areacode:|record_time:2012-09-13 00:00:00
##
##Format
#version
#channel
#device_type
#play_type
#connect_type
#uid
#sid
#road_ids
#startup_time
#play_time
#song_time
#block_times
#song_name
#singer
#file_name
#record_time
#ip_addr
#areacode
##
脚本如下:
#!/bin/awk -f
#
# Reorder the key:value pairs of each "<SONG_PLAY>" log record into a fixed
# column order and write the bare values, "|"-separated, to "<input>.log".
#
# Input : one record per line, fields separated by "|", each field "key:value".
# Output: one line per record in the file named FILENAME ".log"; a key that is
#         absent from the record yields an empty column.

BEGIN {
    FS = OFS = "|"
}

{
    # Start every record with an empty map so values never leak between lines.
    # split("", m) is the portable way to clear an array.
    split("", m)

    for (i = 1; i <= NF; i++) {
        field = $i
        sub(/<SONG_PLAY>/, "", field)

        # Split at the FIRST colon only: values such as "3:1" or
        # "2012-09-13 00:00:00" contain colons themselves.
        pos = index(field, ":")
        if (pos == 0)
            continue
        m[substr(field, 1, pos - 1)] = substr(field, pos + 1)
    }

    # The redirection target is parenthesized because an unparenthesized
    # concatenation after ">" is parsed differently by different awks.
    print m["version"], m["channel"], m["device_type"], m["play_type"], \
          m["connect_type"], m["uid"], m["sid"], m["road_ids"], \
          m["startup_time"], m["play_time"], m["song_time"], \
          m["block_times"], m["song_name"], m["singer"], m["file_name"], \
          m["record_time"], m["ip_addr"], m["areacode"] > (FILENAME ".log")
}
附一个perl的脚本:
#!/usr/bin/perl
#
# dm_play_log_preprocess.pl - flatten "<SONG_PLAY>" log records.
#
# Usage: perl dm_play_log_preprocess.pl logfile
#
# Reads logfile line by line. Each line holds "|"-separated "key:value"
# fields in arbitrary order. For every line, the values of the known keys are
# written in a fixed column order, "|"-separated, to "<logfile>.log".
# A key that is missing from a line produces an empty column.
use strict;
use warnings;

# Output columns, in the order they are written.
my @FIELDS = qw(
    version channel device_type play_type connect_type uid sid road_ids
    startup_time play_time song_time block_times song_name singer file_name
    record_time ip_addr areacode
);

if (@ARGV != 1) {
    print "Usage:\n";
    print "perl dm_play_log_preprocess.pl logfile\n";
    exit 1;
}

my $logfile    = $ARGV[0];
my $outputfile = $logfile . ".log";

# Three-argument open with lexical handles: the file name can never be
# interpreted as an open mode, and the handles are scoped to this script.
open(my $in, '<', $logfile)
    or die "can not open the file: $logfile: $!";
open(my $out, '>', $outputfile)
    or die "can not open the file: $outputfile: $!";

while (my $line = <$in>) {
    chomp $line;

    # Turn the record tag into a field separator so that anything preceding
    # it becomes a separate (ignored) field.
    $line =~ s/<SONG_PLAY>/|/;

    my %value_of;
    for my $item (split /\|/, $line) {
        # Split at the FIRST colon only: values such as "3:1" or
        # "2012-09-13 00:00:00" contain colons themselves.
        my ($key, $value) = split /:/, $item, 2;
        next unless defined $key && length $key;
        $value_of{$key} = $value;
    }

    my @columns = map { defined $value_of{$_} ? $value_of{$_} : '' } @FIELDS;
    print {$out} join('|', @columns), "\n";
}

close($in);

# Buffered write errors only surface at close time, so check it.
close($out) or die "can not close the file: $outputfile: $!";
python写法
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import re
from re import split
# Output columns, in the order they are printed.
# NOTE(review): 'areacode' was never part of this script's output although the
# awk and Perl scripts in this file emit it; left unchanged here -- confirm
# whether it should be appended.
FIELD_ORDER = (
    'version', 'channel', 'device_type', 'play_type', 'connect_type',
    'uid', 'sid', 'road_ids', 'startup_time', 'play_time', 'song_time',
    'block_times', 'song_name', 'singer', 'file_name', 'record_time',
    'ip_addr',
)


def format_song_play_line(line, fields=FIELD_ORDER):
    """Reorder one "<SONG_PLAY>" log record into a fixed column order.

    Args:
        line: one raw log line made of '|'-separated "key:value" fields.
        fields: keys to emit, in output order.

    Returns:
        The "key:value" texts of `fields`, joined by ','. A key that is
        absent from the line contributes an empty string.
    """
    pairs = {}
    for item in line.strip().split('|'):
        # Split at the FIRST colon only: values such as "3:1" or
        # "2012-09-13 00:00:00" contain colons themselves.
        key, sep, value = item.partition(':')
        if not sep:
            # Not a key:value field; nothing to record.
            continue
        key = key.replace('<SONG_PLAY>', '')
        pairs[key] = key + ':' + value
    return ','.join(pairs.get(name, '') for name in fields).strip()


if __name__ == '__main__':
    # Iterate the file lazily instead of loading it all with readlines().
    with open(sys.argv[1]) as f:
        for line in f:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(format_song_play_line(line))
转载于:https://blog.51cto.com/chenwenming/1074121