awk qos分析脚本两例。

之前写的用于分析webcdn日志的awk脚本。一个可以用来分析流量和状态码。另一个用来分析错误码。

使用方式:

zcat  /.log.gz |awk -f  analyze_awk.awk -  #domain和traffic相关分析,要注意日志格式

zcat  /.log.gz |awk -f  auto_awk.awk -   #wrong http code分析

(目前我们使用监控系统来直接调用脚本,在发现问题时可以直接分析错误的domain和url,方便快速定位问题)

有兴趣的同学测试下,欢迎大家共同学习和探讨。

脚本1:

# Per-record rule of the error-code report.
# Counts every request whose status field ($9) is a 4xx or 5xx code other
# than 400, keyed by status, by domain ($2), by full URL ("http://" domain url,
# url taken from $7), and by the (domain,status) / (URL,status) pairs.
{
         # Kept exactly as written: replaces each ' "' (space + double quote)
         # with a single space; modifying $0 makes awk re-split the fields.
         gsub(/ "/," ");
         domain=$2;
         url=$7;
         status=$9;
         # The previous version also copied $1, $10, $(NF-2) and $NF into
         # variables that nothing in this script reads. They are gone because
         # $(NF-2) is a fatal "attempt to access field -N" error in gawk on
         # records with fewer than two fields (for example a blank line).

         # Anchored on purpose: only a field that is exactly three digits and
         # starts with 4 or 5 is a status code. An unanchored "4.." would also
         # accept any longer token that merely contains a 4 (e.g. a byte count
         # that slid into $9 on a malformed line).
         if  ((status ~ /^[45][0-9][0-9]$/) && (status != 400)) {
             full_url= "http://" domain url;
             ++arr_status_num[status];
             ++arr_domain_num[domain];
             ++arr_domain_status[domain,status];
             ++arr_domain_url_num[full_url];
             ++arr_domain_url_status[full_url,status];
         }
}
# END rule of the error-code report. Prints three sections from the counters
# filled by the per-record rule:
#   1. hits per status code, in asorti() order of the status keys;
#   2. per-domain error counts, top 20 after an external sort;
#   3. per-URL error counts, top 20 after an external sort.
# Requires gawk (asorti). The per-domain / per-URL "error_sum", "4xx" and "5xx"
# columns only add up the codes listed below (403, 404, 415, 499, 502, 504),
# not every 4xx/5xx code that was counted.
END {
     #http status statistic
     print  "\033[40;33m#################################\033[0m"
     print  "\033[40;32m#get http code summary: code,sum#\033[0m"
     print  "\033[40;33m#################################\033[0m"
     status_sort=asorti(arr_status_num,sort_status);
     for  (i=1; i<=status_sort; i++) {
         s=sort_status[i];
         # The original if / else-if pair lacked the closing brace before
         # "else" (a syntax error) and both branches printed the same thing,
         # so they are merged into one condition.
         if  ((s ~  "4.." ) || (s ~  "5.." )) {
             printf  "%s:%d\n" , s,arr_status_num[s];
         }
     }

     #get domain statistics
     print  "==================================================="
     print  "\033[40;33m###############################################################\033[0m"
     print  "\033[40;32m#get domain  summary: domain|error_sum|4xx|5xx|404|499|502|504#\033[0m"
     print  "\033[40;33m###############################################################\033[0m"
     # One variable for the command so that close() is guaranteed to name the
     # same pipe that printf opened.
     top_domains_cmd= "sort -t '|' -k 2 -nr|head -20"
     domain_sort=asorti(arr_domain_num,sort_domain);
     for  (i=1; i<=domain_sort; i++) {
         g=sort_domain[i];
         arr_domain_num_4xx=arr_domain_status[g,404]+arr_domain_status[g,499]+arr_domain_status[g,415]+arr_domain_status[g,403];
         arr_domain_num_5xx=arr_domain_status[g,502]+arr_domain_status[g,504];
         arr_domain_num_error=arr_domain_num_4xx+arr_domain_num_5xx;
         printf  "%s|%d|%d|%d|%d|%d|%d|%d\n" ,
             g,arr_domain_num_error,arr_domain_num_4xx,arr_domain_num_5xx,
             arr_domain_status[g,404],arr_domain_status[g,499],
             arr_domain_status[g,502],arr_domain_status[g,504] | top_domains_cmd
     }
     # Closing the pipe lets sort|head finish and emit its output before the
     # next section header is printed.
     close(top_domains_cmd)

     #get url statistics
     print  "==================================================="
     print  "\033[40;33m################################################################\033[0m"
     print  "\033[40;32m#get url summary: url | error_sum| 4xx| 5xx| 404| 499| 502| 504#\033[0m"
     print  "\033[40;33m################################################################\033[0m"
     top_urls_cmd= "sort -t ' ' -k 3 -nr|head -20"
     url_sort=asorti(arr_domain_url_num,url_domain);
     for  (i=1; i<=url_sort; i++) {
         g=url_domain[i];
         arr_domain_url_num_4xx=arr_domain_url_status[g,404]+arr_domain_url_status[g,499]+arr_domain_url_status[g,403]+arr_domain_url_status[g,415];
         arr_domain_url_num_5xx=arr_domain_url_status[g,502]+arr_domain_url_status[g,504];
         arr_domain_url_num_error=arr_domain_url_num_4xx+arr_domain_url_num_5xx;
         # With ' ' as the sort delimiter, field 1 is the URL, field 2 is the
         # lone "|", and field 3 is the error_sum column the sort keys on.
         printf  "%s | %d| %d| %d| %d| %d| %d| %d\n" ,
             g,arr_domain_url_num_error,arr_domain_url_num_4xx,arr_domain_url_num_5xx,
             arr_domain_url_status[g,404],arr_domain_url_status[g,499],
             arr_domain_url_status[g,502],arr_domain_url_status[g,504] | top_urls_cmd
     }
     close(top_urls_cmd)
}


脚本2:

# Runs once before any input is read: sets the output field separator to ";".
BEGIN { OFS = ";" }
# Per-record rule of the traffic report.
# For every record whose status field ($9) is greater than 0 and matches
# "...", accumulates hit counts, bytes ($10) and request time ($NF) per
# status, plus hit counts per domain ($2), per full URL ("http://" domain url,
# url taken from $7) and per (domain,status) / (URL,status) pair.
# For 200 and 206 responses it additionally accumulates bytes and time per
# domain and per client ($1), and sorts the request into exactly one latency
# bucket.
{
         # Kept exactly as written: replaces each ' "' (space + double quote)
         # with a single space; modifying $0 makes awk re-split the fields.
         gsub(/ "/," ");
         client=$1;
         domain=$2;
         url=$7;
         status=$9;
         bytes=$10;
         # Last field of the record, used as the request duration.
         req_time=$NF;
         # The previous version also read $(NF-2) into a variable that was
         # never used; that read is a fatal "attempt to access field -N" error
         # in gawk on records with fewer than two fields, so it is removed.
         if  (status>0 && (match(status, "..." ))) {
             full_url= "http://" domain url;
             sum_access++;
             ++arr_status_num[status];
             arr_status_bytes[status]+=bytes;
             arr_status_time[status]+=req_time;

             ++arr_domain_num[domain];
             ++arr_domain_status[domain,status];
             ++arr_domain_url_num[full_url];
             ++arr_domain_url_status[full_url,status];
             if  (status==200 || status==206) {
                 #about domain
                 arr_domain_bytes[domain]+=bytes;
                 arr_domain_time[domain]+=req_time;
                 #about client(usually ip)
                 arr_client_bytes[client]+=bytes;
                 arr_client_time[client]+=req_time;

                 # Latency buckets. The previous independent "if" tests used
                 # <= and >= on both sides, so a request taking exactly 0.1,
                 # 0.2, 0.5, 1, 2 or 5 was counted in two buckets and the
                 # bucket rates could add up to more than 100%. An else-if
                 # chain puts each request in one bucket; a boundary value
                 # goes to the lower bucket.
                 if  (req_time <= 0.1) {
                     ++arr_status_speed1[status]; ++arr_domain_speed1[domain];
                 } else if  (req_time <= 0.2) {
                     ++arr_status_speed2[status]; ++arr_domain_speed2[domain];
                 } else if  (req_time <= 0.5) {
                     ++arr_status_speed5[status]; ++arr_domain_speed5[domain];
                 } else if  (req_time <= 1) {
                     ++arr_status_speed10[status]; ++arr_domain_speed10[domain];
                 } else if  (req_time <= 2) {
                     ++arr_status_speed20[status]; ++arr_domain_speed20[domain];
                 } else if  (req_time <= 5) {
                     ++arr_status_speed50[status]; ++arr_domain_speed50[domain];
                 } else {
                     ++arr_status_speed5x[status]; ++arr_domain_speed5x[domain];
                 }
             }
         }
}
# END rule of the traffic report. Prints four sections from the counters
# filled by the per-record rule:
#   1. per status code: hits and share of all counted requests; for 200/206
#      also throughput, average request time and the seven latency-bucket
#      percentages;
#   2. per domain (only domains with accumulated 200/206 time > 0): hits,
#      share, valid / 4xx / 5xx ratios and transferred data, top 20;
#   3. per URL: hits and valid / 4xx / 5xx ratios, top 20 by hits;
#   4. per URL again with an extra error-ratio column, top 10 by that column.
# Requires gawk (asorti). "valid" adds up 200, 206, 301 and 302; "4xx" adds up
# 403, 404, 415 and 499; "5xx" adds up 502 and 504 -- other codes are not
# included in those ratios.
END {
     #http status statistic
     print  "\033[40;33m##########################################################################################################################\033[0m"
     print  "\033[40;32m#get http code summary: code,sum,rate,speed(Kb),avg_time(s),<0.1 rate,<0.2 rate,<0.5 rate,<1 rate,<2 rate,<5 rate,>5 rate#\033[0m"
     print  "\033[40;33m##########################################################################################################################\033[0m"
     status_sort=asorti(arr_status_num,sort_status);
     for  (i=1; i<=status_sort; i++) {
         s=sort_status[i];
         hits=arr_status_num[s];
         if  (s==200 || s==206) {
             # Dividing by a zero total time is a fatal error in gawk; report a
             # speed of 0 when no time was accumulated for this status.
             if  (arr_status_time[s]>0) {
                 speed_kb=arr_status_bytes[s]*8/(1024*arr_status_time[s]);
             } else {
                 speed_kb=0;
             }
             # Twelve conversions for twelve values. The previous format had
             # only eleven, so the last column (>5 rate) was never printed.
             printf  "%s:%d,%.2f|%.2f|%.3f|%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f\n" ,
                 s,hits,hits/sum_access,speed_kb,arr_status_time[s]/hits,
                 arr_status_speed1[s]/hits*100,arr_status_speed2[s]/hits*100,
                 arr_status_speed5[s]/hits*100,arr_status_speed10[s]/hits*100,
                 arr_status_speed20[s]/hits*100,arr_status_speed50[s]/hits*100,
                 arr_status_speed5x[s]/hits*100;
         } else if  ((s ~  "3.." ) || (s ~  "4.." ) || (s ~  "5.." )) {
             # The 3xx, 4xx and 5xx branches printed the same line (and were
             # missing their closing braces), so they share one branch now.
             printf  "%s:%d,%.2f\n" , s,hits,hits/sum_access;
         }
     }

     #get domain statistics
     print  "==================================================="
     print  "\033[40;33m###########################################################################\033[0m"
     print  "\033[40;32m#get domain  summary: domain,sum,rate,valid rate,4xx rate,5xx rate,data(G)#\033[0m"
     print  "\033[40;33m###########################################################################\033[0m"
     # One variable per command so that close() is guaranteed to name the same
     # pipe that printf opened.
     top_domains_cmd= "sort -t ',' -k 2 -nr|head -20"
     domain_sort=asorti(arr_domain_num,sort_domain);
     for  (i=1; i<=domain_sort; i++) {
         g=sort_domain[i];
         domain_hits=arr_domain_num[g];
         arr_domain_num_valid=arr_domain_status[g,200]+arr_domain_status[g,206]+arr_domain_status[g,302]+arr_domain_status[g,301];
         arr_domain_num_4xx=arr_domain_status[g,404]+arr_domain_status[g,499]+arr_domain_status[g,403]+arr_domain_status[g,415];
         arr_domain_num_5xx=arr_domain_status[g,502]+arr_domain_status[g,504];
         if  (arr_domain_time[g]>0) {
             printf  "%s:%d,%.2f|%.2f,%.2f,%.2f,%.2f\n" ,
                 g,domain_hits,domain_hits/sum_access,
                 arr_domain_num_valid/domain_hits,arr_domain_num_4xx/domain_hits,
                 arr_domain_num_5xx/domain_hits,
                 arr_domain_bytes[g]*8/(1024*1024*1024) | top_domains_cmd
         }
     }
     # Closing the pipe lets sort|head finish and emit its output before the
     # next section header is printed.
     close(top_domains_cmd)

     #get url sum statistics
     print  "==================================================="

     print  "\033[40;33m###########################################################\033[0m"
     print  "\033[40;32m#get url  summary:url| sum| valid rate| 4xx rate |5xx rate#\033[0m"
     print  "\033[40;33m###########################################################\033[0m"
     top_urls_cmd= "sort -t ' ' -k 2 -nr|head -20"
     url_sort=asorti(arr_domain_url_num,url_domain);
     for  (i=1; i<=url_sort; i++) {
         g=url_domain[i];
         url_hits=arr_domain_url_num[g];
         arr_domain_url_num_valid=arr_domain_url_status[g,200]+arr_domain_url_status[g,206]+arr_domain_url_status[g,302]+arr_domain_url_status[g,301];
         arr_domain_url_num_4xx=arr_domain_url_status[g,404]+arr_domain_url_status[g,499]+arr_domain_url_status[g,403]+arr_domain_url_status[g,415];
         arr_domain_url_num_5xx=arr_domain_url_status[g,502]+arr_domain_url_status[g,504];

         printf  "%s| %d| %.2f| %.2f| %.2f\n" ,
             g,url_hits,arr_domain_url_num_valid/url_hits,
             arr_domain_url_num_4xx/url_hits,
             arr_domain_url_num_5xx/url_hits | top_urls_cmd
     }
     close(top_urls_cmd)

     #get url errorsum statistics
     print  "==================================================="

     print  "\033[40;33m##########################################################################\033[0m"
     print  "\033[40;32m#get url  summary:url| sum| valid rate| errorsum rate| 4xx rate| 5xx rate#\033[0m"
     print  "\033[40;33m##########################################################################\033[0m"
     worst_urls_cmd= "sort -t ' ' -k 4 -nr|head -10"
     # url_domain / url_sort from the previous section are reused: the key set
     # of arr_domain_url_num has not changed, so sorting it again would yield
     # the same list.
     for  (i=1; i<=url_sort; i++) {
         g=url_domain[i];
         url_hits=arr_domain_url_num[g];
         arr_domain_url_num_valid=arr_domain_url_status[g,200]+arr_domain_url_status[g,206]+arr_domain_url_status[g,302]+arr_domain_url_status[g,301];
         arr_domain_url_num_4xx=arr_domain_url_status[g,404]+arr_domain_url_status[g,499]+arr_domain_url_status[g,403]+arr_domain_url_status[g,415];
         arr_domain_url_num_5xx=arr_domain_url_status[g,502]+arr_domain_url_status[g,504];
         printf  "%s| %d| %.2f| %.2f| %.2f| %.2f\n" ,
             g,url_hits,arr_domain_url_num_valid/url_hits,
             (arr_domain_url_num_4xx+arr_domain_url_num_5xx)/url_hits,
             arr_domain_url_num_4xx/url_hits,
             arr_domain_url_num_5xx/url_hits | worst_urls_cmd
     }
     close(worst_urls_cmd)
}