1. 目标
性能采集脚本将uptime、vmstat、free、iostat命令采集到的数据记录到了同一个日志文件中。本脚本的作用是按照不同的标签把数据提取出来,加上标题行后保存为csv文件,方便进一步的分析。
性能数据记录文件:文件的第一列为时间,一分钟一组数据;第二列为标签,grep这个标签就可以分离数据了。
oliver@bigdatadev:~/_src/testing/analyse/data-20160723$ head os_20160723.log
20160723-00:00:01 uptime 00:00:01 up 3 days, 22:26, 0 users, load average: 0.31, 0.42, 0.43
20160723-00:00:01 vmstat 3 0 0 5608676 4083744 13327436 0 0 6 45 15 13 1 4 95 0 0
20160723-00:00:01 free 32186 26709 5477 0 3988 13015
20160723-00:00:01 iostat-cpu 1.47 0.01 3.93 0.05 0.00 94.55
20160723-00:00:01 iostat-io hda 0.95 5.34 0.15 1.33 10.56 53.38 43.19 0.00 3.20 0.46 0.07
20160723-00:00:01 iostat-io hda1 0.95 5.34 0.15 1.33 10.56 53.38 43.19 0.00 3.20 0.46 0.07
20160723-00:00:01 iostat-io hdb 0.00 0.00 0.00 0.00 0.01 0.00 27.94 0.00 2.35 2.35 0.00
20160723-00:00:01 iostat-io hdc 0.06 29.25 0.37 8.49 34.00 302.00 37.92 0.14 15.97 0.52 0.46
20160723-00:01:01 uptime 00:01:01 up 3 days, 22:27, 0 users, load average: 0.29, 0.40, 0.43
20160723-00:01:01 vmstat 3 0 0 5605176 4084260 13328336 0 0 6 44 15 13 1 4 95 0 0
oliver@bigdatadev:~/_src/testing/analyse/data-20160723$
2.数据清洗脚本
用grep匹配数据实现抽取;用awk实现删除列、增加标题行等操作;用case实现选择,每一类数据对应一个实际的抽取函数。
#!/bin/bash
# Positional arguments, kept as globals that the etl_os_* functions read:
#   $1 - label of the records to extract
#   $2 - path of the performance log to read
#   $3 - path of the CSV file to write
label="${1}"
infile="${2}"
outfile="${3}"
#######################################
# Extract the load averages from the uptime records into a CSV file.
# Globals:   label   (read) - value expected in column 2 of the log
#            infile  (read) - log file to read
#            outfile (read) - CSV file to (over)write
# Outputs:   "datetime,loadavg_1,loadavg_5,loadavg_15" followed by one row
#            per matching record; nothing if no record matches.
#######################################
etl_os_uptime() {
  awk -v tag="$label" '
    # The number of words in the "up ..." part varies with the uptime
    # ("3 days, 22:26", "22:26", "5 min", ...), so the load averages are
    # taken from the end of the line instead of from fixed positions.
    $2 == tag && NF >= 5 {
      if (!rows++) print "datetime,loadavg_1,loadavg_5,loadavg_15"
      l1 = $(NF - 2); l5 = $(NF - 1); l15 = $NF
      # The raw fields keep the separators of the uptime output ("0.31,").
      sub(/,$/, "", l1); sub(/,$/, "", l5); sub(/,$/, "", l15)
      print $1 "," l1 "," l5 "," l15
    }
  ' "$infile" > "$outfile"
}
#######################################
# Extract the vmstat records into a CSV file.
# Globals:   label   (read) - value expected in column 2 of the log
#            infile  (read) - log file to read
#            outfile (read) - CSV file to (over)write
# Outputs:   a header line followed by one row per matching record
#            (timestamp plus log columns 3..19); nothing if no record matches.
#######################################
etl_os_vmstat() {
  awk -v tag="$label" '
    $2 == tag {
      if (!rows++) print "datetime,r,b,swpd,free,buff,cache,si,so,bi,bo,in,cs,us,sy,id,wa,st"
      # Join with "," between values only, so a row has exactly as many
      # columns as the header (no trailing empty column).
      row = $1
      for (i = 3; i <= 19; i++) row = row "," $i
      print row
    }
  ' "$infile" > "$outfile"
}
#######################################
# Extract the free (memory) records into a CSV file.
# Globals:   label   (read) - value expected in column 2 of the log
#            infile  (read) - log file to read
#            outfile (read) - CSV file to (over)write
# Outputs:   a header line followed by one row per matching record
#            (timestamp plus log columns 3..8); nothing if no record matches.
#######################################
etl_os_free() {
  # Paths are quoted so that file names containing whitespace work, and the
  # label is compared with column 2 exactly instead of being searched for
  # anywhere in the line.
  awk -v tag="$label" '
    $2 == tag {
      if (!rows++) print "datetime,total,used,free,shared,buffers,cached"
      row = $1
      for (i = 3; i <= 8; i++) row = row "," $i
      print row
    }
  ' "$infile" > "$outfile"
}
#######################################
# Extract the iostat CPU records into a CSV file.
# Globals:   label   (read) - value expected in column 2 of the log
#            infile  (read) - log file to read
#            outfile (read) - CSV file to (over)write
# Outputs:   a header line followed by one row per matching record
#            (timestamp plus log columns 3..8); nothing if no record matches.
#######################################
etl_os_iostatcpu() {
  awk -v tag="$label" '
    $2 == tag {
      # Six values are written per record, so the header names six metrics;
      # the former extra "total" title shifted every value one column over.
      if (!rows++) print "datetime,user,nice,system,iowait,steal,idle"
      row = $1
      for (i = 3; i <= 8; i++) row = row "," $i
      print row
    }
  ' "$infile" > "$outfile"
}
#######################################
# Extract the per-device iostat records into a CSV file.
# Globals:   label   (read) - value expected in column 2 of the log
#            infile  (read) - log file to read
#            outfile (read) - CSV file to (over)write
# Outputs:   a header line followed by one row per matching record
#            (timestamp, device name in column 3, then columns 4..14);
#            nothing if no record matches.
#######################################
etl_os_iostatio() {
  # Paths are quoted so that file names containing whitespace work, and the
  # label is compared with column 2 exactly instead of being searched for
  # anywhere in the line.
  awk -v tag="$label" '
    $2 == tag {
      if (!rows++) print "datetime,Device,rrqm/s,wrqm/s,r/s,w/s,rsec/s,wsec/s,avgrq-sz,avgqu-sz,await,svctm,util"
      row = $1
      for (i = 3; i <= 14; i++) row = row "," $i
      print row
    }
  ' "$infile" > "$outfile"
}
# Run the extraction function that corresponds to the requested label ($1).
# An unknown label is reported on stderr and ends the script with a non-zero
# status, so that callers can detect the failure.
case "$1" in
  uptime)
    etl_os_uptime
    ;;
  vmstat)
    etl_os_vmstat
    ;;
  free)
    etl_os_free
    ;;
  iostat-cpu)
    etl_os_iostatcpu
    ;;
  iostat-io)
    etl_os_iostatio
    ;;
  *)
    printf "Unsupported label '%s'. Supported labels: uptime, vmstat, free, iostat-cpu, iostat-io.\n" "$1" >&2
    exit 1
    ;;
esac
3.运行效果
../etl.sh iostat-io os_20160723.log os_iostatio_20160723.log
oliver@bigdatadev:~/_src/testing/analyse/data-20160723$ head os_iostatio_20160723.log
datetime,Device,rrqm/s,wrqm/s,r/s,w/s,rsec/s,wsec/s,avgrq-sz,avgqu-sz,await,svctm,util
20160723-00:00:01,hda,0.95,5.34,0.15,1.33,10.56,53.38,43.19,0.00,3.20,0.46,0.07
20160723-00:00:01,hda1,0.95,5.34,0.15,1.33,10.56,53.38,43.19,0.00,3.20,0.46,0.07
20160723-00:00:01,hdb,0.00,0.00,0.00,0.00,0.01,0.00,27.94,0.00,2.35,2.35,0.00
20160723-00:00:01,hdc,0.06,29.25,0.37,8.49,34.00,302.00,37.92,0.14,15.97,0.52,0.46
20160723-00:01:01,hda,0.95,5.34,0.15,1.33,10.56,53.38,43.19,0.00,3.20,0.46,0.07
20160723-00:01:01,hda1,0.95,5.34,0.15,1.33,10.55,53.38,43.19,0.00,3.20,0.46,0.07
20160723-00:01:01,hdb,0.00,0.00,0.00,0.00,0.01,0.00,27.94,0.00,2.35,2.35,0.00
20160723-00:01:01,hdc,0.06,29.25,0.37,8.49,33.99,301.99,37.92,0.14,15.98,0.52,0.46
20160723-00:02:01,hda,0.95,5.34,0.15,1.33,10.55,53.38,43.18,0.00,3.20,0.46,0.07
oliver@bigdatadev:~/_src/testing/analyse/data-20160723$