shell + go + mysql + grafana nginx日志统计 (二) :统计及存入数据库

开始

首先把我们要采集的日志拿出来,大概这个样子

123.131.xx.xxx 307 0.012 [2018-01-16T10:42:50+08:00] POST /login HTTP/1.1 - 0 .......
121.19.xx.xx 200 0.010 [2018-01-16T10:42:51+08:00] GET / HTTP/1.1 - 4228 ........
120.221.xxx.xx 200 0.007 [2018-01-16T10:42:56+08:00] GET / HTTP/1.1 - 4227 .........

而我所接触的服务中一个服务大概每天产生90万条访问日志,而类似的服务有6个左右。其他一些零散的服务每个每天大概产生日志30-40万条左右。再来看看我的机器性能,4核8G带宽1M的一台机器,上面运行了zabbix,jenkins,mysql等程序,白天有日志查看需求的时候,带宽占用也比较大。

所以一次性运行完,而且还要让统计后的结果尽量的小,就成了需要思考的问题。不是收集所有日志而只是把相同的统计到一起,所以时间粒度也就需要放大一点,这里我统计的每个小时不同URL的访问时间,IP,状态码等。当然如果需要更加精确的统计数据比如说秒,分也是可以做的,这个放到后面再说。

为什么用go?

  • 我刚好开始学go语言,才把语法弄清楚了一些。
  • 看到一篇讲词频统计的代码片段,地址 https://studygolang.com/articles/3393 ,觉得这个刚好能解决我的问题就照着写了。
  • 一次编译到处运行,这一点是我觉得最爽的地方

首先是引入需要用到的包

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
	"flag"
	"github.com/astaxie/beego/orm"
	_ "github.com/go-sql-driver/mysql"
	"strconv"
	"time"
)

之后定义数据的格式

// ngx_res is the ORM model for one row of response-time statistics: for a
// given hour, project and URL it stores how many requests fell into each
// response-time bucket. The bucket limits (0.01, 0.05, 0.1, 0.5) are applied
// to the third column of the log line, presumably seconds — confirm against
// the nginx log_format in use.
type ngx_res struct {
	Id        int64     // row id; presumably the ORM-managed primary key — confirm
	Date      time.Time // hour the counters belong to
	Url       string    // request path with query string stripped
	Project   string    // project name derived from the log file name
	Xiaoyu10  int       // requests with response time <= 0.01
	Xiaoyu50  int       // requests with response time in (0.01, 0.05]
	Xiaoyu100 int       // requests with response time in (0.05, 0.1]
	Xiaoyu500 int       // requests with response time in (0.1, 0.5]
	Dayu500   int       // requests with response time > 0.5
}
// ngx_ip is the ORM model for per-IP access counts: how many requests one
// client IP made to a project within one hour.
type ngx_ip struct {
	Id      int64     // row id; presumably the ORM-managed primary key — confirm
	Date    time.Time // hour the counter belongs to
	Project string    // project name derived from the log file name
	Ip      string    // client address taken from the first log column
	// Times is a request count. It is an int (not a string) so the generated
	// column is numeric, matching the int that Add_ip inserts and the type of
	// ngx_access.Times.
	Times int
}
// ngx_access is the ORM model for access counts: how many times a URL
// returned a given status code for a project within one hour.
type ngx_access struct {
	Id      int64     // row id; presumably the ORM-managed primary key — confirm
	Date    time.Time // hour the counter belongs to
	Project string    // project name derived from the log file name
	Code    int64     // HTTP status code
	Url     string    // request path with query string stripped
	Times   int       // number of matching requests
}
// time_res holds the in-memory response-time histogram for one hour/URL key.
// Field order matters: the struct is built with positional literals.
type time_res struct {
	// Counters ordered from the fastest bucket to the slowest one.
	times_10, times_50, times_100, times_500, times_dayu_500 int
}

然后定义三个map 之后会把统计的东西放进去

// Package-level accumulators filled by read and drained by main.
var (
	// hourmap counts requests per "hour:url:status" key.
	hourmap = make(map[string]int)
	// resmap keeps the response-time histogram per "hour:url" key.
	resmap = make(map[string]time_res)
	// ipmap counts requests per "hour:ip" key.
	ipmap = make(map[string]int)
)

定义一个读取及统计文件的函数

// read scans the nginx access log at filename line by line and accumulates
// hourly statistics into the package-level maps hourmap, ipmap and resmap.
//
// Each line is expected to be space separated and to start with the fields
//
//	ip status response_time [timestamp] method url ...
//
// Lines that are blank, have fewer than six fields, lack the "[" before the
// timestamp or carry a non-numeric response time are skipped; they do not
// stop the scan. If the file cannot be opened the error is printed and
// nothing is counted.
func read(filename string) {
	fi, err := os.Open(filename)
	if err != nil {
		fmt.Printf("Error: %s\n", err)
		return
	}
	defer fi.Close()

	br := bufio.NewReader(fi)
	for done := false; !done; {
		// ReadString hands back whatever was read together with the error, so
		// a final line without a trailing newline still has to be processed.
		line, readErr := br.ReadString('\n')
		done = readErr != nil

		line = strings.TrimRight(line, "\r\n")
		if line == "" {
			continue
		}

		fields := strings.Split(line, " ")
		if len(fields) < 6 {
			continue
		}
		ip := fields[0]
		code := fields[1]

		// "[2018-01-16T10:42:50+08:00]" -> "2018-01-16T10": keep date and hour.
		stamp := strings.Split(fields[3], "[")
		if len(stamp) < 2 {
			continue
		}
		hour := strings.Split(stamp[1], ":")[0]

		// Drop the query string and anything after "=" so that parameterised
		// requests collapse onto one URL.
		url := strings.Split(fields[5], "?")[0]
		url = strings.Split(url, "=")[0]

		resp, parseErr := strconv.ParseFloat(fields[2], 64)
		if parseErr != nil {
			continue
		}

		hourmap[hour+":"+url+":"+code]++
		ipmap[hour+":"+ip]++

		// A missing key yields the zero histogram, so first and later hits
		// share the same bucket selection.
		key := hour + ":" + url
		hist := resmap[key]
		switch {
		case resp <= 0.01:
			hist.times_10++
		case resp <= 0.05:
			hist.times_50++
		case resp <= 0.1:
			hist.times_100++
		case resp <= 0.5:
			hist.times_500++
		default:
			hist.times_dayu_500++
		}
		resmap[key] = hist
	}
}

写入数据库

####需要在这里说下的是如果你是统计的完全不相干的项目的日志,我认为不放在一个表里面是比较好的,也就是修改一下上面的数据格式名称,在下面初始化数据库的时候再修改new()中的东西,再在后面改下sql中的表名。这里写入数据库我使用beego提供的orm,事实上我只会这一种方式。选用的数据库是mariadb。这里有个坑,mariadb的timezone CST 是美国中部时间。。。。。

####初始化数据库

// RegisterDb registers the MySQL driver, the "default" database alias and the
// three statistics models (ngx_access, ngx_ip, ngx_res) with the beego ORM.
//
// The connection string is built as uname:passwd@tcp(ipaddr:port)/databasename
// with charset=utf8. Registration errors are printed instead of being
// discarded, so a wrong address or credential is visible at start-up.
func RegisterDb(uname string, passwd string, ipaddr string, port string, databasename string) {
	if err := orm.RegisterDriver("mysql", orm.DRMySQL); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	dsn := uname + ":" + passwd + "@tcp(" + ipaddr + ":" + port + ")/" + databasename + "?charset=utf8"
	// NOTE(review): the trailing 10 is presumably the max idle connection
	// count — confirm against the beego ORM documentation.
	if err := orm.RegisterDataBase("default", "mysql", dsn, 10); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	orm.RegisterModel(new(ngx_access), new(ngx_ip), new(ngx_res))
}

####定义插入数据的函数

// Add_access inserts one row into the ngx_access table.
//
// code is parsed as a base-10 integer first; when it is not numeric the parse
// error is returned and nothing is inserted. Otherwise the error of the
// INSERT statement (nil on success) is returned.
func Add_access(project string, date string, code string, url string, times int) error {
	db := orm.NewOrm()

	statusCode, err := strconv.ParseInt(code, 10, 64)
	if err != nil {
		return err
	}

	const query = "INSERT INTO `ngx_access` (`date`, `project`, `code`, `url`, `times`) VALUES (?, ?, ?, ?, ?);"
	_, err = db.Raw(query, date, project, statusCode, url, times).Exec()
	return err
}
// Add_ip inserts one row into the ngx_ip table and returns the error of the
// INSERT statement (nil on success).
func Add_ip(project string, date string, ip string, times int) error {
	const query = "INSERT INTO `ngx_ip` (`date`, `project`, `ip`,`times`) VALUES (?, ?, ?, ?);"

	db := orm.NewOrm()
	_, err := db.Raw(query, date, project, ip, times).Exec()
	return err
}
// Add_res inserts one row of response-time bucket counters into the ngx_res
// table and returns the error of the INSERT statement (nil on success).
func Add_res(project string, date string, url string, xiaoyu10 int, xiaoyu50 int, xiaoyu100 int, xiaoyu500 int, dayu500 int) error {
	const query = "INSERT INTO `ngx_res` (`date`, `project`,`url`,`xiaoyu10`,`xiaoyu50`,`xiaoyu100`,`xiaoyu500`,`dayu500`) VALUES (?, ?, ?, ?, ?, ?, ?, ?);"

	db := orm.NewOrm()
	_, err := db.Raw(query, date, project, url, xiaoyu10, xiaoyu50, xiaoyu100, xiaoyu500, dayu500).Exec()
	return err
}

####再定义一个时间替换函数,作用是把字符串转换为时间类型

// time_tihuan converts a string such as "2018-01-16T10:00:00 +0800" into a
// time.Time. When the string does not match the layout the parse error is
// printed and the zero time is returned.
func time_tihuan(date_hour string) time.Time {
	const layout = "2006-01-02T15:04:05 -0700"

	parsed, parseErr := time.Parse(layout, date_hour)
	if parseErr != nil {
		fmt.Println(parseErr)
	}
	return parsed
}

####初始化数据库填入数据库的连接信息

// init registers the database connection before main runs. All arguments are
// placeholders and must be replaced with the real user name, password, host,
// port and database name.
func init() {
	RegisterDb("username", "password", "xxx.xxx.xxx.xxx", "xxxx", "databasename")
}

主函数定义

// main reads the log named by the -filename flag, aggregates it with read,
// makes sure the tables exist and writes the aggregated counters to the
// database. Errors from the database calls are printed and do not stop the
// remaining inserts.
func main() {
	var filename string
	flag.StringVar(&filename, "filename", "2017-12-35_xxxxx.log", "nginx access log filename!")
	flag.Parse()

	// The project name is the second "_"-separated piece of the text before
	// the first ".", e.g. "2018-01-16_mobile.log" -> "mobile". Check the shape
	// up front instead of panicking on an index after the file was processed.
	nameParts := strings.Split(strings.Split(filename, ".")[0], "_")
	if len(nameParts) < 2 {
		fmt.Printf("Error: filename %q does not look like <date>_<project>.log\n", filename)
		return
	}
	project := nameParts[1]

	// After read returns, the statistics live in hourmap, ipmap and resmap.
	read(filename)

	orm.Debug = true
	if err := orm.RunSyncdb("default", false, true); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	// toDate turns an hour key such as "2018-01-16T10" into the string that
	// is inserted into the date column (offset fixed to +0800).
	toDate := func(hour string) string {
		return time_tihuan(hour + ":00:00 +0800").Format("2006-01-02 15:04:05 -0700")
	}
	report := func(err error) {
		if err != nil {
			fmt.Printf("Error: %s\n", err)
		}
	}

	// URLs requested exactly once in an hour are folded into one "oneurl"
	// row per hour, which filters out scanner-like noise.
	hourmap_one := make(map[string]int)
	for k, v := range hourmap {
		// Keys look like "hour:url:code". The URL itself may contain ":",
		// so cut at the first and the last colon instead of splitting on
		// every colon.
		first := strings.Index(k, ":")
		last := strings.LastIndex(k, ":")
		if first < 0 || first == last {
			continue
		}
		hour, url, code := k[:first], k[first+1:last], k[last+1:]

		if v != 1 {
			report(Add_access(project, toDate(hour), code, url, v))
		} else {
			hourmap_one[hour]++
		}
	}
	for hour, v := range hourmap_one {
		report(Add_access(project, toDate(hour), "200", "oneurl", v))
	}

	// Only IPs with more than 5 requests in an hour are stored.
	for k, v := range ipmap {
		if v <= 5 {
			continue
		}
		// "hour:ip" — split once so IPv6 addresses stay intact.
		a := strings.SplitN(k, ":", 2)
		if len(a) != 2 {
			continue
		}
		report(Add_ip(project, toDate(a[0]), a[1], v))
	}

	for k, v := range resmap {
		// "hour:url" — split once so URLs containing ":" stay intact.
		a := strings.SplitN(k, ":", 2)
		if len(a) != 2 {
			continue
		}
		report(Add_res(project, toDate(a[0]), a[1], v.times_10, v.times_50, v.times_100, v.times_500, v.times_dayu_500))
	}
}

完整的代码

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
	//"sort"
	"flag"
	"github.com/astaxie/beego/orm"
	_ "github.com/go-sql-driver/mysql"
	"strconv"
	"time"
)

// Models that are written to the database.

// ngx_res is the ORM model for one row of response-time statistics: per hour,
// project and URL it stores how many requests fell into each response-time
// bucket.
type ngx_res struct {
	Id      int64
	Date    time.Time
	Url     string
	Project string

	// Bucket counters, fastest to slowest: <=0.01, <=0.05, <=0.1, <=0.5, >0.5.
	Xiaoyu10, Xiaoyu50, Xiaoyu100, Xiaoyu500, Dayu500 int
}
// ngx_ip is the ORM model for per-IP access counts: how many requests one
// client IP made to a project within one hour.
type ngx_ip struct {
	Id      int64     // row id; presumably the ORM-managed primary key — confirm
	Date    time.Time // hour the counter belongs to
	Project string    // project name derived from the log file name
	Ip      string    // client address taken from the first log column
	// Times is a request count. It is an int (not a string) so the generated
	// column is numeric, matching the int that Add_ip inserts and the type of
	// ngx_access.Times.
	Times int
}
// ngx_access is the ORM model for access counts: how many times a URL
// returned a given status code for a project within one hour. Rows are
// inserted by Add_access.
type ngx_access struct {
	Id      int64 // row id; presumably the ORM-managed primary key — confirm
	Date    time.Time // hour the counter belongs to
	Project string // project name derived from the log file name
	Code    int64 // HTTP status code
	Url     string // request path with query string stripped
	Times   int // number of matching requests
}
// time_res holds the in-memory response-time histogram for one hour/URL key.
// Field order matters: the struct is built with positional literals.
type time_res struct {
	// Counters ordered from the fastest bucket to the slowest one.
	times_10, times_50, times_100, times_500, times_dayu_500 int
}

// Add_access inserts one row into the ngx_access table.
//
// code is parsed as a base-10 integer first; when it is not numeric the parse
// error is returned and nothing is inserted. Otherwise the error of the
// INSERT statement (nil on success) is returned.
func Add_access(project string, date string, code string, url string, times int) error {
	db := orm.NewOrm()

	statusCode, err := strconv.ParseInt(code, 10, 64)
	if err != nil {
		return err
	}

	const query = "INSERT INTO `ngx_access` (`date`, `project`, `code`, `url`, `times`) VALUES (?, ?, ?, ?, ?);"
	_, err = db.Raw(query, date, project, statusCode, url, times).Exec()
	return err
}
// Add_ip inserts one row into the ngx_ip table and returns the error of the
// INSERT statement (nil on success).
func Add_ip(project string, date string, ip string, times int) error {
	const query = "INSERT INTO `ngx_ip` (`date`, `project`, `ip`,`times`) VALUES (?, ?, ?, ?);"

	db := orm.NewOrm()
	_, err := db.Raw(query, date, project, ip, times).Exec()
	return err
}
// Add_res inserts one row of response-time bucket counters into the ngx_res
// table and returns the error of the INSERT statement (nil on success).
func Add_res(project string, date string, url string, xiaoyu10 int, xiaoyu50 int, xiaoyu100 int, xiaoyu500 int, dayu500 int) error {
	const query = "INSERT INTO `ngx_res` (`date`, `project`,`url`,`xiaoyu10`,`xiaoyu50`,`xiaoyu100`,`xiaoyu500`,`dayu500`) VALUES (?, ?, ?, ?, ?, ?, ?, ?);"

	db := orm.NewOrm()
	_, err := db.Raw(query, date, project, url, xiaoyu10, xiaoyu50, xiaoyu100, xiaoyu500, dayu500).Exec()
	return err
}

// RegisterDb registers the MySQL driver, the "default" database alias and the
// three statistics models (ngx_access, ngx_ip, ngx_res) with the beego ORM.
//
// The connection string is built as uname:passwd@tcp(ipaddr:port)/databasename
// with charset=utf8. Registration errors are printed instead of being
// discarded, so a wrong address or credential is visible at start-up.
func RegisterDb(uname string, passwd string, ipaddr string, port string, databasename string) {
	if err := orm.RegisterDriver("mysql", orm.DRMySQL); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	dsn := uname + ":" + passwd + "@tcp(" + ipaddr + ":" + port + ")/" + databasename + "?charset=utf8"
	// NOTE(review): the trailing 10 is presumably the max idle connection
	// count — confirm against the beego ORM documentation.
	if err := orm.RegisterDataBase("default", "mysql", dsn, 10); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	orm.RegisterModel(new(ngx_access), new(ngx_ip), new(ngx_res))
}

// Package-level accumulators filled by read and drained by main.
var (
	// hourmap counts requests per "hour:url:status" key.
	hourmap = make(map[string]int)
	// resmap keeps the response-time histogram per "hour:url" key.
	resmap = make(map[string]time_res)
	// ipmap counts requests per "hour:ip" key.
	ipmap = make(map[string]int)
)

// read scans the nginx access log at filename line by line and accumulates
// hourly statistics into the package-level maps hourmap, ipmap and resmap.
//
// Each line is expected to be space separated and to start with the fields
//
//	ip status response_time [timestamp] method url ...
//
// Lines that are blank, have fewer than six fields, lack the "[" before the
// timestamp or carry a non-numeric response time are skipped; they do not
// stop the scan. If the file cannot be opened the error is printed and
// nothing is counted.
func read(filename string) {
	fi, err := os.Open(filename)
	if err != nil {
		fmt.Printf("Error: %s\n", err)
		return
	}
	defer fi.Close()

	br := bufio.NewReader(fi)
	for done := false; !done; {
		// ReadString hands back whatever was read together with the error, so
		// a final line without a trailing newline still has to be processed.
		line, readErr := br.ReadString('\n')
		done = readErr != nil

		line = strings.TrimRight(line, "\r\n")
		if line == "" {
			continue
		}

		fields := strings.Split(line, " ")
		if len(fields) < 6 {
			continue
		}
		ip := fields[0]
		code := fields[1]

		// "[2018-01-16T10:42:50+08:00]" -> "2018-01-16T10": keep date and hour.
		stamp := strings.Split(fields[3], "[")
		if len(stamp) < 2 {
			continue
		}
		hour := strings.Split(stamp[1], ":")[0]

		// Drop the query string and anything after "=" so that parameterised
		// requests collapse onto one URL.
		url := strings.Split(fields[5], "?")[0]
		url = strings.Split(url, "=")[0]

		resp, parseErr := strconv.ParseFloat(fields[2], 64)
		if parseErr != nil {
			continue
		}

		hourmap[hour+":"+url+":"+code]++
		ipmap[hour+":"+ip]++

		// A missing key yields the zero histogram, so first and later hits
		// share the same bucket selection.
		key := hour + ":" + url
		hist := resmap[key]
		switch {
		case resp <= 0.01:
			hist.times_10++
		case resp <= 0.05:
			hist.times_50++
		case resp <= 0.1:
			hist.times_100++
		case resp <= 0.5:
			hist.times_500++
		default:
			hist.times_dayu_500++
		}
		resmap[key] = hist
	}
}

// time_tihuan converts a string such as "2018-01-16T10:00:00 +0800" into a
// time.Time. When the string does not match the layout the parse error is
// printed and the zero time is returned.
func time_tihuan(date_hour string) time.Time {
	const layout = "2006-01-02T15:04:05 -0700"

	parsed, parseErr := time.Parse(layout, date_hour)
	if parseErr != nil {
		fmt.Println(parseErr)
	}
	return parsed
}

// init registers the database connection before main runs. All arguments are
// placeholders and must be replaced with the real user name, password, host,
// port and database name.
func init() {
	RegisterDb("username", "password", "ipaddr", "port", "databasename")
}
// main reads the log named by the -filename flag, aggregates it with read,
// makes sure the tables exist and writes the aggregated counters to the
// database. Errors from the database calls are printed and do not stop the
// remaining inserts.
func main() {
	var filename string
	flag.StringVar(&filename, "filename", "2017-12-35_mobile.log", "nginx access log filename!")
	flag.Parse()

	// The project name is the second "_"-separated piece of the text before
	// the first ".", e.g. "2018-01-16_mobile.log" -> "mobile". Check the shape
	// up front instead of panicking on an index after the file was processed.
	nameParts := strings.Split(strings.Split(filename, ".")[0], "_")
	if len(nameParts) < 2 {
		fmt.Printf("Error: filename %q does not look like <date>_<project>.log\n", filename)
		return
	}
	project := nameParts[1]

	// After read returns, the statistics live in hourmap, ipmap and resmap.
	read(filename)

	orm.Debug = true
	if err := orm.RunSyncdb("default", false, true); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	// toDate turns an hour key such as "2018-01-16T10" into the string that
	// is inserted into the date column (offset fixed to +0800).
	toDate := func(hour string) string {
		return time_tihuan(hour + ":00:00 +0800").Format("2006-01-02 15:04:05 -0700")
	}
	report := func(err error) {
		if err != nil {
			fmt.Printf("Error: %s\n", err)
		}
	}

	// URLs requested exactly once in an hour are folded into one "oneurl"
	// row per hour, which filters out scanner-like noise.
	hourmap_one := make(map[string]int)
	for k, v := range hourmap {
		// Keys look like "hour:url:code". The URL itself may contain ":",
		// so cut at the first and the last colon instead of splitting on
		// every colon.
		first := strings.Index(k, ":")
		last := strings.LastIndex(k, ":")
		if first < 0 || first == last {
			continue
		}
		hour, url, code := k[:first], k[first+1:last], k[last+1:]

		if v != 1 {
			report(Add_access(project, toDate(hour), code, url, v))
		} else {
			hourmap_one[hour]++
		}
	}
	for hour, v := range hourmap_one {
		report(Add_access(project, toDate(hour), "200", "oneurl", v))
	}

	// Only IPs with more than 5 requests in an hour are stored.
	for k, v := range ipmap {
		if v <= 5 {
			continue
		}
		// "hour:ip" — split once so IPv6 addresses stay intact.
		a := strings.SplitN(k, ":", 2)
		if len(a) != 2 {
			continue
		}
		report(Add_ip(project, toDate(a[0]), a[1], v))
	}

	for k, v := range resmap {
		// "hour:url" — split once so URLs containing ":" stay intact.
		a := strings.SplitN(k, ":", 2)
		if len(a) != 2 {
			continue
		}
		report(Add_res(project, toDate(a[0]), a[1], v.times_10, v.times_50, v.times_100, v.times_500, v.times_dayu_500))
	}
}

转载于:https://my.oschina.net/u/1763608/blog/1607871

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值