/**
 * Converts raw Nginx access-log lines into [[AccessLog]] records.
 */
object NginxLogParser {

  /**
   * Regular expression used to split one access-log line into nine capture groups
   * (`.r` turns the string literal into a `Regex`):
   *
   *  1. client IP
   *  1. client identity
   *  1. user id
   *  1. access time, captured together with its surrounding `[` `]`
   *  1. request line, captured together with its surrounding quotes
   *  1. three-digit response code
   *  1. size of the returned content
   *  1. referrer, captured together with its surrounding quotes
   *  1. user agent, captured together with its surrounding quotes
   */
  val PATTERN: scala.util.matching.Regex =
    """(\S+) (\S+) (\S+) (\[.*\]) (\".*\") (\d{3}) (\d+) (\".*?\") (\".*?\")""".r

  /** Placeholder record returned when there is nothing to parse. */
  private def emptyLog: AccessLog = AccessLog("", "", "", "", "", 0, 0L, "", "")

  /**
   * Parses a single access-log line.
   *
   * The first occurrence of [[PATTERN]] inside the line is used, so any text
   * before or after the matched portion is ignored.
   *
   * @param log the raw log line
   * @return the parsed record, or a record with empty strings and zero numbers
   *         when `log` is `null` or empty
   * @throws RuntimeException if `log` is non-empty but does not contain a match for [[PATTERN]]
   * @throws NumberFormatException if the content-size digits do not fit in a `Long`
   */
  def parseLog2Line(log: String): AccessLog = {
    // The original guard was inverted: it tried to parse only empty input (which can
    // never match) and returned the blank record for every real line.
    if (log == null || log.isEmpty) {
      emptyLog
    } else {
      PATTERN.findFirstMatchIn(log) match {
        case Some(m) =>
          AccessLog(
            m.group(1),
            m.group(2),
            m.group(3),
            m.group(4),
            m.group(5),
            m.group(6).toInt,
            m.group(7).toLong,
            m.group(8),
            m.group(9)
          )
        case None =>
          throw new RuntimeException("Cannot parse log line: " + log)
      }
    }
  }

  /**
   * Demo entry point: parses one hard-coded sample line and prints each parsed
   * field except the user agent, one per line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val line = """111.128.69.30 - - [11/Sep/2018:00:01:00 +0800] "GET /api/getinfo.php?tid=1e35c7b357cd&rid=059e15c97800&gw=gaoke.com&did=2424826&sn=GAOKE_Q330&action=run HTTP/1.1" 200 315 "http://gaoke.com:8848/wx.html?tid=1e35c7b357cd&rid=059e15c97800&gw=gaoke.com&did=2424826&sn=GAOKE_Q330" "Mozilla/5.0 (Linux; Android 6.0; S9 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.91 Mobile Safari/537.36" "-""""
    val log = NginxLogParser.parseLog2Line(line)
    println(log.ip)
    println(log.clientIdent)
    println(log.userId)
    println(log.timestamp)
    println(log.request)
    println(log.responseCode)
    println(log.contentSize)
    println(log.referrer)
  }
}
/**
 * A single access-log record.
 *
 * @param ip           real IP address of the device/user
 * @param clientIdent  client identity
 * @param userId       user
 * @param timestamp    date and time of the access
 * @param request      request information (GET/POST, MAC value, etc.)
 * @param responseCode response status, e.g. 200 for success, 304 for a static (cached) load
 * @param contentSize  size of the returned file
 * @param referrer     page the request was referred from
 * @param ua           user-agent information
 */
case class AccessLog(
  ip: String,
  clientIdent: String,
  userId: String,
  timestamp: String,
  request: String,
  responseCode: Int,
  contentSize: Long,
  referrer: String,
  ua: String
  // forward: String // redirect target page (field currently disabled)
) extends Serializable {

  /** Renders all constructor fields, in declaration order, as one comma-separated string. */
  override def toString: String =
    productIterator.mkString(",")
}