Destined for Greatness
好活儿,名儿叫Drain3(读作der~润三,不是“只润三”)。为了避免诸君怀疑我没有活儿,本文包括以下三个部分。
A.Drain3是个啥。
B.Drain3能干啥。
C.Drain3怎么用。
WARNING!如下的内容只保证简单易懂,不保证严谨正确。
其次,本文不包括使用Drain3进行流式处理的方法,这主要是由于不好演示。
Ⅰ.Introduction
Drain3是一个日志模板挖掘器,正如我是一台烤面包机。
Ⅱ.Overview
Drain3获得日志模板。
打个比方,假如我有如下的单一模板日志:
10月1日 晴 开心
10月2日 阴 难过
10月3日 雨 开心
……
那么,这个日志的模板理应是:
<:DATE:> <:*:> <:*:>
这个例子或许不恰当,我们来看源码给出的例子:
For the input:
connected to 10.0.0.1
connected to 192.168.0.1
Hex number 0xDEADBEAF
user davidoh logged in
user eranr logged in
Drain3 extracts the following templates:
ID=1 : size=2 : connected to <:IP:>
ID=2 : size=1 : Hex number <:HEX:>
ID=3 : size=2 : user <:*:> logged in
这个例子会更贴切。
总结来说,Drain3输入日志,其输出理应是(在鄙人粗浅的理解下)一个字典,而这个字典理应具有如下字段:
change_type - indicates either if a new template was identified, an existing template was changed or message added to an existing cluster.
cluster_id - Sequential ID of the cluster that the log belongs to.
cluster_size - The size (message count) of the cluster that the log belongs to.
cluster_count - Count clusters seen so far.
template_mined - the last template of above cluster_id.
这一部分并不容易理解。
Ⅲ.Methodology
一句话说明白,这是棵树,懂的自然懂,不懂的去看论文。
一些问题
首先,drain倾向于把树深打满,对于许多我明明写了掩码的情况(比如hex),它实则无法正确进行掩码替换,原因不得而知,有可能是我的掩码写得不好。
其次,当日志条目数过多时,日志挖掘器会在运行一段时间后报错,其原因不得而知,毕竟源码不是我写的。
另外,drain几乎无法在没有掩码的情况下有效运作,如下是我为一个数据集撰写的掩码配置文件,仅供参考,各位可以想象一下我在这期间受了多少苦,但请务必想象我是快乐的:
; Persistence of the miner state (snapshots).
[SNAPSHOT]
; Whether the state is compressed before it is saved (per the Drain3 docs).
compress_state = True
; Time between two snapshots, in minutes.
snapshot_interval_minutes = 10
; Masking rules applied to every raw log line before template mining.
[MASKING]
; `masking` is a JSON list of {"regex_pattern", "mask_with"} objects.
; - Every continuation line is indented: configparser only treats indented
;   lines as part of a multi-line value; at column 0 each rule would be read
;   as a separate (and duplicated) key.
; - Backslashes are doubled because the value is decoded as JSON before the
;   patterns are compiled as Python regular expressions.
; - Full-line `;` comments inside the list are dropped by configparser, so
;   they never reach the JSON decoder.
; NOTE(review): rules are presumably applied top to bottom, which is why the
; message-specific rules come first and the generic hex/date fallbacks stay
; last - confirm against the Drain3 masking documentation.
masking = [
    {"regex_pattern": "severity:info,message:conversion request successful", "mask_with": "M-INFO:Conversion"},
    {"regex_pattern": "severity:info,message:Transaction processed: visa ending 0454 Amount: [A-Z]{3}\\d+\\.\\d+", "mask_with": "M-INFO:Transaction"},
    {"regex_pattern": "severity:info,message:Getting supported currencies\\.\\.\\.", "mask_with": "M-INFO:GetSupport"},
    {"regex_pattern": "severity:info,message:PaymentService#Charge invoked with request \\{\"amount\":\\{\"currency_code\":\"[A-Z]+\",\"units\":\"\\d+\",\"nanos\":\\d+\\},\"credit_card\":\\{\"credit_card_number\":\"\\d{4}-\\d{4}-\\d{4}-\\d{4}\",\"credit_card_cvv\":\\d{3},\"credit_card_expiration_year\":\\d{4},\"credit_card_expiration_month\":\\d{1,2}\\}\\}", "mask_with": "M-INFO:Paymentservice"},
    {"regex_pattern": "GetCartAsync called with userId=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "mask_with": "M-GetCartAsync"},
    {"regex_pattern": "AddItemAsync called with userId=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}, productId=[A-Z0-9]+, quantity=\\d+", "mask_with": "M-AddItemAsync"},
    {"regex_pattern": "severity:debug,message:serving product page", "mask_with": "M-DEBUG:ProductPage"},
    {"regex_pattern": "severity:warning,message:failed to retrieve ads", "mask_with": "M-WARN:Failed2RetrieveAds"},
    {"regex_pattern": "severity:debug,message:setting currency", "mask_with": "M-DEBUG:SettingCurrency"},
    {"regex_pattern": "severity:debug,message:request started", "mask_with": "M-DEBUG:RequestStarted"},
    {"regex_pattern": "severity:debug,message:request complete", "mask_with": "M-DEBUG:RequestComplete"},
    {"regex_pattern": "severity:info,message:home", "mask_with": "M-INFO:Home"},
    {"regex_pattern": "level:debug,message:sent request with \\d+ transaction[s]?, \\d+ span[s]?, \\d+ error[s]?, \\d+ metricset[s]?", "mask_with": "M-LEVEL:SentRequests"},
    {"regex_pattern": "severity:info,message:\\[GetQuote\\] completed request", "mask_with": "M-INFO:GetQuoteCompletedRequest"},
    {"regex_pattern": "severity:info,message:\\[GetQuote\\] received request", "mask_with": "M-INFO:GetQuoteReceivedRequest"},
    {"regex_pattern": "severity:INFO,message:\\[Recv ListRecommendations\\] product_ids=\\[(?:'[A-Z0-9]{10}'[,]?\\s?)+\\]", "mask_with": "M-INFO:RecvListRecommendations"},
    {"regex_pattern": "severity:info,message:\\[ShipOrder\\] received request", "mask_with": "M-INFO:ShipOrderReceivedRequest"},
    {"regex_pattern": "severity:info,message:\\[ShipOrder\\] completed request", "mask_with": "M-INFO:ShipOrderCompletedRequest"},
    {"regex_pattern": "severity:debug,message:ENV_PLATFORM is: local", "mask_with": "M-DEBUG:ENV_PLATFORM is local"},
    {"regex_pattern": "severity:info,message:payment went through \\(transaction_id: [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\\)", "mask_with": "M-INFO:PaymentWentThrough"},
    {"regex_pattern": "severity:info,message:order confirmation email sent to \"someone@example\\.com\"", "mask_with": "M-INFO:EmailSent"},
    {"regex_pattern": "severity:debug,message:view user cart", "mask_with": "M-DEBUG:view user cart"},
    {"regex_pattern": "severity:info,message:\\[PlaceOrder\\] user_id=\"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\" user_currency=\"[A-Z]{3}\"", "mask_with": "M-INFO:PlaceOrder"},
    {"regex_pattern": "Failed to export traces, error code: StatusCode\\.UNIMPLEMENTED", "mask_with": "M-Failed to export traces"},
    {"regex_pattern": "severity:INFO,message:A request to send order confirmation email to someone@example\\.com has been received\\.", "mask_with": "M-INFO:EmailReceived"},
    {"regex_pattern": "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}\\.\\d{6}Z\\s+info\\s+xdsproxy\\s+connected to upstream XDS server: istiod\\.istio-system\\.svc:\\d+", "mask_with": "M-xdsproxy"},
    {"regex_pattern": "E\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{9} +\\d{2,} hpack_parser\\.cc:1218\\]\\s+Error parsing metadata: error=invalid value key=content-type value=text/plain; charset=utf-8", "mask_with": "M-Error parsing metadata"},
    {"regex_pattern": "severity:debug,message:adding to cart", "mask_with": "M-DEBUG:Adding2Cart"},
; NOTE(review): the two labels below look off ("OrderPlaces" is probably meant
; to be "OrderPlaced", and a severity:debug pattern is labelled M-INFO). They
; are left unchanged because labels appear verbatim in mined templates and may
; be relied on by stored snapshots or downstream consumers - confirm, then fix.
    {"regex_pattern": "severity:info,message:order placed", "mask_with": "M-INFO:OrderPlaces"},
    {"regex_pattern": "severity:debug,message:placing order", "mask_with": "M-INFO:PlacingOrder"},
    {"regex_pattern": "level:debug,message:gathering metrics", "mask_with": "M-DEBUG:GatheringMetrics"},
    {"regex_pattern": "panic\\(\\{0x[0-9a-f]{6}, 0x[0-9a-f]{6}\\}\\)", "mask_with": "M-panic"},
; The brackets around "running" are escaped so they match literally (an
; unescaped [running] is a one-character class), and the goroutine id is
; matched as a decimal number of any length.
    {"regex_pattern": "goroutine \\d+ \\[running\\]:", "mask_with": "M-goroutine"},
; Disabled rule kept from the original file; the reason is not recorded.
;{"regex_pattern": "info: Elastic\\.Apm\\[0\\]", "mask_with": "M-Elastic"},
    {"regex_pattern": "Transient error StatusCode\\.UNAVAILABLE encountered while exporting traces, retrying in 1s\\.", "mask_with": "M-Transient error"},
    {"regex_pattern": "/usr/local/go/src/net/http/server\\.go:[0-9a-f]+ \\+0x[0-9a-f]+", "mask_with": "M-/usr/server"},
    {"regex_pattern": "/usr/local/go/src/runtime/panic\\.go:[0-9a-f]+ \\+0x[0-9a-f]+", "mask_with": "M-/usr/panic"},
    {"regex_pattern": "EmptyCartAsync called with userId=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", "mask_with": "M-EmptyCartAsync"},
; Generic fallbacks. The hex rule accepts upper- and lower-case digits so that
; values such as 0xDEADBEAF are masked as well.
    {"regex_pattern": "0x[0-9a-fA-F]+", "mask_with": "M-HEX"},
    {"regex_pattern": "[0-9]{4}/[0-9]{2}/[0-9]{2}", "mask_with": "M-DATE"}
    ]
; Text wrapped around every mask label in the output, e.g. <:M-HEX:>.
mask_prefix = <:
mask_suffix = :>
; Tuning of the Drain parse tree and clustering.
[DRAIN]
; Maximum depth of the parse tree (the Drain3 docs give 3 as the minimum).
depth = 4
; Maximum number of children of an internal tree node.
max_children = 10
; Upper bound on tracked clusters; per the Drain3 docs, older clusters are
; replaced once the bound is reached.
max_clusters = 200
; Extra token delimiters used in addition to whitespace, written as a
; Python list literal.
extra_delimiters = ["_"]
; Similarity threshold: per the Drain3 docs, a message whose share of matching
; tokens is below this value starts a new cluster.
sim_th = 0.4
; Built-in profiler of the template miner.
[PROFILING]
; Presumably the interval, in seconds, between two profiling reports - confirm
; against the Drain3 documentation.
report_sec = 30
; Switches profiling on or off.
enabled = True