日志文件,格式如下:
# Http-Method IP/domain times
GET www.baidu.com 1234
GET www.qq.com 123
GET 127.0.0.1 123
GET 10.123.76.3 12
GET 10.123.76.4 343
统计对所有IP的GET请求数, 比如上述的例子应该返回478
import java.io._
import scala.io._

/** Lazily reads the lines of a log from `is`.
  *
  * The returned iterator pulls from `is` on demand, so the caller must keep the
  * stream open until the iterator has been consumed, and is responsible for
  * closing the stream afterwards.
  *
  * @param is stream containing the log text (decoded with the implicit default `Codec`)
  * @return an iterator over the lines of the log
  */
def parseLog(is: InputStream): Iterator[String] =
  Source.fromInputStream(is).getLines()

/** Sums the request counts of all GET records whose target is an IP address.
  *
  * Each record is expected to look like `<Http-Method> <IP/domain> <times>`,
  * with fields separated by whitespace. Blank lines and lines starting with `#`
  * are ignored. A record is counted only when its method is GET (case-insensitive)
  * and its target looks like a dotted-quad IPv4 address; octet ranges are not
  * validated. Records with fewer than three fields or a count that is not a
  * valid `Int` are skipped instead of aborting the whole computation.
  *
  * The total is accumulated as an `Int`, so extremely large totals can overflow.
  *
  * @param iter the lines of the log
  * @return the sum of the `times` field over all matching records
  */
def countGET(iter: Iterator[String]): Int = {
  // Compiled once up front; String.matches would recompile the regex for every line.
  val ipv4 = """\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}""".r.pattern

  // Request count carried by one line, or None when the line must not be counted.
  def ipGetCount(line: String): Option[Int] = {
    val record = line.trim
    if (record.isEmpty || record.startsWith("#")) {
      None
    } else {
      record.split("\\s+") match {
        case Array(method, host, times, _*)
            if method.equalsIgnoreCase("GET") && ipv4.matcher(host).matches() =>
          // A corrupt count field yields None rather than a NumberFormatException.
          scala.util.Try(times.toInt).toOption
        case _ =>
          None
      }
    }
  }

  iter.flatMap(line => ipGetCount(line).iterator).sum
}

// Script entry point: the first argument is the path of the log file to analyse.
args.headOption match {
  case Some(path) =>
    val in = new FileInputStream(path)
    // parseLog is lazy, so the stream may only be closed after countGET has consumed it.
    try println(countGET(parseLog(in)))
    finally in.close()
  case None =>
    Console.err.println("usage: <script> <log-file>")
}

// Original source: http://outofmemory.cn/code-snippet/37055/parse-yiduan-rizhi