Scala处理文件

import java.io.File
import java.util.concurrent.ConcurrentHashMap

import com.alibaba.fastjson.JSONObject

import scala.collection.{JavaConversions, mutable}
import scala.io.Source
import scala.reflect.io.{Directory, Path}
import scala.util.matching.Regex



/**
 * Collects Kafka partition-leader information from `.log` files found under [[dir]].
 *
 * Each file is expected to contain a timestamp line followed by per-partition
 * description lines (`Topic: .. Partition: .. Leader: ..`). For every
 * `topic-partition` key the leader seen in a file is stored in [[cmaps]] under
 * that file's timestamp.
 */
class KafkaAnalysis {
  /** File names accepted by [[listFiles]]: anything ending in `.log`. */
  val fileregex : Regex = ".*\\.log$".r

  /** Directory that is scanned for log files. */
  val dir : String = "D:\\securedownload"

  /** `topic-partition` -> JSON object mapping checkpoint timestamp -> leader id. */
  val cmaps : ConcurrentHashMap[String, JSONObject] = new ConcurrentHashMap()

  /**
   * Lists the log files to analyse.
   *
   * @return the regular files yielded by `walkFilter` on [[dir]] whose name matches [[fileregex]]
   */
  def listFiles() : Iterator[File] = {
    val root = new File(dir)
    Directory(Path.jfile2path(root))
      .walkFilter(path => path.isFile && fileregex.findAllIn(path.name).nonEmpty)
      .map(_.jfile)
  }

  /**
   * Parses every file returned by [[listFiles]] in parallel and merges the
   * results into [[cmaps]].
   */
  def compact() : Unit = {
    listFiles().toList.par.foreach { file =>
      val source = Source.fromFile(file)
      // Close the handle even when parsing throws; otherwise one descriptor leaks per file.
      try fetchParams(source.getLines())
      finally source.close()
    }
  }

  /**
   * Extracts the checkpoint timestamp and the partition leaders from the given
   * lines and records them in [[cmaps]].
   *
   * If several timestamp lines are present the last one wins; if none is
   * present nothing is recorded. If a `topic-partition` key occurs more than
   * once, the last leader seen is kept.
   *
   * @param iter lines of a single log file
   */
  def fetchParams(iter : Iterator[String]) : Unit = {
    // Partition is captured with `\d+`: a single `\d` would fold partitions 10-19 into key "<topic>-1".
    val lineRegex = ".*Topic:\\s*(\\S+)\\s*Partition:\\s*(\\d+).*Leader:\\s*(\\d+)\\s*.*".r
    // NOTE(review): only timestamps starting with "2017-" are recognised.
    val timeRegex = "\\s*(2017-\\d+-\\d+\\s\\d+:\\d+:\\d+)\\s+.*".r
    var time : Option[String] = None
    val leaders = new mutable.HashMap[String, String]()

    iter.foreach { line =>
      lineRegex.findFirstMatchIn(line) match {
        case Some(m) =>
          leaders.put(m.group(1) + "-" + m.group(2), m.group(3))
        case None =>
          timeRegex.findFirstMatchIn(line).foreach(m => time = Some(m.group(1)))
      }
    }

    time.foreach { stamp =>
      leaders.foreach { case (key, leader) =>
        // putIfAbsent makes the "create if missing" step atomic; compact() calls
        // this method from several threads at once.
        val fresh = new JSONObject()
        val json = Option(cmaps.putIfAbsent(key, fresh)).getOrElse(fresh)
        // The per-key JSONObject is shared between worker threads, so guard the
        // write (JSONObject is presumably backed by a plain map — TODO confirm).
        json.synchronized {
          json.put(stamp, leader)
        }
      }
    }
  }
}

/**
 * Command-line entry point: runs the analysis and prints, for every
 * `topic-partition` key (in descending key order), how many times each leader
 * value was recorded.
 */
object KafkaAnalysis {
  def main(args : Array[String]) : Unit = {
    val analysis = new KafkaAnalysis()
    analysis.compact()

    // Snapshot the concurrent map and order it by key, largest first.
    val entries = JavaConversions.mapAsScalaMap(analysis.cmaps).toList
    val descending = entries.sortWith((left, right) => left._1 > right._1)

    descending.foreach { entry =>
      val partitionKey = entry._1
      val history = entry._2
      print(partitionKey + "    ")

      // Count occurrences of each recorded leader value for this key.
      val recorded = JavaConversions.asScalaIterator(history.values().iterator()).toList
      val byLeader = recorded.groupBy(identity)
      byLeader.foreach { case (leader, hits) =>
        print(leader + ":")
        print(hits.size)
        print("  ")
      }
      println()
    }
  }
}

文件数据为:

2017-06-01 22:00:01 checkpoint the topics information
Topic:A PartitionCount:20 ReplicationFactor:3 Configs:retention.ms=604800000
Topic: A Partition: 0 Leader: 11250 Replicas: 11250,11251,11252 Isr: 11251,11250,11252
Topic: A Partition: 1 Leader: 11251 Replicas: 11251,11252,11254 Isr: 11251,11252,11254
Topic: A Partition: 2 Leader: 11252 Replicas: 11252,11254,11255 Isr: 11255,11252,11254
Topic: A Partition: 3 Leader: 11254 Replicas: 11254,11255,11256 Isr: 11255,11256,11254
Topic: A Partition: 4 Leader: 11255 Replicas: 11255,11256,11257 Isr: 11257,11255,11256
Topic: A Partition: 5 Leader: 11256 Replicas: 11256,11257,11258 Isr: 11256,11258,11257
Topic: A Partition: 6 Leader: 11257 Replicas: 11257,11258,11247 Isr: 11257,11258,11247
Topic: A Partition: 7 Leader: 11258 Replicas: 11258,11247,11249 Isr: 11258,11247,11249
Topic: A Partition: 8 Leader: 11247 Replicas: 11247,11249,11250 Isr: 11250,11247,11249
Topic: A Partition: 9 Leader: 11249 Replicas: 11249,11250,11251 Isr: 11251,11250,11249
Topic: A Partition: 10 Leader: 11250 Replicas: 11250,11252,11254 Isr: 11250,11252,11254
Topic: A Partition: 11 Leader: 11251 Replicas: 11251,11254,11255 Isr: 11251,11255,11254
Topic: A Partition: 12 Leader: 11252 Replicas: 11252,11255,11256 Isr: 11255,11256,11252
Topic: A Partition: 13 Leader: 11254 Replicas: 11254,11256,11257 Isr: 11257,11256,11254
Topic: A Partition: 14 Leader: 11255 Replicas: 11255,11257,11258 Isr: 11257,11255,11258
Topic: A Partition: 15 Leader: 11256 Replicas: 11256,11258,11247 Isr: 11256,11258,11247
Topic: A Partition: 16 Leader: 11257 Replicas: 11257,11247,11249 Isr: 11257,11247,11249
Topic: A Partition: 17 Leader: 11258 Replicas: 11258,11249,11250 Isr: 11250,11258,11249
Topic: A Partition: 18 Leader: 11247 Replicas: 11247,11250,11251 Isr: 11251,11250,11247
Topic: A Partition: 19 Leader: 11249 Replicas: 11249,11251,11252 Isr: 11251,11252,11249
Topic:B PartitionCount:20 ReplicationFactor:3 Configs:retention.ms=604800000
Topic: B Partition: 0 Leader: 11249 Replicas: 11249,11247,11250 Isr: 11250,11247,11249
Topic: B Partition: 1 Leader: 11250 Replicas: 11250,11249,11251 Isr: 11251,11250,11249
Topic: B Partition: 2 Leader: 11251 Replicas: 11251,11250,11252 Isr: 11251,11250,11252
Topic: B Partition: 3 Leader: 11252 Replicas: 11252,11251,11254 Isr: 11251,11252,11254
Topic: B Partition: 4 Leader: 11254 Replicas: 11254,11252,11255 Isr: 11255,11252,11254
Topic: B Partition: 5 Leader: 11255 Replicas: 11255,11254,11256 Isr: 11255,11256,11254
Topic: B Partition: 6 Leader: 11256 Replicas: 11256,11255,11257 Isr: 11255,11256,11257
Topic: B Partition: 7 Leader: 11257 Replicas: 11257,11256,11258 Isr: 11257,11256,11258
Topic: B Partition: 8 Leader: 11258 Replicas: 11258,11257,11247 Isr: 11257,11258,11247
Topic: B Partition: 9 Leader: 11247 Replicas: 11247,11258,11249 Isr: 11258,11247,11249
Topic: B Partition: 10 Leader: 11249 Replicas: 11249,11250,11251 Isr: 11251,11250,11249
Topic: B Partition: 11 Leader: 11250 Replicas: 11250,11251,11252 Isr: 11251,11250,11252
Topic: B Partition: 12 Leader: 11251 Replicas: 11251,11252,11254 Isr: 11251,11252,11254
Topic: B Partition: 13 Leader: 11252 Replicas: 11252,11254,11255 Isr: 11255,11252,11254
Topic: B Partition: 14 Leader: 11254 Replicas: 11254,11255,11256 Isr: 11255,11256,11254
Topic: B Partition: 15 Leader: 11255 Replicas: 11255,11256,11257 Isr: 11257,11255,11256
Topic: B Partition: 16 Leader: 11256 Replicas: 11256,11257,11258 Isr: 11256,11258,11257
Topic: B Partition: 17 Leader: 11257 Replicas: 11257,11258,11247 Isr: 11257,11258,11247
Topic: B Partition: 18 Leader: 11258 Replicas: 11258,11247,11249 Isr: 11258,11247,11249
Topic: B Partition: 19 Leader: 11247 Replicas: 11247,11249,11250 Isr: 11250,11247,11249

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值