Spark保存数据到HBase_Spark读取HBase的数据

// Build the HBase/Hadoop configuration used as the input spec for the Spark job.
// The two site files are loaded from fixed paths inside a CDH 5.4.4 parcel.
val conf = HBaseConfiguration.create()

conf.addResource(new Path("/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/hbase/conf/hbase-site.xml"))

conf.addResource(new Path("/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/hadoop/etc/hadoop/core-site.xml"))

// Table that TableInputFormat will scan.
conf.set(TableInputFormat.INPUT_TABLE, "FLOW")

// Disabled sample: add a filter condition (age greater than 18).
// NOTE(review): the sample below filters on basic:age with GREATER_OR_EQUAL, and it
// serialises the scan before setting the filter; it looks like leftover example code
// unrelated to the FLOW table. Confirm before re-enabling.

//val scan = new Scan()

//conf.set(TableInputFormat.SCAN, convertScanToString(scan))

/*

scan.setFilter(new SingleColumnValueFilter("basic".getBytes, "age".getBytes,

CompareOp.GREATER_OR_EQUAL, Bytes.toBytes(18)))

*/

// RDD of (row key, row result) pairs read through TableInputFormat.
val usersRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],

classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],

classOf[org.apache.hadoop.hbase.client.Result])

// count() is a Spark action, so this line already triggers one read of the input.
val data1 = usersRDD.count()

// Formatter used further down to parse the STIME / LTIME column values.
// NOTE(review): in SimpleDateFormat the letter 'S' means milliseconds, so a five-digit
// fraction is read as a millisecond count rather than a decimal fraction of a second.
// Confirm this matches the stored timestamp format.
val sf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSSSS")

println("data length:" + data1)

// Two-level, driver-side aggregation of the DBYTES samples:
//   "<ip>_<port>_<protocol>"  ->  "<startH>:<startM>_<endH>:<endM>"  ->  samples
// The maps are mutated in place and never reassigned, hence `val`.
val map = HashMap[String, HashMap[String, collection.mutable.ArrayBuffer[Double]]]()

// collect() brings every row to the driver; the loop below runs there and is executed
// purely for its side effects (printing and filling `map`), so foreach is used.
usersRDD.collect().foreach {
  case (_, result) =>
    val family = "F".getBytes

    // Reads one column of family "F" from the current row and decodes it as a string.
    def column(qualifier: String): String =
      Bytes.toString(result.getValue(family, qualifier.getBytes))

    val key = Bytes.toInt(result.getRow)
    println("Key:" + key)

    val ip = column("SADDR")
    val port = column("SPORT")
    val startTimeLong = column("STIME")
    val endTimeLong = column("LTIME")
    val protocol = column("PROTO")
    // NOTE(review): a row without DBYTES / STIME / LTIME makes the conversions below
    // throw, exactly as in the previous version; decide whether such rows should be skipped.
    val sum = column("DBYTES").toDouble

    println("ip:" + ip + ",port:" + port + ",startTime:" + startTimeLong + ",endTime:" + endTimeLong + ",protocol:" + protocol + ",sum:" + sum)

    // Example buckets:
    //   ip+port+udp, 14:02 14:07 -> List
    //   ip+port+tcp, 15:02 15:07 -> List
    val startTimeDate = sf.parse(startTimeLong)
    val endTimeLongDate = sf.parse(endTimeLong)

    // Only hour and minute of the start/end timestamps take part in the window key.
    val startHours = startTimeDate.getHours
    val startMinutes = startTimeDate.getMinutes
    val endHours = endTimeLongDate.getHours
    val endMinutes = endTimeLongDate.getMinutes

    val key1 = ip + "_" + port + "_" + protocol
    println("key1:" + key1)

    val key2 = startHours + ":" + startMinutes + "_" + endHours + ":" + endMinutes
    println("key2:" + key2)

    map.get(key1) match {
      case Some(known) =>
        // Reports how many time windows are already known for this endpoint
        // (previously this printed the size of the Option, which is always 1).
        println("--------------------map is not null:" + known.size + "--------------------")
      case None =>
        // First time this ip/port/protocol combination is seen.
        println("--------------------map is null--------------------")
    }

    // Create whichever level is missing, then record the sample. Previously a sample
    // was silently dropped when key1 already existed but key2 did not.
    val windows = map.getOrElseUpdate(key1, HashMap[String, collection.mutable.ArrayBuffer[Double]]())
    val samples = windows.getOrElseUpdate(key2, collection.mutable.ArrayBuffer[Double]())
    samples += sum

    println("map size-----------------:" + map.size)
}

println("map size:" + map.size)

// For every (ip_port_protocol, time window) bucket, compute mean and variance of the
// collected samples with MLlib and print a baseline band of mean +/- 1.960 * stddev.
// The traversal is done only for its side effects, so foreach is used instead of map.
map.foreach { case (resultKey1, resultVal1) =>
  println("--------------------Statistics start --------------------")
  println("resultKey1:" + resultKey1)

  resultVal1.foreach { case (resultKey2, resultVal2) =>
    println("resultKey2:" + resultKey2)
    println("-----------------resultVal2:" + resultVal2.length)

    resultVal2.foreach(sample => println("------------------------f:" + sample))

    // Each sample becomes a one-dimensional vector so colStats sees a single column.
    val dataArray = resultVal2.map(sample => Vectors.dense(sample))

    // NOTE: this launches one Spark job per bucket from the driver.
    val summary: MultivariateStatisticalSummary = Statistics.colStats(sc.parallelize(dataArray))

    val meanVector = summary.mean
    val varianceVector = summary.variance

    println("--------------------mean:" + meanVector + " --------------------")
    println("--------------------variance:" + varianceVector + " --------------------")

    val mean = meanVector.toArray.apply(0)
    val variance = varianceVector.apply(0)

    println("--------------------mean apply 0:" + mean + " --------------------")
    println("--------------------variance apply 0:" + variance + " --------------------")

    // 1.960 is the two-sided 95% z-value of a normal distribution; presumably the band
    // is meant as a 95% interval around the mean (confirm the intended confidence level).
    val spread = 1.960 * Math.sqrt(variance)
    val upbase = mean + spread
    val downbase = mean - spread

    println("------------------- " + upbase + " ---------- " + downbase)

    println("ip port:" + resultKey1 + ",time:" + resultKey2 + ",upbase:" + upbase + ",downbase:" + downbase)
  }
}

println("--------------------baseLine end --------------------")

// Release the Spark context once all buckets have been reported.
sc.stop()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值