sparksql--hbase

 val conf = new SparkConf().setMaster("local").setAppName("patition")
  val sc = new SparkContext(conf)

  // HBase client configuration.
  // The ZooKeeper cluster settings could also be supplied by putting an
  // hbase-site.xml on the classpath, but setting them in code like this is
  // the recommended approach here.
  val hbaseConf = HBaseConfiguration.create()
  hbaseConf.set("zookeeper.znode.parent", "/hbase-cluster")
  hbaseConf.set("hbase.zookeeper.quorum", "node0,node1,node2")
  // ZooKeeper client port (2181 is the default).
  hbaseConf.set("hbase.zookeeper.property.clientPort", "2181")

  val tablename = "candle:test"

  // Build the job configuration used for the write. TableOutputFormat must be
  // the one from the org.apache.hadoop.hbase.mapred package.
  val jobConf = new JobConf(hbaseConf)
  jobConf.set(TableOutputFormat.OUTPUT_TABLE, tablename)
  jobConf.setOutputFormat(classOf[TableOutputFormat])

  // In-memory sample records, each formatted as "site, user, timestamp".
  val sourceRDD = sc.parallelize(Seq(
    "site1, user1, 2016-11-20 02:18:33",
    "site1, user2, 2016-11-20 02:18:33",
    "site1, user3, 2016-11-20 02:18:33",
    "site1, user2, 2016-11-20 02:18:33",
    "site1, user3, 2016-11-20 02:18:33",
    "site1, user1, 2016-11-20 02:18:33"
  ))


  // Computes page views (pv) and unique visitors (uv) per "date,hour,site" key
  // and writes one HBase row per key through jobConf.
  // Expected result for the sample data:
  //   ((2016-11-20,02,site1), (6,3))   i.e. pv = 6, uv = 3
  sourceRDD
    .map { line =>
      // Split each record once; fields are "site, user, timestamp".
      // The timestamp is read first, matching the original access order.
      val fields = line.split(",")
      val timeParts = fields(2).trim.split(" ")
      val date = timeParts(0)
      val hour = timeParts(1).split(":")(0)
      val site = fields(0).trim
      val user = fields(1).trim
      (s"$date,$hour,$site", user)
    }
    // After grouping: (2016-11-20,02,site1, [user1, ...])
    .groupByKey()
    .map { case (rowKey, users) =>
      // pv counts every record in the group; uv counts distinct users only.
      val pv = users.size
      val uv = users.toSet.size
      // The grouped key is used as the HBase row key.
      val put = new Put(Bytes.toBytes(rowKey))
      put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("pv"), Bytes.toBytes(pv))
      put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("uv"), Bytes.toBytes(uv))
      // The writable key is left empty; the row key travels inside the Put.
      (new ImmutableBytesWritable, put)
    }
    .saveAsHadoopDataset(jobConf)

}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值