Step 1
Dependencies
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.4.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-auth</artifactId>
    <version>2.9.2</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.4</version>
</dependency>
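The sink used below writes to the HBase table baizhi:t_user with column family cf1, so both must exist before the job runs. A minimal one-time setup sketch (the object name CreateTable is only for illustration; it assumes the same single-node "CentOS" ZooKeeper quorum used in the rest of this example, and the table could equally be created from the HBase shell):

package com.baizhi

import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HConstants, HTableDescriptor, NamespaceDescriptor, TableName}

object CreateTable {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set(HConstants.ZOOKEEPER_QUORUM, "CentOS")
    val conn = ConnectionFactory.createConnection(conf)
    val admin = conn.getAdmin

    // Create the "baizhi" namespace if it does not exist yet.
    if (!admin.listNamespaceDescriptors().exists(_.getName == "baizhi")) {
      admin.createNamespace(NamespaceDescriptor.create("baizhi").build())
    }

    // Create baizhi:t_user with the cf1 column family the sink writes to.
    val tableName = TableName.valueOf("baizhi:t_user")
    if (!admin.tableExists(tableName)) {
      val desc = new HTableDescriptor(tableName)
      desc.addFamily(new HColumnDescriptor("cf1"))
      admin.createTable(desc)
    }

    admin.close()
    conn.close()
  }
}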
Writing the driver
package com.baizhi.demo04

import com.baizhi.HBaseSink
import org.apache.spark.{SparkConf, SparkContext}

object TestForeach04 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setMaster("local[6]")
      .setAppName("wordcount")
    val sc = new SparkContext(sparkConf)

    sc.textFile("hdfs://CentOS:9000/demo/words/", 3)
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, false, 3)
      // Write each partition through the shared HBaseSink object, so one HBase
      // connection is reused per executor JVM instead of being opened per record.
      .foreachPartition(vs => HBaseSink.saveToHbase(vs))

    sc.stop()
  }
}
package com.baizhi

import org.apache.hadoop.hbase.{HBaseConfiguration, HConstants, TableName}
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put}

object HBaseSink {
  // The object is initialized once per executor JVM, so this connection is
  // shared by every partition processed on that executor.
  val conf = HBaseConfiguration.create()
  conf.set(HConstants.ZOOKEEPER_QUORUM, "CentOS")
  val conn: Connection = ConnectionFactory.createConnection(conf)

  // Close the shared connection when the executor JVM shuts down.
  Runtime.getRuntime.addShutdownHook(new Thread() {
    override def run(): Unit = {
      conn.close()
    }
  })

  def saveToHbase(iter: Iterator[(String, Int)]): Unit = {
    val table = conn.getTable(TableName.valueOf("baizhi:t_user"))
    iter.foreach(v => {
      // All results land in the single row "wordcount": each word becomes a
      // qualifier under column family cf1, and the count is the cell value.
      val put = new Put("wordcount".getBytes)
      put.addColumn("cf1".getBytes, v._1.getBytes(), (v._2 + "").getBytes)
      table.put(put)
    })
    table.close()
  }
}
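To spot-check the write, the single "wordcount" row can be read back and its cells printed. A minimal read-back sketch (the object name HBaseVerify is only for illustration; it assumes the same quorum and table as above):

package com.baizhi

import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HConstants, TableName}

object HBaseVerify {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set(HConstants.ZOOKEEPER_QUORUM, "CentOS")
    val conn = ConnectionFactory.createConnection(conf)
    val table = conn.getTable(TableName.valueOf("baizhi:t_user"))

    // The sink stores every word as a cf1 qualifier on the "wordcount" row,
    // so a single Get returns the whole word-count result.
    val result = table.get(new Get("wordcount".getBytes))
    result.rawCells().foreach { cell =>
      val word  = new String(CellUtil.cloneQualifier(cell))
      val count = new String(CellUtil.cloneValue(cell))
      println(s"$word -> $count")
    }

    table.close()
    conn.close()
  }
}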