/**
 * Case class modelling one record of the input data.
 *
 * @param id     numeric identifier of the record
 * @param name   name field
 * @param course course field
 * @param score  integer score field
 */
case class Stu(
  id: Int,
  name: String,
  course: String,
  score: Int
)
/**
 * Job entry point: reads `input/stu.txt` line by line, parses each line into a
 * [[Stu]] and hands the records to [[MyHBaseSink]].
 */
object HBseSinkTestStudent {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Run the whole pipeline with a single parallel instance.
    env.setParallelism(1)

    // Each line is split on a single space; only the last field is trimmed
    // before being converted to an Int.
    val students: DataStream[Stu] = env
      .readTextFile("input/stu.txt")
      .map { line =>
        val fields: Array[String] = line.split(" ")
        Stu(
          id = fields(0).toInt,
          name = fields(1),
          course = fields(2),
          score = fields(3).trim.toInt
        )
      }

    students.addSink(new MyHBaseSink())

    // Trigger execution of the job.
    env.execute()
  }
}
/**
 * Sink that writes [[Stu]] records into the HBase table `stu` (column family
 * `info`) through a `BufferedMutator`.
 *
 * The connection and the mutator are created in `open` and released in
 * `close`; `invoke` turns every record into one `Put` keyed by the record id.
 */
class MyHBaseSink() extends RichSinkFunction[Stu] {

  // HBase connection, created in open().
  var conn: Connection = _

  // BufferedMutator used for batched, asynchronous writes; created in open().
  var mutator: BufferedMutator = null

  /**
   * Initialises the HBase connection and the buffered mutator for table `stu`.
   *
   * @param parameters configuration passed to `open` (not read here)
   */
  override def open(parameters: Configuration): Unit = {
    // Build the HBase client configuration.
    val config: conf.Configuration = HBaseConfiguration.create()
    // ZooKeeper quorum host names.
    config.set(HConstants.ZOOKEEPER_QUORUM, "spark1,spark2,spark3")
    // ZooKeeper client port.
    config.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181")
    // Create the connection from the configuration.
    conn = ConnectionFactory.createConnection(config)

    val tname: TableName = TableName.valueOf("stu")
    val params = new BufferedMutatorParams(tname)
    // Optional for batch writing: buffer up to 1 MB of data before flushing.
    // params.writeBufferSize(1024*1024)
    mutator = conn.getBufferedMutator(params)
  }

  /**
   * Converts one record into a `Put` and hands it to the buffered mutator.
   *
   * The row key is the record id rendered as a decimal string. `name` and
   * `course` are stored as string bytes; `score` is passed to `Bytes.toBytes`
   * as an `Int`, so it is stored in binary form rather than as text.
   *
   * @param value   record to write
   * @param context sink context (unused)
   */
  override def invoke(value: Stu, context: SinkFunction.Context[_]): Unit = {
    val family = Bytes.toBytes("info")
    val put = new Put(Bytes.toBytes(value.id.toString))
    put.addColumn(family, Bytes.toBytes("name"), Bytes.toBytes(value.name))
    put.addColumn(family, Bytes.toBytes("course"), Bytes.toBytes(value.course))
    put.addColumn(family, Bytes.toBytes("score"), Bytes.toBytes(value.score))
    mutator.mutate(put)
  }

  /**
   * Releases the mutator and the connection.
   *
   * Both handles are null-checked because `open` may have failed before they
   * were assigned, and the connection is closed in a `finally` block so that a
   * failure while closing the mutator does not leak it.
   */
  override def close(): Unit = {
    try {
      if (mutator != null) mutator.close()
    } finally {
      if (conn != null) conn.close()
    }
  }
}