Functionality:
Read the order table from Hive, compute each user's total order amount over the last 30 days, and write the results to HBase.
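The dw_order2 source table itself is not shown here. As a reproducibility aid, the sketch below creates a minimal matching table: the column names userid, order_amount and ctime and the database pro3_dw are taken from the job's SQL, while the column types and Parquet storage format are assumptions.

// Hypothetical helper, not part of the original job: creates a minimal pro3_dw.dw_order2
// table so the query below has something to read. Types and storage format are assumed.
import org.apache.spark.sql.SparkSession

object createDwOrder2Sketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("createDwOrder2Sketch")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "hdfs://mini1:9000/user/hive/warehouse")
      .enableHiveSupport()
      .getOrCreate()
    spark.sql("CREATE DATABASE IF NOT EXISTS pro3_dw")
    spark.sql(
      """CREATE TABLE IF NOT EXISTS pro3_dw.dw_order2 (
        |  userid       STRING,
        |  order_amount DOUBLE,
        |  ctime        STRING
        |) STORED AS PARQUET""".stripMargin)
    spark.stop()
  }
}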
package pro3
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{HBaseAdmin, Put}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
/*
 * Write the aggregated results into HBase.
 */
object monthAmount {
  def main(args: Array[String]): Unit = {
    // A single SparkSession with Hive support is enough; its underlying SparkContext
    // is what saveAsHadoopDataset uses, so no separate SparkConf/SparkContext is created.
    val spark2 = SparkSession.builder()
      .appName("HBaseTest")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "hdfs://mini1:9000/user/hive/warehouse")
      .enableHiveSupport()
      .getOrCreate()
spark2.sql("use pro3_dw")
val df1: DataFrame = spark2.sql("" +
" select userid,sum(order_amount) from dw_order2 where ctime>date_sub('2019-05-28',30) group by userid limit 4" )
    val rdd1 = df1.rdd
    val rdd2: RDD[(ImmutableBytesWritable, Put)] = rdd1.map(x => {
      // The userid becomes the HBase row key; getString(0) assumes the Hive column is a string.
      val uid: String = x.getString(0)
      val amount: Any = x.get(1)
      val put = new Put(Bytes.toBytes(uid))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("tag"), Bytes.toBytes("B220U015_001"))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("tagweight"), Bytes.toBytes(amount.toString))
      // TableOutputFormat expects (ImmutableBytesWritable, Put) pairs.
      (new ImmutableBytesWritable, put)
    })
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181")
    // HDFS NameNode address; the key must be "fs.defaultFS".
    conf.set("fs.defaultFS", "hdfs://mini1:9000")
    val tableName = "userPhoto"
    val admin = new HBaseAdmin(conf)
    if (!admin.tableExists(TableName.valueOf(tableName))) {
      println(s"Table $tableName does not exist, creating it")
      val desc = new HTableDescriptor(TableName.valueOf(tableName))
      val hcd = new HColumnDescriptor("cf1")
      desc.addFamily(hcd)
      admin.createTable(desc)
    }
    admin.close()
    // Initialize the JobConf; TableOutputFormat must be the one from the
    // org.apache.hadoop.hbase.mapred package (old MapRed API), which is what saveAsHadoopDataset expects.
    val jobConf = new JobConf(conf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, tableName)
    rdd2.saveAsHadoopDataset(jobConf)
    spark2.stop()
  }
}
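To sanity-check the write, a row can be read back with the same HBase 1.x client API. The snippet below is only an illustration: the row key "u001" is a hypothetical userid, so substitute one that actually appears in dw_order2.

// Hypothetical verification helper: fetch one row from userPhoto and print both columns.
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Get, HTable}
import org.apache.hadoop.hbase.util.Bytes

object verifyUserPhoto {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181")
    val table = new HTable(conf, "userPhoto")
    // "u001" is a made-up userid used only for this example.
    val result = table.get(new Get(Bytes.toBytes("u001")))
    val tag = Bytes.toString(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("tag")))
    val weight = Bytes.toString(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("tagweight")))
    println(s"tag=$tag, tagweight=$weight")
    table.close()
  }
}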