Required packages:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import java.util.HashMap;
import java.util.Map;
The code:
public class SaveIntoHBase {
    // Declared static/transient so it is not serialized with the task closure,
    // otherwise the job fails with a Task-not-serializable error.
    // (This works in local[*] mode; on a real cluster the Configuration would need
    // to be created on the executor side instead.)
    transient static Configuration conf;

    public static void main(String[] args) {
        // Initialize the Spark and HBase configuration
        SparkConf sparkConf = new SparkConf().setAppName("SparkHBase").setMaster("local[*]");
        sparkConf.set("spark.sql.shuffle.partitions", "2");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        SQLContext sqlContext = new SQLContext(jsc);
        String tableName = "ysyy_org_clean";
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master,slave1,slave2,slave3,slave4,slave5,slave6");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set(TableOutputFormat.OUTPUT_TABLE, tableName);

        // Load the MySQL table into a DataFrame
        Map<String, String> options = new HashMap<>();
        options.put("url", "jdbc:mysql://192.168.1.20:3306/test?useUnicode=true&characterEncoding=utf8&TreatTinyAsBoolean=false&tinyInt1isBit=false");
        options.put("driver", "com.mysql.jdbc.Driver");
        options.put("user", "*****");
        options.put("password", "*****");
        options.put("dbtable", tableName);
        Dataset<Row> df_org = sqlContext.read().format("jdbc").options(options).load();
        df_org.createOrReplaceTempView("ysyy_org");
        Dataset<Row> df_order = sqlContext.sql("select org_id,org_name,org_pid,zone_id,area_id from ysyy_org");

        // Create (or recreate) the HBase table
        HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
        HColumnDescriptor addFamily = new HColumnDescriptor("org".getBytes());
        desc.addFamily(addFamily);
        try {
            HBaseAdmin admin = new HBaseAdmin(conf);
            if (admin.tableExists(tableName)) {
                admin.disableTable(tableName);
                admin.deleteTable(tableName);
            }
            admin.createTable(desc);
            admin.close();

            // MapReduce Job setup for TableOutputFormat; the foreach-based write below
            // does not use it (see the TableOutputFormat sketch after the class)
            Job job = Job.getInstance(jsc.hadoopConfiguration());
            job.setOutputKeyClass(ImmutableBytesWritable.class);
            job.setOutputValueClass(Result.class);

            // Write each row of the DataFrame into HBase
            JavaRDD<Row> rowJavaRDD = df_order.toJavaRDD();
            rowJavaRDD.foreach(new VoidFunction<Row>() {
                private static final long serialVersionUID = 1L;
                @Override
                public void call(Row row) throws Exception {
                    // Row key is org_id; the remaining fields go into column family "org"
                    Put put = new Put(Bytes.toBytes(row.get(0) + ""));
                    put.addColumn("org".getBytes(), "org_name".getBytes(), Bytes.toBytes(row.get(1) + ""));
                    put.addColumn("org".getBytes(), "org_pid".getBytes(), Bytes.toBytes(row.get(2) + ""));
                    put.addColumn("org".getBytes(), "zone_id".getBytes(), Bytes.toBytes(row.get(3) + ""));
                    put.addColumn("org".getBytes(), "area_id".getBytes(), Bytes.toBytes(row.get(4) + ""));
                    // Opening an HTable per record is expensive; see the foreachPartition sketch at the end
                    HTable table = new HTable(conf, tableName.getBytes());
                    table.put(put);
                    table.close();
                }
            });
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
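Note that the Job / TableOutputFormat settings above are not actually exercised by the foreach write. If you wanted to write through TableOutputFormat instead, a rough sketch of that path (assuming the same conf and df_order, and adding imports for org.apache.spark.api.java.JavaPairRDD, org.apache.spark.api.java.function.PairFunction and scala.Tuple2) could replace the foreach block:

// Sketch only: write through TableOutputFormat / saveAsNewAPIHadoopDataset
// (conf already carries OUTPUT_TABLE and the ZooKeeper quorum)
Job hbaseJob = Job.getInstance(conf);
hbaseJob.setOutputFormatClass(TableOutputFormat.class);
hbaseJob.setOutputKeyClass(ImmutableBytesWritable.class);
hbaseJob.setOutputValueClass(Put.class); // the value class must be a Put (Mutation), not Result
JavaPairRDD<ImmutableBytesWritable, Put> hbasePuts = df_order.toJavaRDD().mapToPair(
        new PairFunction<Row, ImmutableBytesWritable, Put>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Tuple2<ImmutableBytesWritable, Put> call(Row row) throws Exception {
                Put put = new Put(Bytes.toBytes(row.get(0) + ""));
                put.addColumn("org".getBytes(), "org_name".getBytes(), Bytes.toBytes(row.get(1) + ""));
                put.addColumn("org".getBytes(), "org_pid".getBytes(), Bytes.toBytes(row.get(2) + ""));
                put.addColumn("org".getBytes(), "zone_id".getBytes(), Bytes.toBytes(row.get(3) + ""));
                put.addColumn("org".getBytes(), "area_id".getBytes(), Bytes.toBytes(row.get(4) + ""));
                return new Tuple2<>(new ImmutableBytesWritable(put.getRow()), put);
            }
        });
hbasePuts.saveAsNewAPIHadoopDataset(hbaseJob.getConfiguration());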
Result:
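To confirm that the rows landed in HBase, you can scan the new table. The sketch below assumes the same conf and tableName as above and needs two extra imports, org.apache.hadoop.hbase.client.Scan and org.apache.hadoop.hbase.client.ResultScanner:

// Sketch only: scan the table to verify the write
HTable table = new HTable(conf, tableName.getBytes());
ResultScanner scanner = table.getScanner(new Scan());
for (Result r : scanner) {
    String rowKey = Bytes.toString(r.getRow());
    String orgName = Bytes.toString(r.getValue("org".getBytes(), "org_name".getBytes()));
    System.out.println(rowKey + " -> " + orgName);
}
scanner.close();
table.close();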
Some of the steps here may not be handled in the best way; corrections and suggestions from more experienced readers are welcome.
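One point that could likely be improved: the foreach above opens a new HTable for every single record. A per-partition variant, sketched below under the same assumptions (extra imports: java.util.Iterator, java.util.List, java.util.ArrayList), opens one table per partition and batches the puts:

// Sketch only: one HTable and one batched put per partition instead of per record
rowJavaRDD.foreachPartition(new VoidFunction<Iterator<Row>>() {
    private static final long serialVersionUID = 1L;
    @Override
    public void call(Iterator<Row> rows) throws Exception {
        HTable table = new HTable(conf, tableName.getBytes());
        List<Put> puts = new ArrayList<>();
        while (rows.hasNext()) {
            Row row = rows.next();
            Put put = new Put(Bytes.toBytes(row.get(0) + ""));
            put.addColumn("org".getBytes(), "org_name".getBytes(), Bytes.toBytes(row.get(1) + ""));
            put.addColumn("org".getBytes(), "org_pid".getBytes(), Bytes.toBytes(row.get(2) + ""));
            put.addColumn("org".getBytes(), "zone_id".getBytes(), Bytes.toBytes(row.get(3) + ""));
            put.addColumn("org".getBytes(), "area_id".getBytes(), Bytes.toBytes(row.get(4) + ""));
            puts.add(put);
        }
        table.put(puts); // single batched write for the whole partition
        table.close();
    }
});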