package com.hdfs.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class TestHdfsToHBase {
static class RatingToHBaseMapper extends Mapper<LongWritable, Text, Text, Text> {
StringBuilder buffer = new StringBuilder(); // reused buffer for building the map output value
Text mk = new Text();
Text mv = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] splits = value.toString().split("::");
// Records are expected in the (MovieLens-style) form UserID::MovieID::Rating::Timestamp;
// the movie ID becomes the map output key
this.mk.set(splits[1]);
this.buffer.setLength(0);
this.buffer.append(splits[0]).append(",");
this.buffer.append(splits[2]).append(",").append(splits[3]);
this.mv.set(this.buffer.toString());
context.write(this.mk, this.mv);
}
}
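// Example (hypothetical record in the assumed UserID::MovieID::Rating::Timestamp layout):
//   input      : 1::1193::5::978300760
//   map output : key = "1193", value = "1,5,978300760"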
static class RatingToHBaseReducer extends TableReducer<Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String[] splits = null;
for(Text value : values) {
splits = value.toString().split(",");
// Row key is the movie ID; the rating goes into the "movie" column family
Put put = new Put(key.toString().getBytes());
//put.addColumn("movie".getBytes(), "movieid".getBytes(), key.toString().getBytes());
put.addColumn("movie".getBytes(), "rating".getBytes(), splits[1].getBytes());
// The user ID and the rating value go into the "other" column family
put.addColumn("other".getBytes(), "userid".getBytes(), splits[0].getBytes());
put.addColumn("other".getBytes(), "rating_content".getBytes(), splits[1].getBytes());
context.write(key, put);
}
}
}
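// For the example record above, the resulting row would be:
//   rowkey "1193": movie:rating = "5", other:userid = "1", other:rating_content = "5"
// Note that the row key is only the movie ID, so with the default of one version per cell,
// later Puts for the same movie overwrite earlier ones and only the last rating remains visible.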
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
System.setProperty("HADOOP_USER_NAME", "hadoop");
Configuration conf = new Configuration();
//conf.set("fs.defaultFS", "hdfs://hdp2018/");
conf.set("hbase.zookeeper.quorum", "hdp101:2181,hdp102:2181,hdp103:2181");
Job job = Job.getInstance(conf);
job.setJarByClass(TestHdfsToHBase.class);
job.setMapperClass(RatingToHBaseMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
final String tableName = "ratings:tb_ratings";
Connection conn = ConnectionFactory.createConnection(conf);
Admin admin = conn.getAdmin();
TableName tn = TableName.valueOf(tableName);
// The table name is namespace-qualified, so create the "ratings" namespace if it is missing
try {
admin.getNamespaceDescriptor(tn.getNamespaceAsString());
} catch (NamespaceNotFoundException e) {
admin.createNamespace(NamespaceDescriptor.create(tn.getNamespaceAsString()).build());
}
// Create the target table with the two column families used by the reducer
if(!admin.tableExists(tn)) {
HTableDescriptor htd = new HTableDescriptor(tn);
HColumnDescriptor hcd = new HColumnDescriptor("movie");
htd.addFamily(hcd);
hcd = new HColumnDescriptor("other");
htd.addFamily(hcd);
admin.createTable(htd);
}
admin.close();
conn.close();
//TableMapReduceUtil.initTableReducerJob(tableName, RatingToHBaseReducer.class, job, null, null, null, null, false);
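// initTableReducerJob wires in TableOutputFormat, points it at the target table, and sets
// the job's output key/value classes, so no further output configuration is needed here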
TableMapReduceUtil.initTableReducerJob(tableName, RatingToHBaseReducer.class, job);
FileInputFormat.addInputPath(job, new Path("hdfs://hdp2018/datas/ratings"));
boolean success = job.waitForCompletion(true);
System.exit(success ? 0 : 1);
}
}
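After the job completes, the load can be sanity-checked with a small client-side scan of the target table. The sketch below is only illustrative: it assumes the same ZooKeeper quorum and table name as above, and the VerifyRatings class name and the limit of ten printed rows are made up for this example.

package com.hdfs.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class VerifyRatings {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hdp101:2181,hdp102:2181,hdp103:2181");
        // Scan the table written by the MapReduce job and print a few rows
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("ratings:tb_ratings"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int printed = 0;
            for (Result r : scanner) {
                String movieId = Bytes.toString(r.getRow());
                String rating = Bytes.toString(r.getValue(Bytes.toBytes("movie"), Bytes.toBytes("rating")));
                String userId = Bytes.toString(r.getValue(Bytes.toBytes("other"), Bytes.toBytes("userid")));
                System.out.println(movieId + " -> rating=" + rating + ", userid=" + userId);
                if (++printed >= 10) {
                    break;
                }
            }
        }
    }
}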