package com.hdfs.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class TestHdfsToHBase {
static class RatingToHBaseMapper extends Mapper<LongWritable, Text, Text, Text> {
StringBuilder buffer = new StringBuilder(); // reused buffer for building the map output value
Text mk = new Text();
Text mv = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] splits = value.toString().split("::");
// Records are expected in the (MovieLens-style) form UserID::MovieID::Rating::Timestamp;
// the movie ID becomes the map output key
this.mk.set(splits[1]);
this.buffer.setLength(0);
this.buffer.append(splits[0]).append(",");
this.buffer.append(splits[2]).append(",").append(splits[3]);
this.mv.set(this.buffer.toString());
context.write(this.mk, this.mv);
}
}
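// Example (hypothetical record in the assumed UserID::MovieID::Rating::Timestamp layout):
//   input      : 1::1193::5::978300760
//   map output : key = "1193", value = "1,5,978300760"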
static class RatingToHBaseReducer extends TableReducer<Text, Text, Text> {
@Override
protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
String[] splits = null;
for(Text value : values) {
splits = value.toString().split(",");
// Row key is the movie ID; the rating goes into the "movie" column family
Put put = new Put(key.toString().getBytes());
//put.addColumn("movie".getBytes(), "movieid".getBytes(), key.toString().getBytes());
put.addColumn("movie".getBytes(), "rating".getBytes(), splits[1].getBytes());
// The user ID and the rating value go into the "other" column family
put.addColumn("other".getBytes(), "userid".getBytes(), splits[0].getBytes());
put.addColumn("other".getBytes(), "rating_content".getBytes(), splits[1].getBytes());
context.write(key, put);
}
}
}
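// For the example record above, the resulting row would be:
//   rowkey "1193": movie:rating = "5", other:userid = "1", other:rating_content = "5"
// Note that the row key is only the movie ID, so with the default of one version per cell,
// later Puts for the same movie overwrite earlier ones and only the last rating remains visible.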
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
System.setProperty("HADOOP_USER_NAME", "hadoop");
Configuration conf = new Configuration();
//conf.set("fs.defaultFS", "hdfs://hdp2018/");
conf.set("hbase.zookeeper.quorum", "hdp101:2181,hdp102:2181,hdp103:2181");
Job job = Job.getInstance(conf);
job.setJarByClass(TestHdfsToHBase.class);
job.setMapperClass(RatingToHBaseMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
final String tableName = "ratings:tb_ratings";
Connection conn = ConnectionFactory.createConnection(conf);
Admin admin = conn.getAdmin();
TableName tn = TableName.valueOf(tableName);
// The table name is namespace-qualified, so create the "ratings" namespace if it is missing
try {
admin.getNamespaceDescriptor(tn.getNamespaceAsString());
} catch (NamespaceNotFoundException e) {
admin.createNamespace(NamespaceDescriptor.create(tn.getNamespaceAsString()).build());
}
// Create the target table with the two column families used by the reducer
if(!admin.tableExists(tn)) {
HTableDescriptor htd = new HTableDescriptor(tn);
HColumnDescriptor hcd = new HColumnDescriptor("movie");
htd.addFamily(hcd);
hcd = new HColumnDescriptor("other");
htd.addFamily(hcd);
admin.createTable(htd);
}
admin.close();
conn.close();
//TableMapReduceUtil.initTableReducerJob(tableName, RatingToHBaseReducer.class, job, null, null, null, null, false);
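// initTableReducerJob wires in TableOutputFormat, points it at the target table, and sets
// the job's output key/value classes, so no further output configuration is needed here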
TableMapReduceUtil.initTableReducerJob(tableName, RatingToHBaseReducer.class, job);
FileInputFormat.addInputPath(job, new Path("hdfs://hdp2018/datas/ratings"));
boolean success = job.waitForCompletion(true);
System.exit(success ? 0 : 1);
}
}
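After the job completes, the load can be sanity-checked with a small client-side scan of the target table. The sketch below is only illustrative: it assumes the same ZooKeeper quorum and table name as above, and the VerifyRatings class name and the limit of ten printed rows are made up for this example.

package com.hdfs.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class VerifyRatings {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hdp101:2181,hdp102:2181,hdp103:2181");
        // Scan the table written by the MapReduce job and print a few rows
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("ratings:tb_ratings"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            int printed = 0;
            for (Result r : scanner) {
                String movieId = Bytes.toString(r.getRow());
                String rating = Bytes.toString(r.getValue(Bytes.toBytes("movie"), Bytes.toBytes("rating")));
                String userId = Bytes.toString(r.getValue(Bytes.toBytes("other"), Bytes.toBytes("userid")));
                System.out.println(movieId + " -> rating=" + rating + ", userid=" + userId);
                if (++printed >= 10) {
                    break;
                }
            }
        }
    }
}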