HdfsToHBase
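
This MapReduce job copies rating records from HDFS into HBase: the mapper emits the movie ID as the key together with the remaining fields, and the reducer turns each record into a Put against the table ratings:tb_ratings. Each input line is assumed to follow the MovieLens-style layout userID::movieID::rating::timestamp (for example 1::1193::5::978300760).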

package com.hdfs.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class TestHdfsToHBase {
	static class RatingToHBaseMapper extends Mapper<LongWritable, Text, Text, Text> {
		StringBuffer buffer = new StringBuffer();
		Text mk = new Text();
		Text mv = new Text();
		
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String[] splites = value.toString().split("::");
			
			// Use the movie ID as the map output key
			this.mk.set(splites[1]);
			
			this.buffer.setLength(0);
			this.buffer.append(splites[0]).append(",");
			this.buffer.append(splites[2]).append(",").append(splites[3]);
			
			this.mv.set(this.buffer.toString());
			context.write(this.mk, this.mv);
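			// Example (assuming an input line "1::1193::5::978300760"):
			//   map output key   -> "1193"            (movie ID)
			//   map output value -> "1,5,978300760"   (userID,rating,timestamp)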
		}
	}
	
	static class RatingToHBaseReducer extends TableReducer<Text, Text, Text> {
		@Override
		protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
			String[] splites = null;
			
			for(Text value : values) {
				splites = value.toString().split(",");
				
				// The movie ID (row key) and the rating go into the "movie" column family.
				// Note: since the row key is the movie ID, a later rating for the same movie
				// overwrites the earlier one in these cells.
				Put put = new Put(key.toString().getBytes());
				//put.addColumn("movie".getBytes(), "movieid".getBytes(), key.toString().getBytes());
				put.addColumn("movie".getBytes(), "rating".getBytes(), splites[1].getBytes());
				
				// The user ID and the rating content go into the "other" column family
				put.addColumn("other".getBytes(), "userid".getBytes(), splites[0].getBytes());
				put.addColumn("other".getBytes(), "rating_content".getBytes(), splites[1].getBytes());
				
				context.write(key, put);
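				// Example (continuing the assumed line "1::1193::5::978300760"), row key "1193":
				//   movie:rating         = "5"
				//   other:userid         = "1"
				//   other:rating_content = "5"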
			}
		}
	}
	
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		System.setProperty("HADOOP_USER_NAME", "hadoop");
		
		Configuration conf = new Configuration();
		//conf.set("fs.defaultFS", "hdfs://hdp2018/");
		conf.set("hbase.zookeeper.quorum", "hdp101:2181,hdp102:2181,hdp103:2181");
		
		Job job = Job.getInstance(conf);
		
		job.setJarByClass(TestHdfsToHBase.class);
		job.setMapperClass(RatingToHBaseMapper.class);
		
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		
		final String tableName = "ratings:tb_ratings";
		
		Connection conn = ConnectionFactory.createConnection(conf);
		Admin admin = conn.getAdmin();
		
		if(!admin.tableExists(TableName.valueOf(tableName.getBytes()))) {
			HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName.getBytes()));
			HColumnDescriptor hcd = new HColumnDescriptor("movie");
			htd.addFamily(hcd);
			
			hcd = new HColumnDescriptor("other");
			htd.addFamily(hcd);
			
			admin.createTable(htd);
		}
		
		admin.close();
		conn.close();
		
		//TableMapReduceUtil.initTableReducerJob(tableName, RatingToHBaseReducer.class, job, null, null, null, null, false);
		TableMapReduceUtil.initTableReducerJob(tableName, RatingToHBaseReducer.class, job);
		FileInputFormat.addInputPath(job, new Path("hdfs://hdp2018/datas/ratings"));
		
		boolean success = job.waitForCompletion(true);
		System.exit(success ? 0 : 1);
	}
}
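
The table name above lives in the "ratings" namespace, and createTable() fails with a NamespaceNotFoundException if that namespace has not been created yet. Below is a minimal sketch for creating it once before the job runs, assuming the same ZooKeeper quorum as above (the class name EnsureNamespace is just for illustration); the same can also be done with create_namespace 'ratings' in the HBase shell.

package com.hdfs.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class EnsureNamespace {
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "hdp101:2181,hdp102:2181,hdp103:2181");
		
		try (Connection conn = ConnectionFactory.createConnection(conf);
				Admin admin = conn.getAdmin()) {
			// Check whether the "ratings" namespace already exists
			boolean exists = false;
			for (NamespaceDescriptor nd : admin.listNamespaceDescriptors()) {
				if ("ratings".equals(nd.getName())) {
					exists = true;
					break;
				}
			}
			// Create it only when missing, so the class can be run repeatedly
			if (!exists) {
				admin.createNamespace(NamespaceDescriptor.create("ratings").build());
			}
		}
	}
}

After the job finishes, the import can be spot-checked with scan 'ratings:tb_ratings' in the HBase shell.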
