HBase 结合 MapReduce(HDFSToHBase)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_41851454/article/details/79781099

题目要求:

把HDFS上的一个student.txt文件(放在 /student/input/ 目录下)中的数据,通过MapReduce导入到HBase的student表中。程序不会自动建表,运行前需要先在HBase中创建好student表及列族info。

源文件:

95002,刘晨,女,19,IS
95017,王风娟,女,18,IS
95018,王一,女,19,IS
95013,冯伟,男,21,CS
95014,王小丽,女,19,CS
95019,邢小丽,女,19,IS
95020,赵钱,男,21,IS
95003,王敏,女,22,MA
95004,张立,男,19,IS
95012,孙花,女,20,CS
95010,孔小涛,男,19,CS
95005,刘刚,男,18,MA
95006,孙庆,男,23,CS
95007,易思玲,女,19,MA
95008,李娜,女,18,CS
95021,周二,男,17,MA
95022,郑明,男,20,MA
95001,李勇,男,20,CS
95011,包小柏,男,18,MA
95009,梦圆圆,女,18,MA
95015,王君,男,18,MA

主程序代码:

package mrhbase;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that loads comma-separated student records from HDFS into HBase.
 *
 * <p>Each input line is expected to have five fields,
 * {@code id,name,sex,age,department} (for example {@code 95011,包小柏,男,18,MA}).
 * The id becomes the row key; the remaining fields are written as the columns
 * {@code name}, {@code sex}, {@code age} and {@code department} of the
 * {@code info} column family in the {@code student} table. This job does not
 * create the table.
 *
 * <p>Connection settings are hard-coded: {@code hbase.zookeeper.quorum} is set to
 * {@code potter2:2181,potter3:2181} and {@code fs.defaultFS} to {@code hdfs://myha01/}.
 * NOTE(review): {@link #run(String[])} builds a fresh configuration and never calls
 * {@code getConf()}, so generic options parsed by {@link ToolRunner} are not applied.
 *
 * @author potter
 */
public class MRHBase2 extends Configured implements Tool{

	private static final String ZK_QUORUM_KEY = "hbase.zookeeper.quorum";
	private static final String ZK_QUORUM_VALUE = "potter2:2181,potter3:2181";

	/** HBase table that receives the rows. */
	private static final String TABLE_NAME = "student";

	/** HDFS input location used when no path is given on the command line. */
	private static final String DEFAULT_INPUT_PATH = "/student/input/";

	/**
	 * Command-line entry point; exits with the status returned by {@link #run(String[])}.
	 *
	 * @param args optional; {@code args[0]} overrides the default HDFS input path
	 * @throws Exception if the job cannot be configured or submitted
	 */
	public static void main(String[] args) throws Exception {
		int exitCode = ToolRunner.run(new MRHBase2(), args);
		System.exit(exitCode);
	}

	/**
	 * Configures and runs the HDFS-to-HBase job, blocking until it finishes.
	 *
	 * @param args optional; when {@code args[0]} is present it is used as the HDFS
	 *             input path, otherwise {@code /student/input/} is read
	 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
	 * @throws Exception if the job cannot be configured or submitted
	 */
	@Override
	public int run(String[] args) throws Exception {
		Configuration config = HBaseConfiguration.create();
		config.set(ZK_QUORUM_KEY, ZK_QUORUM_VALUE);
		config.set("fs.defaultFS", "hdfs://myha01/");
		// Additional HDFS client settings are picked up from these classpath resources.
		config.addResource("config/core-site.xml");
		config.addResource("config/hdfs-site.xml");

		System.setProperty("HADOOP_USER_NAME", "potter");
		Job job = Job.getInstance(config, "MRHBase2");
		job.setJarByClass(MRHBase2.class);
		job.setMapperClass(MRHBase2Mapper.class);

		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(NullWritable.class);

		// Input side: plain text files, one record per line.
		job.setInputFormatClass(TextInputFormat.class);
		// Output side: registers the reducer and the HBase table output.
		// The four nulls are partitioner, quorumAddress, serverClass and serverImpl;
		// the trailing false is addDependencyJars.
		TableMapReduceUtil.initTableReducerJob(TABLE_NAME, MRHBase2Reducer.class, job, null, null, null, null, false);

		// Backward compatible: without arguments the original fixed path is used.
		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
		FileInputFormat.addInputPath(job, new Path(inputPath));

		boolean succeeded = job.waitForCompletion(true);
		return succeeded ? 0 : 1;
	}

	/**
	 * Pass-through mapper: emits every input line unchanged as the map output key.
	 */
	public static class MRHBase2Mapper extends Mapper<LongWritable, Text, Text, NullWritable>{
		/**
		 * Called once per input line.
		 *
		 * @param key     key supplied by the input format (unused)
		 * @param value   the full text of the line, e.g. {@code 95001,李勇,男,20,CS}
		 * @param context receives the line as key with a {@link NullWritable} value
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(value, NullWritable.get());
		}
	}

	/**
	 * Turns each distinct input line into one HBase {@link Put}.
	 *
	 * <p>For the key {@code 95011,包小柏,男,18,MA}:
	 * <ul>
	 *   <li>{@code 95011} : row key</li>
	 *   <li>{@code 包小柏} : {@code info:name}</li>
	 *   <li>{@code 男} : {@code info:sex}</li>
	 *   <li>{@code 18} : {@code info:age}</li>
	 *   <li>{@code MA} : {@code info:department}</li>
	 * </ul>
	 * All values are stored as the UTF-8 bytes of the field text.
	 */
	public static class MRHBase2Reducer extends TableReducer<Text, NullWritable, NullWritable>{

		/** Number of comma-separated fields a line must have to be written. */
		private static final int FIELD_COUNT = 5;

		private static final byte[] FAMILY = "info".getBytes(StandardCharsets.UTF_8);
		private static final byte[] COL_NAME = "name".getBytes(StandardCharsets.UTF_8);
		private static final byte[] COL_AGE = "age".getBytes(StandardCharsets.UTF_8);
		private static final byte[] COL_SEX = "sex".getBytes(StandardCharsets.UTF_8);
		private static final byte[] COL_DEPARTMENT = "department".getBytes(StandardCharsets.UTF_8);

		/**
		 * Parses one line and writes it to the output table.
		 *
		 * <p>Lines with fewer than five fields or an empty id are skipped and counted
		 * under the {@code MRHBase2 / MALFORMED_ROWS} job counter instead of failing the task.
		 *
		 * @param key     the full input line
		 * @param values  placeholder values from the mapper (unused)
		 * @param context receives the {@link Put} for the row
		 */
		@Override
		protected void reduce(Text key, Iterable<NullWritable> values,Context context)
				throws IOException, InterruptedException {

			String[] fields = key.toString().split(",");
			if (fields.length < FIELD_COUNT || fields[0].isEmpty()) {
				// e.g. a blank trailing line in the input file
				context.getCounter("MRHBase2", "MALFORMED_ROWS").increment(1);
				return;
			}

			// Explicit UTF-8 so the stored bytes do not depend on the task JVM's default charset.
			Put put = new Put(fields[0].getBytes(StandardCharsets.UTF_8));
			put.addColumn(FAMILY, COL_NAME, fields[1].getBytes(StandardCharsets.UTF_8));
			put.addColumn(FAMILY, COL_AGE, fields[3].getBytes(StandardCharsets.UTF_8));
			put.addColumn(FAMILY, COL_SEX, fields[2].getBytes(StandardCharsets.UTF_8));
			put.addColumn(FAMILY, COL_DEPARTMENT, fields[4].getBytes(StandardCharsets.UTF_8));
			context.write(NullWritable.get(), put);
		}
	}
}

结果展示:

 95001                                     column=info:age, timestamp=1522577362980, value=20                                                                          
 95001                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95001                                     column=info:name, timestamp=1522577362980, value=\xE6\x9D\x8E\xE5\x8B\x87                                                   
 95001                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95002                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95002                                     column=info:department, timestamp=1522577362980, value=IS                                                                   
 95002                                     column=info:name, timestamp=1522577362980, value=\xE5\x88\x98\xE6\x99\xA8                                                   
 95002                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95003                                     column=info:age, timestamp=1522577362980, value=22                                                                          
 95003                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95003                                     column=info:name, timestamp=1522577362980, value=\xE7\x8E\x8B\xE6\x95\x8F                                                   
 95003                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95004                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95004                                     column=info:department, timestamp=1522577362980, value=IS                                                                   
 95004                                     column=info:name, timestamp=1522577362980, value=\xE5\xBC\xA0\xE7\xAB\x8B                                                   
 95004                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95005                                     column=info:age, timestamp=1522577362980, value=18                                                                          
 95005                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95005                                     column=info:name, timestamp=1522577362980, value=\xE5\x88\x98\xE5\x88\x9A                                                   
 95005                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95006                                     column=info:age, timestamp=1522577362980, value=23                                                                          
 95006                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95006                                     column=info:name, timestamp=1522577362980, value=\xE5\xAD\x99\xE5\xBA\x86                                                   
 95006                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95007                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95007                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95007                                     column=info:name, timestamp=1522577362980, value=\xE6\x98\x93\xE6\x80\x9D\xE7\x8E\xB2                                       
 95007                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95008                                     column=info:age, timestamp=1522577362980, value=18                                                                          
 95008                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95008                                     column=info:name, timestamp=1522577362980, value=\xE6\x9D\x8E\xE5\xA8\x9C                                                   
 95008                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95009                                     column=info:age, timestamp=1522577362980, value=18                                                                          
 95009                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95009                                     column=info:name, timestamp=1522577362980, value=\xE6\xA2\xA6\xE5\x9C\x86\xE5\x9C\x86                                       
 95009                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95010                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95010                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95010                                     column=info:name, timestamp=1522577362980, value=\xE5\xAD\x94\xE5\xB0\x8F\xE6\xB6\x9B                                       
 95010                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95011                                     column=info:age, timestamp=1522577362980, value=18                                                                          
 95011                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95011                                     column=info:name, timestamp=1522577362980, value=\xE5\x8C\x85\xE5\xB0\x8F\xE6\x9F\x8F                                       
 95011                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95012                                     column=info:age, timestamp=1522577362980, value=20                                                                          
 95012                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95012                                     column=info:name, timestamp=1522577362980, value=\xE5\xAD\x99\xE8\x8A\xB1                                                   
 95012                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95013                                     column=info:age, timestamp=1522577362980, value=21                                                                          
 95013                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95013                                     column=info:name, timestamp=1522577362980, value=\xE5\x86\xAF\xE4\xBC\x9F                                                   
 95013                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95014                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95014                                     column=info:department, timestamp=1522577362980, value=CS                                                                   
 95014                                     column=info:name, timestamp=1522577362980, value=\xE7\x8E\x8B\xE5\xB0\x8F\xE4\xB8\xBD                                       
 95014                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95015                                     column=info:age, timestamp=1522577362980, value=18                                                                          
 95015                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95015                                     column=info:name, timestamp=1522577362980, value=\xE7\x8E\x8B\xE5\x90\x9B                                                   
 95015                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95017                                     column=info:age, timestamp=1522577362980, value=18                                                                          
 95017                                     column=info:department, timestamp=1522577362980, value=IS                                                                   
 95017                                     column=info:name, timestamp=1522577362980, value=\xE7\x8E\x8B\xE9\xA3\x8E\xE5\xA8\x9F                                       
 95017                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95018                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95018                                     column=info:department, timestamp=1522577362980, value=IS                                                                   
 95018                                     column=info:name, timestamp=1522577362980, value=\xE7\x8E\x8B\xE4\xB8\x80                                                   
 95018                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95019                                     column=info:age, timestamp=1522577362980, value=19                                                                          
 95019                                     column=info:department, timestamp=1522577362980, value=IS                                                                   
 95019                                     column=info:name, timestamp=1522577362980, value=\xE9\x82\xA2\xE5\xB0\x8F\xE4\xB8\xBD                                       
 95019                                     column=info:sex, timestamp=1522577362980, value=\xE5\xA5\xB3                                                                
 95020                                     column=info:age, timestamp=1522577362980, value=21                                                                          
 95020                                     column=info:department, timestamp=1522577362980, value=IS                                                                   
 95020                                     column=info:name, timestamp=1522577362980, value=\xE8\xB5\xB5\xE9\x92\xB1                                                   
 95020                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95021                                     column=info:age, timestamp=1522577362980, value=17                                                                          
 95021                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95021                                     column=info:name, timestamp=1522577362980, value=\xE5\x91\xA8\xE4\xBA\x8C                                                   
 95021                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7                                                                
 95022                                     column=info:age, timestamp=1522577362980, value=20                                                                          
 95022                                     column=info:department, timestamp=1522577362980, value=MA                                                                   
 95022                                     column=info:name, timestamp=1522577362980, value=\xE9\x83\x91\xE6\x98\x8E                                                   
 95022                                     column=info:sex, timestamp=1522577362980, value=\xE7\x94\xB7


完成!!


展开阅读全文

没有更多推荐了,返回首页