package com.uplooking.bigdata.hbase;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
* 将HBase上面的一张表t中的内容,导出到hdfs的一个目录下面
*
* hdfs中的文件内容格式:
* 1 zhangsan 13
* 2 lisi 14
* 要通过mr来进行操作
*
* 我们可以使用hbase自带的导出工具Export,进行导出,将特定表中的数据导出到hdfs上面,生成一个SequenceFile
* 格式:
* hbase org.apache.hadoop.hbase.mapreduce.Export t /output/hbase/h2hdfs
* 有导出,就有导入,参见:Import
* hbase org.apache.hadoop.hbase.mapreduce.Import <tableName> <inputpath>
*/
public class HBase2HDFSApp {
public static void main(String[] args) throws Exception {
if(args == null || args.length < 3) {
System.err.println("Parameter Errors ! Usage : <hbase_t_name outputpath>");
System.exit(-1);
}
String tblName = args[1];
Path outputPath = new Path(args[2]);
Configuration conf = new Configuration();
String jobName = HBase2HDFSApp.class.getSimpleName();
Job job = Job.getInstance(conf, jobName);
job.setJarByClass(HBase2HDFSApp.class);
//设置输入--->hbase中的一张表
/**
* Use this before submitting a TableMap job. It will appropriately set up
* the job.
* table The table name to read from.
* scan The scan instance with the columns, time range etc.
* mapper The mapper class to use.
* outputKeyClass The class of the output key.
* outputValueClass The class of the output value.
* job The current job to adjust. Make sure the passed job is
*/
System.out.println("tableName=====" + tblName );
System.out.println("outputPath=====" + outputPath.toString());
TableMapReduceUtil.initTableMapperJob(tblName,
new Scan(),
HBase2HDFSMapper.class,
Text.class,
NullWritable.class,
job);
//输出,就是mapper输出,;没有reducer
/* job.setMapperClass(HBase2HDFSMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);*/
outputPath.getFileSystem(conf).delete(outputPath, true);
FileOutputFormat.setOutputPath(job, outputPath);
job.setOutputFormatClass(TextOutputFormat.class);
job.setNumReduceTasks(0);//没有reduce
job.waitForCompletion(true);
}
}
public class HBase2HDFSMapper extends TableMapper<Text, NullWritable> {
@Override
protected void map(ImmutableBytesWritable key, Result rs, Context context)
throws IOException, InterruptedException {
byte[] nameBytes = rs.getValue("cf".getBytes(), "name".getBytes());
byte[] ageBytes = rs.getValue("cf".getBytes(), "age".getBytes());
byte[] keyBytes = key.get();
String rk = new String(keyBytes);
String name = new String(nameBytes);
String age = new String(ageBytes);
context.write(new Text(rk + "\t" + name + "\t" + age), NullWritable.get());
}
}
/*
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.client.Scan
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
* 因为在通过mr运行的过程中,mr的classpath中并没有关于hbase的相关的依赖jar包,所以找不到,
* 两种方式,
* 一、在打jar的同时,把hbase的相关依赖打进去
* 二、将hbase的依赖添加到hadoop的classpath里面
*
* */