一、目的
把hbase中某张表的数据导出到hdfs上一份。
实现方式这里介绍两种:一种是自己写mr程序来完成,一种是使用hbase提供的类来完成。
二、自定义mr程序将hbase数据导出到hdfs上
2.1首先看看hbase中t1表中的数据:
2.2mr的代码如下:
比较重要的语句是
job.setNumReduceTasks(0);//为什么要设置reduce的数量是0呢?读者可以自己考虑下
TableMapReduceUtil.initTableMapperJob(args[0], new Scan(),HBaseToHdfsMapper.class ,Text.class, Text.class, job);//这行语句指定了mr的输入是hbase的哪张表,scan可以对这个表进行filter操作。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
/**
 * MapReduce driver that exports rows of an HBase table to tab-separated text files on HDFS.
 *
 * <p>The job is map-only: each row read from the table is turned into one output line of the
 * form {@code rowkey \t name \t age \t gender \t birthday}, where the four columns are read from
 * column family {@code f1}. A column that is absent or empty is written as the literal
 * {@code NULL}.
 *
 * <p>Usage: {@code hadoop jar <jar> HBaseToHdfs <tableName> <outputDir>}
 */
public class HBaseToHdfs {

    /**
     * Configures and runs the export job, then exits the JVM with the job status.
     *
     * @param args {@code args[0]} is the HBase table name, {@code args[1]} is the HDFS output
     *             directory (handed to {@code FileOutputFormat.setOutputPath})
     * @throws Exception if job setup or submission fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: HBaseToHdfs <tableName> <outputDir>");
            System.exit(2);
        }

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.101");

        Job job = Job.getInstance(conf, HBaseToHdfs.class.getSimpleName());
        job.setJarByClass(HBaseToHdfs.class);

        // A full-table scan from MapReduce should not churn the region servers' block cache.
        Scan scan = new Scan();
        scan.setCacheBlocks(false);

        // Wires the table in as job input and registers the mapper together with its output
        // key/value classes, so no separate setMapperClass/setMapOutput*Class calls are needed.
        TableMapReduceUtil.initTableMapperJob(
                args[0], scan, HBaseToHdfsMapper.class, Text.class, Text.class, job);

        // Map-only job: mapper output is written straight to the output format.
        job.setNumReduceTasks(0);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job failure to the calling shell instead of always exiting with 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Mapper that converts one HBase row into a text key (the row key) and a tab-separated
     * text value holding the {@code f1:name}, {@code f1:age}, {@code f1:gender} and
     * {@code f1:birthday} columns.
     */
    public static class HBaseToHdfsMapper extends TableMapper<Text, Text> {

        // Fully qualified because this file's import list is not visible from here.
        private static final java.nio.charset.Charset UTF8 =
                java.nio.charset.StandardCharsets.UTF_8;

        private static final byte[] FAMILY = "f1".getBytes(UTF8);

        /** Column qualifiers, in the order they appear in the output line. */
        private static final byte[][] QUALIFIERS = {
            "name".getBytes(UTF8),
            "age".getBytes(UTF8),
            "gender".getBytes(UTF8),
            "birthday".getBytes(UTF8),
        };

        /** Placeholder written for a column that is absent or has an empty value. */
        private static final String MISSING = "NULL";

        // Reused across map() calls to avoid allocating two Text objects per row.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Emits one output record for the given row.
         *
         * @param key     the row key of the current row
         * @param value   the cells of the current row
         * @param context the task context the record is written to
         * @throws IOException          if writing the record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            // Honour offset/length: the writable may wrap a slice of a larger backing array.
            outKey.set(key.get(), key.getOffset(), key.getLength());

            StringBuilder line = new StringBuilder();
            for (int i = 0; i < QUALIFIERS.length; i++) {
                if (i > 0) {
                    line.append('\t');
                }
                // Result.getValue yields null for an absent column, so no exception handling
                // is needed to detect missing data.
                line.append(render(value.getValue(FAMILY, QUALIFIERS[i])));
            }

            outValue.set(line.toString());
            context.write(outKey, outValue);
        }

        /** Returns the UTF-8 text of {@code raw}, or {@code NULL} when it is null or empty. */
        private static String render(byte[] raw) {
            if (raw == null || raw.length == 0) {
                return MISSING;
            }
            return new String(raw, UTF8);
        }
    }
}
|
2.3打包执行
hadoop jar hbaseToDfs.jar com.lanyun.hadoop2.HBaseToHdfs t1 /t1
2.4查看hdfs上的文件
(my_python_env)[root@hadoop26 ~]# hadoop fs -cat /t1/part*
1	zhangsan	10	male	NULL
2	lisi	NULL	NULL	NULL
3	wangwu	NULL	NULL	NULL
4	zhaoliu	NULL	NULL	1993
至此,导出成功
三、使用hbase自带的工具进行导出
hbase自带的工具是:org.apache.hadoop.hbase.mapreduce.Export
3.1如何使用这个工具呢?查看帮助信息
(my_python_env)[root@hadoop26 ~]# hbase org.apache.hadoop.hbase.mapreduce.Export
ERROR: Wrong number of arguments: 0
Usage: Export [-D <property=value>]* <tablename> <outputdir> [<versions> [<starttime> [<endtime>]] [^[regex pattern] or [Prefix] to filter]]
3.2使用工具来导出
hbase org.apache.hadoop.hbase.mapreduce.Export t1 /t2
至此已经完成导出。