一、Hbase结合mapreduce
为什么需要用 MapReduce 去访问 HBase 的数据?
——加快分析速度和扩展分析能力。
MapReduce 访问 HBase 数据做分析,一定是应用在离线分析的场景下。
1、HbaseToHDFS
从 hbase 中读取数据,分析之后然后写入 hdfs,代码实现:
(以下为完整代码示例,原文代码清单共 94 行;行号为原排版的装订线编号,此处省略)
package
com.ghgj.hbase.hbase2hdfsmr;
import
java.io.IOException;
import
java.util.List;
import
org.apache.hadoop.conf.Configuration;
import
org.apache.hadoop.fs.FileSystem;
import
org.apache.hadoop.fs.Path;
import
org.apache.hadoop.hbase.Cell;
import
org.apache.hadoop.hbase.HBaseConfiguration;
import
org.apache.hadoop.hbase.client.Result;
import
org.apache.hadoop.hbase.client.Scan;
import
org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import
org.apache.hadoop.hbase.mapreduce.TableMapper;
import
org.apache.hadoop.hbase.util.Bytes;
import
org.apache.hadoop.io.NullWritable;
import
org.apache.hadoop.io.Text;
import
org.apache.hadoop.mapreduce.Job;
import
org.apache.hadoop.mapreduce.Mapper;
import
org.apache.hadoop.mapreduce.Reducer;
import
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
* 作用:从hbase中读取user_info这个表的数据,然后写出到hdfs
*/
public
class
HBaseToHDFSMR {
private
static
final
String ZK_CONNECT =
"hadoop03:2181,hadoop04:2181,hadoop05:2181"
;
public
static
void
main(String[] args)
throws
Exception {
Configuration conf = HBaseConfiguration.create();
conf.set(
"hbase.zookeeper.quorum"
, ZK_CONNECT);
System.setProperty(
"HADOOP_USER_NAME"
,
"hadoop"
);
// conf.set("fs.defaultFS", "hdfs://myha01/");
Job job = Job.getInstance(conf);
job.setJarByClass(HBaseToHDFSMR.
class
);
Scan scan =
new
Scan();
scan.addColumn(Bytes.toBytes(
"base_info"
), Bytes.toBytes(
"name"
));
/**
* TableMapReduceUtil:以util结尾:工具
* MapReduceFactory:以factory结尾,它是工厂类,最大作用就是管理对象的生成
*/
TableMapReduceUtil.initTableMapperJob(
"user_info"
, scan,
HBaseToHDFSMRMapper.
class
, Text.
class
, NullWritable.
class
, job);
job.setReducerClass(HBaseToHDFSMRReducer.
class
);
job.setOutputKeyClass(Text.
class
);
|