1、将HBASE中的数据使用MR写入HDFS
hadoop jar /ajar/Hbase2Hdfs.jar hbase_mr.Hbase2Hdfs /hbase2hdfs
package hbase_mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce job that exports an HBase table (ns1:t_userinfo1) to HDFS.
 * Each row is written as one text line: "qualifier:value\tqualifier:value\t...".
 * Usage: hadoop jar Hbase2Hdfs.jar hbase_mr.Hbase2Hdfs &lt;output path&gt;
 */
public class Hbase2Hdfs {
    /** Map-only job: turns every HBase Result into a tab-separated text line. */
    static class MyMapper extends TableMapper<Text, NullWritable> {
        // Reused output key to avoid allocating a Text per record.
        private Text k = new Text();

        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            CellScanner cellScanner = value.cellScanner();
            // StringBuilder: map() runs single-threaded, no need for StringBuffer's locking.
            StringBuilder sb = new StringBuilder();
            while (cellScanner.advance()) {
                Cell current = cellScanner.current();
                sb.append(new String(CellUtil.cloneQualifier(current), "utf-8"));
                sb.append(":");
                sb.append(new String(CellUtil.cloneValue(current), "utf-8"));
                sb.append("\t");
            }
            k.set(sb.toString());
            context.write(k, NullWritable.get());
        }
    }

    /** Full-table scan; add start/stop rows or filters here to narrow the export. */
    private static Scan getScan() {
        return new Scan();
    }

    public static void main(String[] args) {
        // Guard before args[0] below; without this a missing argument throws
        // ArrayIndexOutOfBoundsException with no hint about usage.
        if (args.length < 1) {
            System.err.println("Usage: Hbase2Hdfs <hdfs output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        conf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181");
        // BUG FIX: key was misspelled "fs:defaultFS" (colon), so Hadoop silently
        // ignored it and fell back to the default filesystem.
        conf.set("fs.defaultFS", "hdfs://mini1:9000");
        try {
            Job job = Job.getInstance(conf, "hbaseTohdfs");
            job.setJarByClass(Hbase2Hdfs.class);
            TableMapReduceUtil.initTableMapperJob(
                    "ns1:t_userinfo1",
                    getScan(),
                    MyMapper.class,
                    Text.class,
                    NullWritable.class,
                    job
            );
            // Remove a pre-existing output dir so the job doesn't fail on restart.
            FileSystem fs = FileSystem.get(conf);
            Path out = new Path(args[0]);
            if (fs.exists(out)) {
                // delete(Path) is deprecated; the recursive flag is required for dirs.
                fs.delete(out, true);
            }
            FileOutputFormat.setOutputPath(job, out);
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}
2、将HDFS中的数据使用MR写入HBASE:
hadoop jar /ajar/Hdfs2Hbase.jar hbase_mr.Hdfs2Hbase /zgm/agecount.txt
package hbase_mr;
import HbaseApi.HbaseUtilTool;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
/**
 * MapReduce job that counts occurrences of each age in an HDFS text file and
 * writes the totals into HBase table ns1:ageCount (family f1, columns age/count).
 * Input lines are expected to start with a "name:age" field before the first tab.
 * Usage: hadoop jar Hdfs2Hbase.jar hbase_mr.Hdfs2Hbase &lt;input path&gt;
 */
public class Hdfs2Hbase {
    /** Emits (age, 1) for every input line. */
    static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        Text k = new Text();
        IntWritable v = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // First tab-separated field is "something:age"; kv[1] is the age.
            String[] kv = value.toString().split("\t")[0].split(":");
            k.set(kv[1]);
            context.write(k, v);
        }
    }

    /** Sums the 1s per age and writes one Put (rowkey = age) to HBase. */
    static class MyReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // BUG FIX: count used to be an instance field that was never reset,
            // so each key's total also included every previously-reduced key's
            // total. It must be local to each reduce() call.
            int count = 0;
            for (IntWritable l : values) {
                count += l.get();
            }
            byte[] row = Bytes.toBytes(key.toString());
            Put p = new Put(row);
            p.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes(key.toString()));
            p.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("count"), Bytes.toBytes(count));
            context.write(new ImmutableBytesWritable(row), p);
        }
    }

    public static void main(String[] args) {
        // Guard before args[0] below.
        if (args.length < 1) {
            System.err.println("Usage: Hdfs2Hbase <hdfs input path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        // HBase connection (ZooKeeper quorum).
        conf.set("hbase.zookeeper.quorum", "mini1:2181,mini2:2181,mini3:2181");
        // HDFS default filesystem.
        conf.set("fs.defaultFS", "hdfs://mini1:9000");
        try {
            Job job = Job.getInstance(conf, "hdfs2hbase_agecount");
            job.setJarByClass(Hdfs2Hbase.class);
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // Make sure the target table exists before the reducers start writing.
            createTable("ns1:ageCount");
            TableMapReduceUtil.initTableReducerJob("ns1:ageCount",
                    MyReducer.class, job);
            // Input path comes from the command line.
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Creates the target table (family f1, 1-day TTL, ROW bloom filter,
     * up to 5 versions) if it does not already exist.
     *
     * NOTE(review): the Admin from HbaseUtilTool is never closed here —
     * confirm the util owns and reuses that connection.
     */
    private static void createTable(String s) {
        Admin ad = HbaseUtilTool.getAdmin();
        TableName tableName = TableName.valueOf(s);
        try {
            if (ad.tableExists(tableName)) {
                return;
            }
            HTableDescriptor ht = new HTableDescriptor(tableName);
            HColumnDescriptor hc = new HColumnDescriptor(Bytes.toBytes("f1"));
            hc.setTimeToLive(24 * 60 * 60);
            hc.setBloomFilterType(BloomType.ROW);
            hc.setVersions(1, 5);
            ht.addFamily(hc);
            ad.createTable(ht);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
pom文件中要加入:
<dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-common</artifactId> <version>1.2.1</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-common --> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-server</artifactId> <version>1.2.1</version> </dependency>
写好的MR程序要打包到集群,之后:
将写好的程序打包上传到linux上执行报错
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/hbase/client/Scan at hbase_mr.Hbase2Hdfs.getScan(Hbase2Hdfs.java:96) at hbase_mr.Hbase2Hdfs.main(Hbase2Hdfs.java:75) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.util.RunJar.run(RunJar.java:221) at org.apache.hadoop.util.RunJar.main(RunJar.java:136) Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.client.Scan at java.net.URLClassLoader.findClass(URLClassLoader.java:381) at java.lang.ClassLoader.loadClass(ClassLoader.java:424) at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
原因:MR程序依赖hbase的jar包,但是hadoop自身的classpath中并不包含这些包,运行时找不到类
解决: 1、打包时将所有依赖包都打入jar包内 2、export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/apps/hbase-1.2.1/lib/* 存在弊端,只对当前session有效,如果hbase的相关操作在map方法或reduce方法中执行,这种错误依然出现 3、vi hadoop-env.sh (推荐的使用方法) 加入 export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/apps/hbase-1.2.1/lib/* 同步到所有节点,重启集群 4、copy hbase的依赖包/apps/hbase-1.2.1/lib/* 到hadoop的classpath目录下,但是这种容易造成依赖包冲突