HBase Java API
Link: HBase official documentation (Chinese edition)
Getting started in IDEA
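Before writing the job itself, it helps to confirm that the client can reach HBase from IDEA at all. A minimal connectivity sketch, assuming HBase's ZooKeeper runs on localhost (the quorum address and the class name HBaseConnectTest are my own placeholders, not part of the original project):

package com.kgc.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class HBaseConnectTest {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        // assumption: adjust the quorum to your own cluster
        config.set("hbase.zookeeper.quorum", "localhost");
        try (Connection conn = ConnectionFactory.createConnection(config);
             Admin admin = conn.getAdmin()) {
            // print the existing tables; if this works, the client setup is fine
            for (TableName name : admin.listTableNames()) {
                System.out.println(name.getNameAsString());
            }
        }
    }
}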
Back up the member table into member_bak
package com.kgc.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import java.io.IOException;
/**
* @Date: 2020/8/24 9:48
 * @Description: copy data from one HBase table to another with MapReduce
 * back up the member table into member_bak
* <p>
* create 'member','info','address'
* put 'member','tom','info:age','18'
* put 'member','tom','info:address','BeiJing'
* scan 'member'
* ROW COLUMN+CELL
* tom column=info:address, timestamp=1598234615578, value=BeiJing
* tom column=info:age, timestamp=1598234602279, value=18
* 1 row(s) in 0.0210 seconds
* put 'member','jason','info:age','25'
* <p>
 * Create an empty table below; the age values from the table above will be copied into it:
* create 'member_bak','info','address'
 * Package it with Maven from the project root:
 * mvn package -DskipTests
*
* $HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
* export $HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
 * Neither of the two above is correct; the one below finally works:
* export HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
* com.kgc.hbase.HBaseCopyApp01
* hadoop jar ~/app/tmp/libs/test-hdfs-1.0-SNAPSHOT.jar com.kgc.hbase.HBaseCopyApp01 member member_bak
 * member_bak had no data before the copy; after running the job:
 * hbase(main):001:0> scan 'member_bak'
* ROW COLUMN+CELL
* jason column=info:age, timestamp=1598255370591, value=25
* tom column=info:age, timestamp=1598255346426, value=18
* 2 row(s) in 0.3440 seconds
*/
public class HBaseCopyApp01 {
public static class MyMapper extends TableMapper<Text, Put> {
        // the rowkey, as a string
        Text mapOutPutKey = new Text();
        // Alt+Insert in IDEA -> Override Methods -> map
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
            // ImmutableBytesWritable is the data type of the rowkey
            // key is the HBase rowkey of the current row
            // HBase data model: rowkey -> column family (column qualifier) -> value
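            // e.g. for the sample data above: rowkey "tom", family "info",
            // qualifier "age", value "18"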
            // use the rowkey as the map output key
            mapOutPutKey.set(Bytes.toString(key.get()));
            // create a Put keyed by the same rowkey
Put p = new Put(key.get());
            // back up only info:age
            // iterate over the cells of this Result
            for (Cell cell : value.listCells()) {
                // keep only the info column family
                if ("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
                    // and within it only the age qualifier
                    if ("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
p.add(cell);
}
}
}
context.write(mapOutPutKey, p);
}
}
public static class MyTableReducer extends TableReducer<Text, Put, ImmutableBytesWritable> {
@Override
protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
for (Put put : values) {
context.write(null, put);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration config = HBaseConfiguration.create();
        Job job = Job.getInstance(config, "HBaseCopyApp01");
job.setJarByClass(HBaseCopyApp01.class); // class that contains mapper and reducer
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
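        // Optional narrowing (my addition, not in the original code): since the
        // mapper only keeps info:age, the scan could already filter server-side:
        // scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"));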
TableMapReduceUtil.initTableMapperJob(
args[0], // input table
scan, // Scan instance to control CF and attribute selection
MyMapper.class, // mapper class
Text.class, // mapper output key
Put.class, // mapper output value
job);
TableMapReduceUtil.initTableReducerJob(
args[1], // output table
MyTableReducer.class, // reducer class
job);
job.setNumReduceTasks(1); // at least one, adjust as required
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
}
}
}
Operations in the VM
The code above copies the wanted content from one table into another,
so two tables are needed: one holding the source data and an empty one to receive the copy.
create 'member','info','address'
put 'member','tom','info:age','18'
put 'member','tom','info:address','BeiJing'
scan 'member'
put 'member','jason','info:age','25'
create 'member_bak','info','address'
Once the code is done, build the jar from the command line: mvn package -DskipTests
Upload the built jar to /home/hadoop/app/tmp/libs
Common errors
# Command to run when the job reports the classpath error (hbase_home is the environment variable you configured; use the same upper/lower case here as in your config)
# export this variable into the environment
export HADOOP_CLASSPATH=`${hbase_home}/bin/hbase classpath`
hadoop jar ~/app/tmp/libs/test-hdfs-1.0-SNAPSHOT.jar com.kgc.hbase.HBaseCopyApp01 member member_bak
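After the job has run, the result can also be checked from Java instead of the hbase shell. A minimal read sketch, assuming the HBase client config is on the classpath (the class name MemberBakCheck is my own; rowkey and expected value come from the sample data above):

package com.kgc.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class MemberBakCheck {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf("member_bak"))) {
            // read back the row copied from 'member'; expect info:age = 18
            Result result = table.get(new Get(Bytes.toBytes("tom")));
            byte[] age = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age"));
            System.out.println("tom info:age = " + Bytes.toString(age));
        }
    }
}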
Importing data into HBase
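This job reads a comma-separated file from HDFS and writes each line as a row into an HBase table. The driver takes the input path and the target table as arguments, so both must exist beforehand; assuming the paths used in the final command of this section, the setup would be:

hdfs dfs -mkdir -p /data
hdfs dfs -put employee.txt /data/
# in the hbase shell; the mapper below writes to the info column family
create 'employee','info'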
package com.kgc.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import java.io.IOException;
/**
 * @Date: 2020/8/24 15:29
 * @Description: import data from a file on HDFS into an HBase table with MapReduce
 */
public class HDFSToHBaseApp {
public static class MyMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        // reusable rowkey holder for the map output
ImmutableBytesWritable rowKey = new ImmutableBytesWritable();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // split the line on commas
            String[] splits = value.toString().split(",");
            // the rowkey is the first field after the split
Put put = new Put(Bytes.toBytes(splits[0]));
            /*
             * sample input lines:
             * zhangsan,30,company1
             * lisi,40,company2
             * wangwu,50,company3
             */
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"),Bytes.toBytes(splits[1]));
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("company"),Bytes.toBytes(splits[2]));
            // set the rowkey as the map output key
rowKey.set(Bytes.toBytes(splits[0]));
context.write(rowKey,put);
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration config = HBaseConfiguration.create();
        Job job = Job.getInstance(config, "HDFSToHBaseApp");
        job.setJarByClass(HDFSToHBaseApp.class); // class that contains the mapper
        // mapper settings
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
FileInputFormat.addInputPath(job,new Path(args[0]));
TableMapReduceUtil.initTableReducerJob(
args[1], // output table
null, // reducer class
job);
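        // Note (my reading of TableMapReduceUtil, not from the original notes): with a
        // null reducer class, Hadoop falls back to the identity Reducer, so every Put
        // emitted by the mapper is passed straight through to TableOutputFormat.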
job.setNumReduceTasks(1); // at least one, adjust as required
boolean b = job.waitForCompletion(true);
if (!b) {
throw new IOException("error with job!");
}
}
}
Then rebuild the jar, upload it to the libs directory again, and delete the old one:
mvn package -DskipTests
hadoop jar ~/app/tmp/libs/test-hdfs-1.0-SNAPSHOT.jar com.kgc.hbase.HDFSToHBaseApp /data/employee.txt employee
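To sanity-check the import without the shell, a minimal client-side scan that prints every cell of the employee table (the class name EmployeeScanCheck is my own; the expected rows follow the sample lines in the mapper comment):

package com.kgc.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class EmployeeScanCheck {
    public static void main(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(config);
             Table table = conn.getTable(TableName.valueOf("employee"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result row : scanner) {
                // print rowkey, column and value, e.g. "zhangsan info:age = 30"
                for (Cell cell : row.listCells()) {
                    System.out.println(Bytes.toString(CellUtil.cloneRow(cell))
                            + " " + Bytes.toString(CellUtil.cloneFamily(cell))
                            + ":" + Bytes.toString(CellUtil.cloneQualifier(cell))
                            + " = " + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        }
    }
}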