package cn.itcast_01_hbase;

import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.BufferedMutator;
import org.apache.hadoop.hbase.client.BufferedMutatorParams;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class HbaseTest {

    static Configuration config = null;
    private Connection connection = null;
    private Table table = null;

    @Before
    public void init() throws Exception {
        config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "master,work1,work2");
        config.set("hbase.zookeeper.property.clientPort", "2181");
        connection = ConnectionFactory.createConnection(config);
        table = connection.getTable(TableName.valueOf("user"));
    }
    @Test
    public void createTable() throws Exception {
        // table administration handle
        HBaseAdmin admin = new HBaseAdmin(config);
        // table descriptor
        TableName tableName = TableName.valueOf("test3");
        HTableDescriptor desc = new HTableDescriptor(tableName);
        // column family descriptor
        HColumnDescriptor family = new HColumnDescriptor("info");
        // add the column family to the table
        desc.addFamily(family);
        HColumnDescriptor family2 = new HColumnDescriptor("info2");
        // add the second column family to the table
        desc.addFamily(family2);
        // create the table
        admin.createTable(desc);
        admin.close();
    }
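
    /**
     * A minimal sketch of the same table creation through the
     * non-deprecated Admin handle obtained from the Connection (HBase 1.x
     * API); the table and family names simply mirror createTable() above.
     */
    @Test
    public void createTableWithAdmin() throws Exception {
        Admin admin = connection.getAdmin();
        try {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("test3"));
            desc.addFamily(new HColumnDescriptor("info"));
            desc.addFamily(new HColumnDescriptor("info2"));
            admin.createTable(desc);
        } finally {
            admin.close();
        }
    }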
    @Test
    @SuppressWarnings("deprecation")
    public void deleteTable() throws MasterNotRunningException,
            ZooKeeperConnectionException, Exception {
        HBaseAdmin admin = new HBaseAdmin(config);
        admin.disableTable("test3");
        admin.deleteTable("test3");
        admin.close();
    }
@SuppressWarnings({ "deprecation", "resource" })
@Test
public void insertData() throws Exception {
table.setAutoFlushTo(false)
table.setWriteBufferSize(534534534)
ArrayList<Put> arrayList = new ArrayList<Put>()
for (int i = 21
Put put = new Put(Bytes.toBytes("1234"+i))
put.add(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("wangwu"+i))
put.add(Bytes.toBytes("info"), Bytes.toBytes("password"), Bytes.toBytes(1234+i))
arrayList.add(put)
}
//插入数据
table.put(arrayList)
//提交
table.flushCommits()
}
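
    /**
     * A minimal sketch of the same batched insert using BufferedMutator,
     * the HBase 1.x replacement for the deprecated HTable write-buffer
     * calls used above; the 4 MB buffer size is an illustrative value.
     */
    @Test
    public void insertDataWithBufferedMutator() throws Exception {
        BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf("user"))
                .writeBufferSize(4 * 1024 * 1024);
        BufferedMutator mutator = connection.getBufferedMutator(params);
        try {
            for (int i = 21; i < 50; i++) {
                Put put = new Put(Bytes.toBytes("1234" + i));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes("wangwu" + i));
                // mutate() buffers locally and flushes when the buffer fills
                mutator.mutate(put);
            }
            // push whatever is still sitting in the buffer
            mutator.flush();
        } finally {
            mutator.close();
        }
    }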
    @Test
    public void updateData() throws Exception {
        Put put = new Put(Bytes.toBytes("1234"));
        put.add(Bytes.toBytes("info"), Bytes.toBytes("namessss"), Bytes.toBytes("lisi1234"));
        put.add(Bytes.toBytes("info"), Bytes.toBytes("password"), Bytes.toBytes(1234));
        // insert the row; a single put on the Table interface is sent
        // immediately, so no explicit flush is needed
        table.put(put);
    }
    @Test
    public void deleteData() throws Exception {
        Delete delete = new Delete(Bytes.toBytes("1234"));
        table.delete(delete);
    }
    @Test
    public void queryData() throws Exception {
        Get get = new Get(Bytes.toBytes("1234"));
        Result result = table.get(get);
        System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("password"))));
        System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("namessss"))));
        System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("sex"))));
    }
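
    /**
     * A sketch of dumping every cell in a row through the Cell/CellUtil
     * API instead of asking for each column by name; the row key "1234"
     * reuses the one from queryData() above.
     */
    @Test
    public void queryAllCells() throws Exception {
        Get get = new Get(Bytes.toBytes("1234"));
        Result result = table.get(get);
        for (Cell cell : result.rawCells()) {
            System.out.println(Bytes.toString(CellUtil.cloneFamily(cell))
                    + ":" + Bytes.toString(CellUtil.cloneQualifier(cell))
                    + " = " + Bytes.toStringBinary(CellUtil.cloneValue(cell)));
        }
    }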
    @Test
    public void scanData() throws Exception {
        Scan scan = new Scan();
        //scan.addFamily(Bytes.toBytes("info"));
        //scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("password"));
        scan.setStartRow(Bytes.toBytes("wangsf_0"));
        scan.setStopRow(Bytes.toBytes("wangwu"));
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("password"))));
            System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
            //System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("password"))));
            //System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name"))));
        }
        scanner.close();
    }
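
    /**
     * A sketch of the same kind of range scan with scanner caching
     * enabled, which cuts RPC round-trips on long scans; the row-key
     * range and the cache size of 100 are illustrative values matching
     * the "1234"-prefixed keys written by insertData().
     */
    @Test
    public void scanDataWithCaching() throws Exception {
        Scan scan = new Scan();
        scan.setStartRow(Bytes.toBytes("123421"));
        scan.setStopRow(Bytes.toBytes("123430"));
        // fetch up to 100 rows per RPC instead of one at a time
        scan.setCaching(100);
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowkey:" + Bytes.toString(result.getRow()));
        }
        scanner.close();
    }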
    @Test
    public void scanDataByFilter1() throws Exception {
        // full-table scan
        Scan scan = new Scan();
        // filter on the value of a single column
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("info"),
                Bytes.toBytes("name"), CompareFilter.CompareOp.EQUAL,
                Bytes.toBytes("zhangsan2"));
        // attach the filter to the scan
        scan.setFilter(filter);
        // print the result set
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("password"))));
            System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
            //System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("password"))));
            //System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name"))));
        }
        scanner.close();
    }
    @Test
    public void scanDataByFilter2() throws Exception {
        // full-table scan
        Scan scan = new Scan();
        // match row keys starting with "12341"
        RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^12341"));
        // attach the filter to the scan
        scan.setFilter(filter);
        // print the result set
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("password"))));
            System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
            //System.out.println(Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("password"))));
            //System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name"))));
        }
        scanner.close();
    }
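
    /**
     * A sketch of the same "row key starts with" match using PrefixFilter,
     * which avoids the regex comparator when a plain prefix is all that is
     * needed; the prefix "12341" mirrors scanDataByFilter2() above.
     */
    @Test
    public void scanDataByPrefixFilter() throws Exception {
        Scan scan = new Scan();
        scan.setFilter(new PrefixFilter(Bytes.toBytes("12341")));
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowkey:" + Bytes.toString(result.getRow()));
        }
        scanner.close();
    }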
    @Test
    public void scanDataByFilter3() throws Exception {
        // full-table scan
        Scan scan = new Scan();
        // match columns whose qualifier starts with "na"
        ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("na"));
        // attach the filter to the scan
        scan.setFilter(filter);
        // print the result set
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowkey:" + Bytes.toString(result.getRow()));
            System.out.println("info:name:"
                    + Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
            // only print a cell if it exists
            if (result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")) != null) {
                System.out.println("info:age:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age"))));
            }
            if (result.getValue(Bytes.toBytes("info"), Bytes.toBytes("sex")) != null) {
                System.out.println("info:sex:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("sex"))));
            }
            if (result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name")) != null) {
                System.out.println("info2:name:"
                        + Bytes.toString(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name"))));
            }
            if (result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("age")) != null) {
                System.out.println("info2:age:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("age"))));
            }
            if (result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("sex")) != null) {
                System.out.println("info2:sex:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("sex"))));
            }
        }
        scanner.close();
    }
    @Test
    public void scanDataByFilter4() throws Exception {
        // full-table scan
        Scan scan = new Scan();
        // filter list: MUST_PASS_ALL (and), MUST_PASS_ONE (or)
        FilterList filterList = new FilterList(Operator.MUST_PASS_ONE);
        // match row keys starting with "wangsenfeng"
        RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^wangsenfeng"));
        // match rows where info:name equals "zhangsan"
        SingleColumnValueFilter filter2 = new SingleColumnValueFilter(Bytes.toBytes("info"),
                Bytes.toBytes("name"), CompareFilter.CompareOp.EQUAL,
                Bytes.toBytes("zhangsan"));
        filterList.addFilter(filter);
        filterList.addFilter(filter2);
        // attach the filter list to the scan
        scan.setFilter(filterList);
        // print the result set
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println("rowkey:" + Bytes.toString(result.getRow()));
            System.out.println("info:name:"
                    + Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
            // only print a cell if it exists
            if (result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")) != null) {
                System.out.println("info:age:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age"))));
            }
            if (result.getValue(Bytes.toBytes("info"), Bytes.toBytes("sex")) != null) {
                System.out.println("info:sex:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("sex"))));
            }
            if (result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name")) != null) {
                System.out.println("info2:name:"
                        + Bytes.toString(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("name"))));
            }
            if (result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("age")) != null) {
                System.out.println("info2:age:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("age"))));
            }
            if (result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("sex")) != null) {
                System.out.println("info2:sex:"
                        + Bytes.toInt(result.getValue(Bytes.toBytes("info2"), Bytes.toBytes("sex"))));
            }
        }
        scanner.close();
    }
    @After
    public void close() throws Exception {
        table.close();
        connection.close();
    }
}
package cn.itcast_01_hbase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
/**
 * MapReduce over HBase: counts the words stored in the "word" table and
 * writes the per-word counts into the "stat" table.
 * @author wilson
 */
public class HBaseMr {
    /**
     * HBase configuration
     */
    static Configuration config = null;
    static {
        config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "slave1,slave2,slave3");
        config.set("hbase.zookeeper.property.clientPort", "2181");
    }
    /**
     * Table names, column family and column qualifier
     */
    public static final String tableName = "word";
    public static final String colf = "content";
    public static final String col = "info";
    public static final String tableName2 = "stat";
    /**
     * Initialize the table schemas and seed the input data
     */
    public static void initTB() {
        HTable table = null;
        HBaseAdmin admin = null;
        try {
            admin = new HBaseAdmin(config);
            // drop each table that already exists so it can be recreated
            if (admin.tableExists(tableName)) {
                System.out.println("table " + tableName + " already exists, dropping it!");
                admin.disableTable(tableName);
                admin.deleteTable(tableName);
            }
            if (admin.tableExists(tableName2)) {
                System.out.println("table " + tableName2 + " already exists, dropping it!");
                admin.disableTable(tableName2);
                admin.deleteTable(tableName2);
            }
            HTableDescriptor desc = new HTableDescriptor(tableName);
            HColumnDescriptor family = new HColumnDescriptor(colf);
            desc.addFamily(family);
            admin.createTable(desc);
            HTableDescriptor desc2 = new HTableDescriptor(tableName2);
            HColumnDescriptor family2 = new HColumnDescriptor(colf);
            desc2.addFamily(family2);
            admin.createTable(desc2);
            // seed the "word" table
            table = new HTable(config, tableName);
            table.setAutoFlush(false);
            table.setWriteBufferSize(500);
            List<Put> lp = new ArrayList<Put>();
            Put p1 = new Put(Bytes.toBytes("1"));
            p1.add(colf.getBytes(), col.getBytes(), ("The Apache Hadoop software library is a framework").getBytes());
            lp.add(p1);
            Put p2 = new Put(Bytes.toBytes("2"));
            p2.add(colf.getBytes(), col.getBytes(), ("The common utilities that support the other Hadoop modules").getBytes());
            lp.add(p2);
            Put p3 = new Put(Bytes.toBytes("3"));
            p3.add(colf.getBytes(), col.getBytes(), ("Hadoop by reading the documentation").getBytes());
            lp.add(p3);
            Put p4 = new Put(Bytes.toBytes("4"));
            p4.add(colf.getBytes(), col.getBytes(), ("Hadoop from the release page").getBytes());
            lp.add(p4);
            Put p5 = new Put(Bytes.toBytes("5"));
            p5.add(colf.getBytes(), col.getBytes(), ("Hadoop on the mailing list").getBytes());
            lp.add(p5);
            table.put(lp);
            table.flushCommits();
            lp.clear();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (table != null) {
                    table.close();
                }
                if (admin != null) {
                    admin.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    /**
     * MyMapper extends TableMapper<Text, IntWritable>
     * Text: output key type
     * IntWritable: output value type
     */
    public static class MyMapper extends TableMapper<Text, IntWritable> {
        private static IntWritable one = new IntWritable(1);
        private static Text word = new Text();
        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Context context) throws IOException, InterruptedException {
            String words = Bytes.toString(value.getValue(Bytes.toBytes(colf), Bytes.toBytes(col)));
            if (words == null) {
                return; // the row carries no content:info cell
            }
            String[] itr = words.split(" ");
            for (int i = 0; i < itr.length; i++) {
                word.set(itr[i]);
                context.write(word, one);
            }
        }
    }
    /**
     * MyReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>
     * Text: input key type
     * IntWritable: input value type
     * ImmutableBytesWritable: output key type, i.e. the row key
     */
    public static class MyReducer extends
            TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.add(Bytes.toBytes(colf), Bytes.toBytes(col), Bytes.toBytes(String.valueOf(sum)));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
        }
    }
    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        config.set("fs.default.name", "hdfs://master:9000/");
        config.set("hadoop.job.ugi", "hadoop,hadoop");
        config.set("mapred.job.tracker", "master:9001");
        initTB();
        Job job = Job.getInstance(config, "HBaseMr");
        job.setJarByClass(HBaseMr.class);
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes(colf), Bytes.toBytes(col));
        // read from "word" with MyMapper, write counts to "stat" with MyReducer
        TableMapReduceUtil.initTableMapperJob(tableName, scan, MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob(tableName2, MyReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}