HBase 多种方式查询结果
采用不同的方式查询 HBase 里的内容
命令方式
scan方式
scan 't1', {COLUMNS=>'cf',VERSIONS=>20,TIMESTAMP=>1566300936}
scan 't1', FILTER=>"ValueFilter(=,'binary:2014-01-30')" # 过滤出值等于该字符串的单元格
get方式
get 't1', 'rowkey_name', {COLUMNS=>'cf',VERSIONS=>2147483647}
get 't1','rowkey_name',{COLUMN=>'cf',VERSIONS=>20,TIMERANGE=>[1566300936,1566300988],FORMATTER => 'toString'} # 指定时间戳范围及输出类型
scan 方式需要遍历全表,所以相对 get 方式会更慢。
调用API get方式
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Looks up a list of row keys in an HBase table with one {@code Get} per row key
 * and prints every returned cell (up to 20 versions per column) to standard output.
 *
 * <p>The shared {@link Connection} is created once in the static initializer, using a
 * {@link Configuration} that additionally loads {@code hbase-site.xml} from the
 * current working directory.
 */
public class Query {
    public static Configuration conf = null;
    public static Connection connection = null;
    public static Admin admin = null;
    static Table table = null;

    static {
        conf = new Configuration();
        try {
            conf.addResource(new File("hbase-site.xml").toURI().toURL());
        } catch (MalformedURLException e1) {
            // Non-fatal: continue with whatever configuration is already loaded.
            e1.printStackTrace();
        }
        try {
            connection = ConnectionFactory.createConnection(conf);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail fast with the real cause instead of leaving the fields null and
            // hitting a NullPointerException on the first query.
            throw new IllegalStateException("Failed to connect to HBase", e);
        }
    }

    /**
     * Fetches each row key with an individual {@code Get} and prints its cells.
     *
     * <p>The table handle is closed before this method returns; the shared
     * connection stays open so the method can be called repeatedly.
     *
     * @param rowkeyList row keys to look up, in the order they should be queried
     * @throws IOException if the table cannot be opened, read, or closed
     */
    public static void queryTable(List<String> rowkeyList) throws IOException {
        String tableName = "table_name";
        table = connection.getTable(TableName.valueOf(tableName)); // obtain the table handle
        try {
            for (String rowkey : rowkeyList) {
                Get get = new Get(Bytes.toBytes(rowkey));
                get.setMaxVersions(20);
                printCells(table.get(get));
            }
        } finally {
            table.close();
        }
    }

    /**
     * Prints one comma-separated line per cell:
     * row key, timestamp, column family, qualifier, value.
     */
    private static void printCells(Result result) {
        // A row with no cells yields an empty Result whose backing array may be null.
        if (result == null || result.isEmpty()) {
            return;
        }
        for (Cell kv : result.rawCells()) {
            String[] fields = new String[] {
                Bytes.toString(kv.getRow()),
                String.valueOf(kv.getTimestamp()),
                Bytes.toString(kv.getFamily()),
                Bytes.toString(kv.getQualifier()),
                Bytes.toString(kv.getValue())
            };
            // Printing the array itself would only show its identity hash.
            System.out.println(String.join(",", fields));
        }
    }

    /**
     * Queries three sample row keys, then releases the admin handle and the connection.
     *
     * @param args unused
     * @throws IOException if the query or the cleanup fails
     */
    public static void main(String[] args) throws IOException {
        List<String> rowkeyList = new ArrayList<>();
        rowkeyList.add("0000001");
        rowkeyList.add("0000002");
        rowkeyList.add("0000003");
        // Resources are closed in reverse order: admin first, then the connection.
        try (Connection sharedConnection = connection; Admin sharedAdmin = admin) {
            queryTable(rowkeyList);
        }
    }
}
Batch 方式查询
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Looks up a list of row keys in an HBase table by submitting all {@code Get}
 * operations in a single {@code Table.batch} call, then prints every returned cell
 * (up to 20 versions per column) to standard output.
 *
 * <p>The shared {@link Connection} is created once in the static initializer, using a
 * {@link Configuration} that additionally loads {@code hbase-site.xml} from the
 * current working directory.
 */
public class BatchTest {
    static List<String> uids;
    public static Configuration conf = null;
    public static Connection connection = null;
    public static Admin admin = null;
    static Table table = null;

    static {
        conf = new Configuration();
        try {
            conf.addResource(new File("hbase-site.xml").toURI().toURL());
        } catch (MalformedURLException e1) {
            // Non-fatal: continue with whatever configuration is already loaded.
            e1.printStackTrace();
        }
        try {
            connection = ConnectionFactory.createConnection(conf);
            admin = connection.getAdmin();
        } catch (IOException e) {
            // Fail fast with the real cause instead of leaving the fields null and
            // hitting a NullPointerException on the first query.
            throw new IllegalStateException("Failed to connect to HBase", e);
        }
    }

    /**
     * Fetches all given row keys in one batch request and prints their cells.
     *
     * <p>A failed batch is reported on standard error and any partial results are
     * still printed. The table handle is closed before this method returns; the
     * shared connection stays open so the method can be called repeatedly, and the
     * caller is responsible for closing it.
     *
     * @param rowkeyList row keys to look up
     * @throws IOException if the table cannot be opened or closed
     */
    public static void queryBatchTable(List<String> rowkeyList) throws IOException {
        String tableName = "table_name";
        // Collect every operation so they can be submitted in a single batch call.
        List<Row> batch = new ArrayList<Row>(rowkeyList.size());
        for (String uid : rowkeyList) {
            Get get = new Get(Bytes.toBytes(uid));
            get.setMaxVersions(20);
            batch.add(get);
        }
        Object[] results = new Object[batch.size()];

        table = connection.getTable(TableName.valueOf(tableName)); // obtain the table handle
        try {
            try {
                table.batch(batch, results);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can still observe it.
                Thread.currentThread().interrupt();
                System.err.println("Error: " + e);
            } catch (IOException e) {
                System.err.println("Error: " + e);
            }
            for (int i = 0; i < results.length; i++) {
                Object outcome = results[i];
                if (outcome instanceof Result) {
                    printCells((Result) outcome);
                } else {
                    // After a failed batch a slot can be null or hold the failure itself.
                    System.err.println("Error: no result for rowkey "
                            + rowkeyList.get(i) + ": " + outcome);
                }
            }
        } finally {
            table.close();
        }
    }

    /**
     * Prints one comma-separated line per cell:
     * row key, timestamp, column family, qualifier, value.
     */
    private static void printCells(Result result) {
        // A row with no cells yields an empty Result whose backing array may be null.
        if (result.isEmpty()) {
            return;
        }
        for (Cell kv : result.rawCells()) {
            String[] fields = new String[] {
                Bytes.toString(kv.getRow()),
                String.valueOf(kv.getTimestamp()),
                Bytes.toString(kv.getFamily()),
                Bytes.toString(kv.getQualifier()),
                Bytes.toString(kv.getValue())
            };
            // Printing the array itself would only show its identity hash.
            System.out.println(String.join(",", fields));
        }
    }

    /**
     * Queries three sample row keys, then releases the admin handle and the connection.
     *
     * @param args unused
     * @throws IOException if the query or the cleanup fails
     */
    public static void main(String[] args) throws IOException {
        List<String> rowkeyList = new ArrayList<>();
        rowkeyList.add("0000001");
        rowkeyList.add("0000002");
        rowkeyList.add("0000003");
        // Resources are closed in reverse order: admin first, then the connection.
        try (Connection sharedConnection = connection; Admin sharedAdmin = admin) {
            queryBatchTable(rowkeyList);
        }
    }
}
这两种方式分别是单条查询和批量查询。总体上 batch 方式查询更快:查询 100 条记录时,在我的数据上两者相差 3 秒。虽然这是 batch 方式肉眼可见的优势,但当我查询 1 万条记录时,batch 程序在运行过程中把 RegionServer 拖垮了;get 方式效率虽低,却不会出现这种情况。batch 方式造成的这个问题目前仍在解决中。