HBase 下载地址 https://mirrors.bfsu.edu.cn/apache/hbase/2.3.3/
在 Linux 系统上安装 Java,并配置对应的环境变量
例如
# Example Java environment for HBase; adjust JAVA_HOME to the real JDK location.
export JAVA_HOME="/usr/local/jdk1.8.0_144"
# Put the JDK tools first on PATH. The current directory is deliberately NOT
# added: a leading "." lets any file in the working directory shadow real commands.
export PATH="${JAVA_HOME}/bin:${PATH}"
# "." keeps classes in the current directory loadable, as in the original setup.
export CLASSPATH=".:${JAVA_HOME}/lib/dt.jar:${JAVA_HOME}/lib/tools.jar"
解压下载的 HBase 压缩包,例如:tar -zxvf hbase-2.3.3-bin.tar.gz
修改conf 下面的 hbase-env.sh
取消对下面语句的注释
# Line to uncomment in conf/hbase-env.sh. Presumably makes HBase start and stop
# its own ZooKeeper instance — confirm against the comment shipped beside it.
export HBASE_MANAGES_ZK=true
修改hbase-site.xml
内容如下:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<configuration>
  <!--
    Single-process ("stand-alone") setup for a developer workstation.

    HBase runs without a distributed file system here. Unless configured
    otherwise, HBase and ZooKeeper keep their data on the local filesystem
    under `hbase.tmp.dir`. That directory is moved away from its `/tmp`
    default because many systems clean `/tmp` regularly; it points inside
    this HBase installation directory instead.

    Using the `LocalFileSystem` rather than a distributed filesystem risks
    data integrity problems and data loss, so HBase normally refuses to start
    in such an environment. Setting `hbase.unsafe.stream.capability.enforce`
    to `false` overrides that refusal.

    This configuration is for the developer workstation only and
    __should not be used in production!__

    See also https://hbase.apache.org/book.html#standalone_dist
  -->

  <!-- Stand-alone mode: not a distributed cluster. -->
  <property>
    <name>hbase.cluster.distributed</name>
    <value>false</value>
  </property>

  <!-- Temporary directory, overridden from the `/tmp` default. -->
  <property>
    <name>hbase.tmp.dir</name>
    <value>./tmp</value>
  </property>

  <!-- Root data directory on the local filesystem (sample path; adjust to your machine). -->
  <property>
    <name>hbase.rootdir</name>
    <value>file:///home/se7en/hbase/data</value>
  </property>

  <!-- Permit running against the local filesystem (see the warning above). -->
  <property>
    <name>hbase.unsafe.stream.capability.enforce</name>
    <value>false</value>
  </property>
</configuration>
其它参数可以根据具体需要配置
使用bin目录下面的 start-hbase.sh启动hbase
执行 jps 命令检查是否启动成功:输出中应包含 HMaster 进程
hbase shell 进入hbase管理命令行,进行相应的操作
Java 操作代码
主要类
public List<Put> getHbaseData(String cf) { List<Put> puts = new ArrayList<Put>(); for (int i = 0; i < 10000; i++) { Put put = new Put(Bytes.toBytes("kkkk" + i)); // for (int j = 0; j < 100; i++) { put.addColumn(Bytes.toBytes(cf), Bytes.toBytes("ttt"), Bytes.toBytes("ttt")); // } puts.add(put); } return puts; } public static void main(String[] args) { // List<PointInfo> points = ExcelUtils.getExcelPoint(); Application app = new Application(); // String qq = "269466275"; // String c = content.replace("qqCodeTmp", qq); // String data = SoapUtils.getValue(wsdl, c); // System.out.println(data); String tableName = ConfigPropsManager.getProperty(Constants.HBASE_TABLE_NAME); String cf = ConfigPropsManager.getProperty(Constants.HBASE_TABLE_CLOUMN_FAMILY); int batchSize = ConfigPropsManager.getInteger(Constants.HBASE_PUT_BATCH_SIZE); List<Put> puts = app.getHbaseData(cf); HBaseUtils.put(puts, tableName, batchSize); }
/**
 * Writes the given Puts to an HBase table in batches of {@code batchSize}.
 *
 * <p>Connects through the ZooKeeper quorum host "hbase" on client port 2181,
 * sends the Puts in list order, and prints the connect time, the total count,
 * one line per full batch, and the overall put time to standard output.
 * The caller's list is only read, never modified. Both the table and the
 * connection are closed before returning, also when a write fails.
 *
 * <p>An {@link IOException} from connecting, writing or closing is printed
 * via {@code printStackTrace()} and not rethrown.
 *
 * @param puts      the Puts to write; must not be null
 * @param tableName name of the target table
 * @param batchSize maximum number of Puts sent per {@code Table.put} call
 * @throws IllegalArgumentException if {@code batchSize} is not positive
 */
public static void put(List<Put> puts, String tableName, int batchSize) {
    // Fail early with a clear message instead of an ArithmeticException mid-run.
    if (batchSize <= 0) {
        throw new IllegalArgumentException("batchSize must be positive: " + batchSize);
    }

    final byte[] tableNameBytes = Bytes.toBytes(tableName);
    Configuration config = HBaseConfiguration.create();
    config.set("hbase.zookeeper.quorum", "hbase");
    config.set("hbase.zookeeper.property.clientPort", "2181");
    // Optional client timeouts, left disabled:
    // config.set("hbase.rpc.timeout", "60");
    // config.set("hbase.client.operation.timeout", "60");
    // config.set("hbase.client.scanner.timeout.period", "60");

    long connectStart = System.currentTimeMillis();
    // try-with-resources closes the table first, then the connection.
    try (Connection conn = ConnectionFactory.createConnection(config)) {
        long connectEnd = System.currentTimeMillis();
        System.out.println("connect to hbase time(ms):" + (connectEnd - connectStart));

        try (Table table = conn.getTable(TableName.valueOf(tableNameBytes))) {
            // Optional per-table timeouts, left disabled:
            // table.setOperationTimeout(30);
            // table.setRpcTimeout(30);
            long putStart = System.currentTimeMillis();
            final int total = puts.size();
            System.out.println(new Date() + ": total puts count:" + total);

            int from = 0;
            while (from < total) {
                // Computed from the remaining count so that from + batchSize cannot overflow.
                int to = from + Math.min(batchSize, total - from);
                List<Put> batch = puts.subList(from, to);
                table.put(batch);
                // Only full batches are logged; the trailing partial batch is sent silently.
                if (batch.size() == batchSize) {
                    System.out.println(new Date() + ": total puts count:" + batch.size());
                }
                from = to;
            }

            long putEnd = System.currentTimeMillis();
            System.out.println("put to hbase time(ms):" + (putEnd - putStart));
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
一定要注意上段代码中 config.set("hbase.zookeeper.quorum", "hbase") 这一行(原文标红的位置):其中的值 hbase 是主机名,必须在 hosts 文件中加入它与 IP 地址的对应关系;如果对应的地址有多个,一定要注意其先后顺序