此项目由二部分组成
一 配置文件
1配置文件的位置
D:\d\笔记目录\jdbchbase\datasource.properties
2配置文件的内容
#java连接hbase配置
hbase.zookeeper.quorum=192.168.131.200
#java连接mysql配置
mysql.driver=com.mysql.jdbc.Driver
mysql.url=jdbc:mysql://192.168.131.200:3306/test
mysql.username=root
mysql.password=12345678
mysql.hbase.table=test:userinfo
#mysql表名
mysql.table=user_info
#mysql查where条件
mysql.where=user_id between 1 and 19
#mysql表导出路径
java.file.dir=C:/Users/Administrator/Desktop/mysqldata
#hbase和mysql中列簇和列的映射关系
mysql.hbase.fields.map=:rowKey:user_id,base:user_name,base:user_gender,base:user_pid,contact:user_phone,contact:user_province,contact:user_city,contact:user_district,contact:user_address,account:user_account,account:user_pass,account:user_balance
3配置文件的用途
1 有注释详情 里面放了 mysql的连接信息 以及hbase的输出库表路径
2 其中java.file.dir 后面的值写入的是:
在windows下的idea中运行就是 windows路径
在centos7 中以胖包的方式运行就是 centos7 路径
3 其中的mysql.hbase.fields.map后面的值写入的是:
mysql和hbase的表对应关系 以谁作为行键(唯一键rowKey) 哪些列放入一个列簇(分类base)
4连入配置文件的方式
在此处写入配置文件的文件路径 上面写着主类路径
二 项目主体
1 项目框架
项目主体
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cn.kgc</groupId>
    <artifactId>java2hbase</artifactId>
    <version>1.0</version>
    <name>java2hbase</name>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.47</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.3.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>3.1.3</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.3</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- BUG FIX: the element is <mainClass>, not <main-class>.
                                 With the wrong name the manifest gets no Main-Class entry
                                 and "java -jar ...-jar-with-dependencies.jar" cannot start. -->
                            <mainClass>cn.kgc.App</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
log4j.properties
# Root logger: INFO and above, routed to the console appender only.
log4j.rootLogger=INFO, stdout
# Console appender: "date level [category] - message" pattern.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender writing to log/hd.log with the same pattern.
# NOTE(review): "logfile" is defined but never listed in log4j.rootLogger,
# so nothing is actually written to it - confirm whether it should be attached.
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=log/hd.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
Config
package cn.kgc.base;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
public class Config {
protected static Properties pro;
protected static void close(AutoCloseable...closes){
for (AutoCloseable close : closes) {
if (null!=close) {
try {
close.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
public static void init(String configPath) {
pro = new Properties();
FileReader fr = null;
try {
pro.load(fr=new FileReader(configPath));
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
} finally {
close(fr);
}
}
public static void main(String[] args) {
init("C:\\Users\\lenovo\\Desktop\\user_config\\datasource.properties");
}
}
Factory 没有内容
Java2HBase
package cn.kgc.base;
import com.jcraft.jsch.IO;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
/**
 * HBase-side helpers: connection/admin factories, namespace and table
 * creation, batch loading of a delimited file, and row deletion/truncation.
 * NOTE(review): the static block reads the inherited {@code pro} Properties,
 * so Config.init(...) must have run before this class is first loaded,
 * otherwise class initialization fails with a NullPointerException.
 */
public class Java2HBase extends Config{
// HBase client configuration, populated from every "hbase"-prefixed property.
private static Configuration config = null;
static {
config = HBaseConfiguration.create();
Enumeration<?> e = pro.propertyNames();
while (e.hasMoreElements()) {
String key = e.nextElement().toString();
// forward only hbase-prefixed keys (e.g. hbase.zookeeper.quorum)
if(key.startsWith("hbase")){
config.set(key,pro.getProperty(key));
}
}
}
// Opens a new HBase connection; callers are responsible for closing it.
private static Connection con() throws IOException {
return ConnectionFactory.createConnection(config);
}
// Obtains an Admin handle from the given connection; callers close it.
private static Admin admin(Connection con) throws IOException {
return con.getAdmin();
}
/**
 * Returns true when nameSpace already exists on the cluster.
 * An IOException is logged and treated as "not found" (returns false).
 */
private static boolean nameSpaceExists(Connection con,String nameSpace){
Admin admin = null;
try {
admin = admin(con);
for (String ns : admin.listNamespaces()) {
if (nameSpace.equals(ns)) {
return true;
}
}
} catch (IOException e) {
e.printStackTrace();
} finally {
close(admin);
}
return false;
}
// Creates the namespace; errors are logged but not propagated.
private static void createNameSpace(Connection con,String nameSpace){
Admin admin = null;
try {
admin = admin(con);
admin.createNamespace(NamespaceDescriptor
.create(nameSpace).build());
System.out.println("namespace [ "+nameSpace+" ] created in success");
} catch (IOException e) {
e.printStackTrace();
}finally {
close(admin);
}
}
/**
 * Creates tableName with the given column families.
 * columnFamily may be null (skipped); columnFamilies supplies the rest.
 */
private static void createTable(Connection con,String tableName,String columnFamily,String...columnFamilies){
Admin admin = null;
try {
admin = admin(con);
TableName tn = TableName.valueOf(tableName);
// build a table-descriptor builder from the table name
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tn);
// collect the column-family descriptors
List<ColumnFamilyDescriptor> list = new ArrayList<>();
if(null != columnFamily) {
list.add(ColumnFamilyDescriptorBuilder.of(columnFamily));
}
for (String family : columnFamilies) {
list.add(ColumnFamilyDescriptorBuilder.of(family));
}
// attach the column families to the table descriptor
builder.setColumnFamilies(list);
admin.createTable(builder.build());
System.out.println("table [ "+tableName+" ] created in success");
} catch (IOException e) {
e.printStackTrace();
} finally {
close(admin);
}
}
// Returns whether tableName exists; IOException is logged and yields false.
private static boolean tableExists(Connection con,TableName tableName){
Admin admin = null;
try {
admin = admin(con);
return admin.tableExists(tableName);
} catch (IOException e) {
e.printStackTrace();
return false;
} finally {
close(admin);
}
}
/**
 * Loads the delimited file into the HBase table named by the
 * "mysql.hbase.table" property, creating the namespace and table on first
 * use. The file's first line is the layout ":key,cf:col,..." - field 0 is
 * the row key, the remaining fields are family:qualifier pairs. Data rows
 * are assumed to have at least as many fields as the header.
 * @param data     source file whose rows are written as Puts
 * @param regexSep regex used to split each line into fields
 */
public static void putBatch(File data,String regexSep){
Connection con = null;
BufferedMutator mutator = null;
BufferedReader br = null;
try{
// resolve the target HBase table name from config
String tableName = pro.getProperty("mysql.hbase.table");
TableName tn = TableName.valueOf(tableName);
con = con();
// ensure the namespace exists, creating it when missing
String nameSpace = tableName.split(":")[0];
if(!nameSpaceExists(con,nameSpace)){
createNameSpace(con,nameSpace);
}
if(!tableExists(con,tn)){
// derive the column-family set from mysql.hbase.fields.map,
// skipping the ":rowKey:..." entry which maps the row key only
String fields = pro.getProperty("mysql.hbase.fields.map");
Set<String> cfs = new HashSet<>();
for (String pair : fields.split(",")) {
if(pair.startsWith(":rowKey")){
continue;
}
cfs.add(pair.split(":")[0]);
}
String[] faimlies = new String[cfs.size()];
faimlies = cfs.toArray(faimlies);
createTable(con,tableName,null,faimlies);
}
// the file's first line defines the hbase layout (:key,cf:col,...)
br = new BufferedReader(new FileReader(data));
String line = null;
if(null == (line=br.readLine())){
throw new IOException("file [ "+ data.getPath() + " ] empty error");
}
String[] ps = line.split(regexSep);
// listener retries each failed Put once and reports retry counts
DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
BufferedMutator.ExceptionListener listener = (e,_mutator)->{
System.err.println("put data into table [ "+tableName+" ] error "
+e.getNumExceptions()+" rows, retry put at "+dtf.format(LocalDateTime.now()));
int count = 0;
for (int i = 0; i < e.getNumExceptions() ; i++) {
Row row = e.getRow(i);
try {
_mutator.mutate((Put)row);
count++;
} catch (IOException ex) {
ex.printStackTrace();
System.err.println("retry put "+row+" error, please check it");
}
}
System.err.println("retry put data into table [ "+tableName+" ] from error total "
+e.getNumExceptions()+" rows, finish "+ count +" rows, at "+dtf.format(LocalDateTime.now()));
};
// 8 MB client-side write buffer before the mutator auto-flushes
BufferedMutatorParams bmp = new BufferedMutatorParams(tn)
.writeBufferSize(8 * 1024 * 1024)
.listener(listener);
mutator = con.getBufferedMutator(bmp);
// buffer Puts locally and hand them to the mutator in batches of 1000
int count = 0,CAPACITY = 1000;
List<Put> list = new ArrayList<>(CAPACITY);
Put put = null;
while (null != (line=br.readLine())){
String[] arr = line.split(regexSep);
// arr[0] is the row key; remaining fields follow the header layout
put = new Put(Bytes.toBytes(arr[0]));
for (int i = 1; i <ps.length ; i++) {
String[] ts = ps[i].split(":");
put.addColumn(Bytes.toBytes(ts[0]),Bytes.toBytes(ts[1]),Bytes.toBytes(arr[i]));
}
list.add(put);
if(list.size()==CAPACITY){
mutator.mutate(list);
count += list.size();
list.clear();
}
}
// flush the final partial batch
mutator.mutate(list);
count += list.size();
list.clear();
System.err.println("batch put into [ "+tableName+" , "+count+" rows ] from [ "+data.getPath()+" ] in success");
}catch (IOException e){
e.printStackTrace();
System.err.println("batch put from [ "+data.getPath()+" ] in failure");
}finally {
close(br,mutator,con);
}
}
/**
 * Deletes rows by row key, or truncates the whole table when rks is empty.
 * @param tableName fully qualified table name ("namespace:table")
 * @param rks       row keys to delete; empty means truncate (preserveSplits=false)
 */
public static void deleteByRowKey(String tableName,String...rks){
Connection con = null;
Admin admin = null;
Table table = null;
TableName tn = TableName.valueOf(tableName);
try {
con=con();
admin = admin(con);
if(!admin.tableExists(tn)){
throw new IOException("table [ "+tableName+" ] not exist error");
}
if(rks.length==0){
// truncate path: the table must be disabled first, then re-enabled
if(admin.isTableEnabled(tn)){
admin.disableTable(tn);
}
admin.truncateTable(tn,false);
if(admin.isTableDisabled(tn)){
admin.enableTable(tn);
}
}else{
// targeted path: batch-delete only the listed row keys
table = con.getTable(tn);
List<Delete> deletes = new ArrayList<>();
for (String rk : rks) {
deletes.add(new Delete(Bytes.toBytes(rk)));
}
table.delete(deletes);
}
System.err.println("delete [ "+tableName+" ] in success");
} catch (IOException e) {
e.printStackTrace();
System.err.println("delete [ "+tableName+" ] in error : "+e.getMessage());
} finally {
close(table,admin,con);
}
}
}
Mysql2Hbase
package cn.kgc.base;
import java.io.File;
/**
 * End-to-end orchestrator: loads the config file, exports the MySQL table
 * to a flat file under "java.file.dir", then bulk-loads that file into HBase.
 */
public class Mysql2Hbase extends Config {
    /**
     * Runs the full MySQL-to-HBase transfer described by the config file.
     * @param configPath path to the datasource.properties file
     * @throws RuntimeException when the export directory does not exist
     */
    public static void transfer(String configPath){
        init(configPath);
        String exportDir = pro.getProperty("java.file.dir");
        String where = pro.getProperty("mysql.where");
        File dir = new File(exportDir);
        if (!dir.exists()) {
            throw new RuntimeException("directory " + exportDir + " not exist exception");
        }
        File exported = Mysql2Java.selectWrite(dir, where);
        Java2HBase.putBatch(exported, ",");
    }
}
Mysql2Java
package cn.kgc.base;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.sql.*;
/**
 * MySQL-side exporter: reads the configured table and writes it to a flat
 * file whose first line is the hbase field map ("mysql.hbase.fields.map").
 * NOTE(review): the static block reads the inherited {@code pro} Properties,
 * so Config.init(...) must have run before this class is first loaded.
 */
public class Mysql2Java extends Config{
    // Register the JDBC driver named by "mysql.driver".
    static {
        try {
            Class.forName(pro.getProperty("mysql.driver"));
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /** Opens a MySQL connection from the mysql.url/username/password settings. */
    private static Connection con() throws SQLException {
        return DriverManager.getConnection(
                pro.getProperty("mysql.url"),
                pro.getProperty("mysql.username"),
                pro.getProperty("mysql.password")
        );
    }

    /** Prepares sql on con and binds the given positional params (1-based). */
    private static PreparedStatement pst(Connection con,String sql,Object...params) throws SQLException {
        PreparedStatement pst = con.prepareStatement(sql);
        for (int i = 0; i < params.length; i++) {
            pst.setObject(i+1,params[i]);
        }
        return pst;
    }

    /** Renders one result-set value for the export file; SQL NULL becomes "". */
    private static String nullToEmpty(Object v) {
        return v == null ? "" : v.toString();
    }

    /**
     * Exports the configured MySQL table into a timestamp-named file under
     * path. Line 1 is the hbase field-map header; each following line is the
     * comma-joined values of one row (column order follows the header).
     *
     * @param path   existing directory that receives the export file
     * @param where  optional filter; both "where user_id=1" and bare
     *               "user_id=1" are accepted (null/blank means no filter)
     * @param params optional values for ? placeholders inside where
     * @return the file written, or null when the export failed
     */
    public static File selectWrite(File path, String where, Object...params){
        File dest = null;
        String header = pro.getProperty("mysql.hbase.fields.map");
        StringBuilder sqlCmd = new StringBuilder("select ");
        String[] split = header.split(",");
        for (String pair : split) {
            // the last token is the mysql column (":rowKey:user_id" -> "user_id")
            String[] ff = pair.split(":");
            sqlCmd.append(ff[ff.length-1]);
            sqlCmd.append(",");
        }
        sqlCmd.deleteCharAt(sqlCmd.length()-1);
        sqlCmd.append(" from ");
        sqlCmd.append(pro.getProperty("mysql.table"));
        if (null != where && !where.trim().isEmpty()) {
            String cond = where.trim();
            // BUG FIX: the sample config stores the condition without the
            // "where" keyword ("mysql.where=user_id between 1 and 19"); the
            // old startsWith("where") guard silently dropped it and exported
            // the whole table. Accept both forms, case-insensitively.
            if (!cond.regionMatches(true, 0, "where", 0, 5)) {
                cond = "where " + cond;
            }
            sqlCmd.append(" ");
            sqlCmd.append(cond);
        }
        Connection con = null;
        PreparedStatement pst = null;
        ResultSet rst = null;
        BufferedWriter bw = null;
        try {
            con = con();
            pst = pst(con,sqlCmd.toString(),params);
            rst = pst.executeQuery();
            // timestamp file name avoids collisions between consecutive runs
            dest = new File(path.getPath()+"/"+System.currentTimeMillis());
            bw = new BufferedWriter(new FileWriter(dest));
            bw.write(header);
            int colCnt = rst.getMetaData().getColumnCount();
            while (rst.next()) {
                StringBuilder line = new StringBuilder();
                // BUG FIX: getObject(...) returns null for SQL NULL; the old
                // unconditional toString() threw NullPointerException.
                line.append(nullToEmpty(rst.getObject(1)));
                for (int i = 2; i <= colCnt ; i++) {
                    line.append(",");
                    line.append(nullToEmpty(rst.getObject(i)));
                }
                bw.newLine();
                bw.write(line.toString());
            }
            bw.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(rst,pst,con,bw);
        }
        return dest;
    }
}
Factory
package cn.kgc.imitate;
import cn.kgc.base.Java2HBase;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/**
 * Test-data generator that imitates a producer/consumer pipeline: one thread
 * writes fake data files into TMP_PATH and publishes them to UPLOAD_PATH;
 * a scheduled consumer claims published files (rename to .PARSING), loads
 * them into HBase via Java2HBase.putBatch, and marks them .COMPLETED.
 */
public class Factory {
    static final String PATH = "C:\\Users\\lenovo\\Desktop\\datasource";
    static final String TMP_PATH = PATH+"\\tmp";
    static final String UPLOAD_PATH = PATH+"\\upload";
    static ExecutorService es = Executors.newFixedThreadPool(5);
    static ScheduledExecutorService ses = Executors.newScheduledThreadPool(2);
    static Random rand = new Random();

    /**
     * Writes one fake file (header + 1000 rows) into TMP_PATH, then moves it
     * to UPLOAD_PATH so the consumer only ever sees fully written files.
     * @param no producer sequence number, embedded in each row key
     */
    private static void make(int no) {
        // FIX: removed the unused SLEEP_TIME local and the local Random that
        // shadowed the static field.
        File dest = new File(TMP_PATH + "\\dsj_test_" + System.currentTimeMillis());
        // FIX: try-with-resources - the old code leaked the writer whenever a
        // write threw before the explicit close().
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(dest, true))) {
            bw.write(":key,cf1:name,cf1:age,cf2:pos,cf2:salary,cf3:rst");
            bw.newLine();
            for (int i = 0; i < 1000; i++) {
                bw.write(MessageFormat.format(no + "_zbstu{0},henry{0},{1},market clerk,{2},how are you"
                        , i, 18 + rand.nextInt(20), 1 + rand.nextInt(3)));
                bw.newLine();
                Thread.sleep(5); // spread writes out to mimic a slow producer
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt(); // restore interrupt status
            }
            e.printStackTrace();
            return; // matches old behavior: no publish on failure
        }
        // FIX: surface a failed publish instead of silently ignoring it
        if (!dest.renameTo(new File(UPLOAD_PATH + "\\" + dest.getName()))) {
            System.err.println("publish rename failed for " + dest.getPath());
        }
    }

    /** Starts one background producer emitting a new file roughly every 30s. */
    public static void produce(){
        es.submit(()->{
            for (int i = 1; true ; i++) {
                make(i);
                Thread.sleep(30000);
            }
        });
    }

    /** Every 5s, claims fresh files in UPLOAD_PATH and loads them into HBase. */
    public static void consume(){
        ses.scheduleWithFixedDelay(()->{
            // FIX: listFiles returns null when the directory is missing or
            // unreadable; the old code NPE'd inside the scheduled task.
            File[] fresh = new File(UPLOAD_PATH).listFiles((f)->f.isFile()
                    && !f.getName().endsWith(".COMPLETED")
                    && !f.getName().endsWith(".PARSING"));
            if (fresh == null) {
                return;
            }
            for (File file : fresh) {
                System.out.println(file.getName()+" beginning");
                // claim the file first so the next tick cannot pick it up again
                File dest = new File(file.getPath() + ".PARSING");
                if (!file.renameTo(dest)) {
                    continue; // lost the claim race or rename failed; retry next tick
                }
                es.submit(()->{
                    System.out.println(dest.getName()+" beginning");
                    Java2HBase.putBatch(dest,",");
                    System.err.println(dest.getName()+" finished");
                    dest.renameTo(new File(dest.getPath().replace(".PARSING",".COMPLETED")));
                });
            }
        },1,5, TimeUnit.SECONDS);
    }

    public static void main(String[] args) throws IOException, InterruptedException {
        produce();
        consume();
    }
}
App
package cn.kgc;
import cn.kgc.base.Mysql2Hbase;
/**
 * Command-line entry point for the MySQL-to-HBase transfer.
 * Usage: java -jar java2hbase-1.0-jar-with-dependencies.jar <datasource.properties>
 */
public class App
{
    public static void main( String[] args ){
        // FIX: guard against a missing argument - the old code threw
        // ArrayIndexOutOfBoundsException when launched without the config path.
        if (args.length < 1) {
            System.err.println("usage: java -jar java2hbase-1.0-jar-with-dependencies.jar <datasource.properties>");
            return;
        }
        Mysql2Hbase.transfer(args[0]);
    }
}
每次测试 运行只需修改配置文件信息 以及配置配置文件路径即可
打胖包
新建一个目录 把胖包放进去 并把配置文件放进去
运行命令:
java -jar java2hbase-1.0-jar-with-dependencies.jar datasource.properties
测试成功:
hbase 查找是否进入
list
scan 'test:userinfo'