一、Exporting an HBase table's files to HDFS
--------------------------------------------------------------------------
1. Copy the HBase jars and metrics-core-xxx.jar into the Hadoop classpath.
    $>cd /soft/hbase/lib
    $>ls | grep hbase | cp `xargs` /soft/hadoop/share/hadoop/common/lib
    $>ls | grep metric | cp `xargs` /soft/hadoop/share/hadoop/common/lib
2. Run the export MR program shipped in hbase-server-VERSION.jar to dump the table's data to HDFS (as SequenceFiles). This is the data of the HBase table.
    $> cd /soft/hbase/lib
    $> hadoop jar hbase-server-1.2.6.jar export call:calllogs /data/HbaseTableDataout

二、HBase Bulk Load: migrating HBase data in bulk from table A to an empty table B (B may live in a different namespace)
-------------------------------------------------------------------------
1. How it works
    B is an empty table with the same structure as A.
    A's data files on HDFS are copied straight into B, so B ends up holding the same files as A.
2. Copy the HBase jars and metrics-core-xxx.jar into the Hadoop classpath.
    $>cd /soft/hbase/lib
    $>ls | grep hbase | cp `xargs` /soft/hadoop/share/hadoop/common/lib
    $>ls | grep metric | cp `xargs` /soft/hadoop/share/hadoop/common/lib
3. Create table B in HBase.
    $hbase> create 'ns1:mytable1', 'f1', 'f2'
4. Load table A's HFile data into ns1:mytable1 with HBase's completebulkload tool.
    $> cd /soft/hbase/lib
    $> hadoop jar hbase-server-1.2.6.jar completebulkload /hbase/data/call/calllogs/4471f0b068b2b425fdec957d25d4ab02 ns1:mytable1
    [/hbase/data/call/calllogs/4471f0b068b2b425fdec957d25d4ab02 is table A's data directory inside HBase]

三、Importing MySQL data into an HBase table with MapReduce
-------------------------------------------------------------
1. Add the dependencies (pom.xml):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>groupId</groupId>
<artifactId>TestHbase</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.17</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.6</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-auth -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.7.3</version>
</dependency>
</dependencies>
</project>
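The driver in the next step writes into the HBase table ns1:customers with column family f1 (the family used by the Mapper in step 4), and that table must exist before the job runs. Below is a minimal sketch using the HBase 1.2.x client API; the class name CreateTargetTable is only illustrative, and it assumes hbase-site.xml is on the classpath and the namespace ns1 does not exist yet (the hbase shell equivalent is create_namespace 'ns1' followed by create 'ns1:customers','f1').
package hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
public class CreateTargetTable {
    public static void main(String[] args) throws Exception {
        //picks up the ZooKeeper quorum from hbase-site.xml on the classpath
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            //create the ns1 namespace (throws NamespaceExistException if it is already there)
            admin.createNamespace(NamespaceDescriptor.create("ns1").build());
            //ns1:customers with a single column family f1
            HTableDescriptor table = new HTableDescriptor(TableName.valueOf("ns1:customers"));
            table.addFamily(new HColumnDescriptor("f1"));
            admin.createTable(table);
        }
    }
}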
2. The driver class, MyApp:
package hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
/**
 * MyApp: MapReduce driver that reads rows from MySQL and writes them into HBase.
 */
public class MyApp {
public static void main(String [] args)
{
try {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
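//target table for TableOutputFormat, plus the ZooKeeper quorum of the HBase cluster in host:clientPort:parentZnode form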
job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE,"ns1:customers");
job.getConfiguration().set(TableOutputFormat.QUORUM_ADDRESS,"s100:2181:/hbase");
//job settings
job.setJobName("MysqlToHbase");
job.setJarByClass(MyApp.class);
//configure the MySQL connection (driver, url, user, password)
DBConfiguration.configureDB(job.getConfiguration(),
"com.mysql.jdbc.Driver",
"jdbc:mysql://192.168.43.1:3306/mydata",
"mysql",
"mysql");
//configure the DB input: the data query and the row-count query
DBInputFormat.setInput(job, MyDBWritable.class,
"select id,name,age from myhbase ",
"select count(*) from myhbase");
//write the output to HBase through TableOutputFormat
job.setOutputFormatClass(TableOutputFormat.class);
//map-only job: only a mapper is set
job.setMapperClass(MyMapper.class);
//job properties: no reducers; the mapper emits (NullWritable, Put)
job.setNumReduceTasks(0);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Put.class);
//submit the job and wait for completion
job.waitForCompletion(true);
} catch (Exception e) {
e.printStackTrace();
}
}
}
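As a side note, the two TableOutputFormat settings and the setOutputFormatClass call above are usually wrapped up by HBase's TableMapReduceUtil (org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil), which also ships the HBase jars with the job. A minimal sketch of the equivalent call inside main, assuming hbase-site.xml with the s100 quorum is on the classpath (otherwise keep the explicit QUORUM_ADDRESS line):
//configures TableOutputFormat and the output table name in one call;
//null means this job contributes no reducer of its own
TableMapReduceUtil.initTableReducerJob("ns1:customers", null, job);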
3. The custom Writable, MyDBWritable:
package hbase;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
public class MyDBWritable implements Writable,DBWritable {
private int id = 0;
private String name;
private int age;
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
//DB serialization: would bind this object's fields to an INSERT/UPDATE statement.
//Unused here because this job only reads from MySQL.
public void write(PreparedStatement ppst) throws SQLException {
}
//DB deserialization: read one row of the SELECT (id, name, age) from the ResultSet
public void readFields(ResultSet rs) throws SQLException {
id = rs.getInt(1);
name = rs.getString(2);
age = rs.getInt(3);
}
//Hadoop serialization and deserialization of this record
public void write(DataOutput out) throws IOException {
out.writeInt(id);
out.writeUTF(name);
out.writeInt(age);
}
public void readFields(DataInput in) throws IOException {
id = in.readInt();
name = in.readUTF();
age = in.readInt();
}
}
4. The Mapper class, MyMapper:
package hbase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * MyMapper: turns one MySQL row into an HBase Put keyed by the customer id.
 */
public class MyMapper extends Mapper<LongWritable, MyDBWritable, NullWritable,Put> {
@Override
protected void map(LongWritable key, MyDBWritable value, Context context) throws IOException, InterruptedException {
int id = value.getId();
String name = value.getName();
int age = value.getAge();
//the row key is the 4-byte big-endian encoding of the id
Put put = new Put(Bytes.toBytes(id));
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("id"), Bytes.toBytes(id));
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes(name));
put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"), Bytes.toBytes(age));
context.write(NullWritable.get(), put);
}
}
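Once the job has finished, the imported rows can be checked from the HBase client API. Because the row key and the id/age columns were written with Bytes.toBytes(int), they are stored as raw 4-byte values (the hbase shell shows them as escaped bytes), so they have to be read back with Bytes.toInt. A minimal sketch; the class name VerifyImport is only illustrative and hbase-site.xml is assumed to be on the classpath:
package hbase;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class VerifyImport {
    public static void main(String[] args) throws Exception {
        try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
             Table table = conn.getTable(TableName.valueOf("ns1:customers"));
             ResultScanner scanner = table.getScanner(new Scan())) {
            for (Result r : scanner) {
                //id and age come back as 4-byte ints, name as a UTF-8 string
                int id = Bytes.toInt(r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id")));
                String name = Bytes.toString(r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name")));
                int age = Bytes.toInt(r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("age")));
                System.out.println(id + "\t" + name + "\t" + age);
            }
        }
    }
}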