I searched around online and found a blog post on how to read data out of HBase and write it into MySQL with MapReduce; these are my notes on it.
Reference post: [link]
Starting from that post, I made a few small changes of my own.
Below is the custom type used in both the map and reduce stages as the input/output value class. Its write and readFields methods are worth optimizing: if an object has hundreds of fields, hard-coding each one quickly becomes unmanageable. One way to avoid that is shown in the class below (a sketch of the matching readFields follows right after it).
package mapreduce;
import java.beans.PropertyDescriptor;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.BeanWrapper;
import org.springframework.beans.PropertyAccessorFactory;
public class StuHbase implements WritableComparable<StuHbase>, DBWritable {
private String name;
private String year;
public StuHbase() {}
public StuHbase(String name,String year) {
this.name = name;
this.year = year;
}
// The fields must be written and read back in the same order
public void write(DataOutput dataOutput) throws IOException {
// dataOutput.writeUTF(name);
// dataOutput.writeUTF(year);
// The old approach (commented out above) listed every property by hand, which is tedious. Here the
// properties are discovered via reflection and written in a loop; of course the corresponding
// write and readFields methods below would need matching changes (see the sketch after this class).
PropertyDescriptor[] pds = BeanUtils.getPropertyDescriptors(this.getClass());
BeanWrapper beanWrapper = PropertyAccessorFactory.forBeanPropertyAccess(this);
for (PropertyDescriptor propertyDescriptor : pds) {
String properName = propertyDescriptor.getName();
if("class".equals(properName)) {
continue;
}
String value = String.valueOf(beanWrapper.getPropertyValue(properName));
dataOutput.writeUTF(value);
}
}
public void readFields(DataInput dataInput) throws IOException {
this.name = dataInput.readUTF();
this.year = dataInput.readUTF();
}
public void write(PreparedStatement preparedStatement) throws SQLException {
// As with plain JDBC: bind each field onto the PreparedStatement in column order
int index = 1;
preparedStatement.setString(index++, name);
preparedStatement.setString(index++, year);
}
public void readFields(ResultSet resultSet) throws SQLException { // as with a plain JDBC query: read the columns back
int index = 1;
name = resultSet.getString(index++);
year = resultSet.getString(index++);
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getYear() {
return year;
}
public void setYear(String year) {
this.year = year;
}
@Override
public String toString() {
return name + "\t" + year;
}
// StuHbase is only used as a map output value (the shuffle key is Text), so this comparator is not relied on here
public int compareTo(StuHbase o) {
return 0;
}
}
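As noted above, readFields(DataInput) still lists the fields by hand. Here is a minimal sketch (my own addition, not from the referenced post) of the matching reflection-based version, assuming every bean property is a String as in StuHbase:

public void readFields(DataInput dataInput) throws IOException {
    // BeanUtils.getPropertyDescriptors returns the properties in the same order used in write(DataOutput),
    // so the values read back here line up with the values written there.
    PropertyDescriptor[] pds = BeanUtils.getPropertyDescriptors(this.getClass());
    BeanWrapper beanWrapper = PropertyAccessorFactory.forBeanPropertyAccess(this);
    for (PropertyDescriptor pd : pds) {
        if ("class".equals(pd.getName())) {
            continue; // skip the implicit "class" property, just like write() does
        }
        beanWrapper.setPropertyValue(pd.getName(), dataInput.readUTF());
    }
}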
This is the map implementation. Note that the methods for reading a cell's value differ between HBase jar versions, so you will need to adapt the code to your own version; my other post (linked in the original) describes in detail how to run the MapReduce job and which jars are needed. A version-tolerant alternative using CellUtil is sketched right after the class.
package mapreduce;
import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
public class HBaseMap extends TableMapper<Text, StuHbase> {
public void map(ImmutableBytesWritable rows, Result result, Context context)
throws IOException, InterruptedException {
String v1 = "";
String v2 = "";
// (Optional) dump every cell to stdout, using the older KeyValue-style API:
for (Cell cell : result.listCells()) { // iterate over the columns of this row
// If int/float values were not converted to String when they were inserted into HBase, they would come out garbled here
// String row = new String(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), "UTF-8");
// String family =
// new String(kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), "UTF-8");
// String qualifier = new String(kv.getQualifierArray(), kv.getQualifierOffset(),
// kv.getQualifierLength(), "UTF-8");
// String value =
// new String(kv.getValueArray(), kv.getValueOffset(), kv.getValueLength(), "UTF-8");
//
// System.out.println("row:" + row);
// System.out.println("family:" + family);
// System.out.println("qualifier:" + qualifier);
// System.out.println("value:" + value);
// System.out.println("timestamp:" + kv.getTimestamp());
// System.out.println("-------------------------------------------");
String name = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(),
cell.getQualifierLength());
String value =
Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
if("enterpriseName".equals(name)) {
v1 = value;
}else if("year".equals(name)) {
v2 = value;
}
}
context.write(new Text(v1),new StuHbase(v1,v2));
System.out.println("0");
}
}
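As mentioned above, the offset/length calls differ between client versions. On newer HBase clients, the CellUtil helpers (already imported in this class) are a version-tolerant alternative; a minimal sketch of the equivalent loop body:

// Equivalent to the qualifier/value extraction in the loop above, using CellUtil
String name  = Bytes.toString(CellUtil.cloneQualifier(cell));
String value = Bytes.toString(CellUtil.cloneValue(cell));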
Next is the reduce implementation. I took a shortcut here: the reduce body simply loops over the values and writes each one out, which by itself does not do anything useful. The real logic depends on your business requirements, so treat this as a placeholder for demonstration (a slightly more realistic variant is sketched after the class).
package mapreduce;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class HbaseReducer extends Reducer<Text, StuHbase, StuHbase, Text> {
@Override
protected void reduce(Text key, Iterable<StuHbase> values, Context context)
throws IOException, InterruptedException {
for (StuHbase stu : values) {
// DBOutputFormat only persists the key, so the value can be null here
context.write(stu, null);
}
}
}
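If you want something slightly more realistic than echoing every value, here is a minimal sketch (hypothetical logic, purely for illustration) that writes a single record per row key:

@Override
protected void reduce(Text key, Iterable<StuHbase> values, Context context)
        throws IOException, InterruptedException {
    // keep only the first record for each row key (made-up rule for illustration)
    for (StuHbase stu : values) {
        context.write(stu, null);
        break;
    }
}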
Finally, the driver. The main thing is to set the custom input/output types correctly; if you redefine the custom class, the types here have to be changed to match. A short sketch after the driver shows the INSERT statement that DBOutputFormat builds from the configured column list.
package mapreduce;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class StuHbaseDriver extends Configured implements Tool {
public static void main(String[] args) throws Exception {
final Configuration conf = HBaseConfiguration.create();
// Any HBase operation goes through ZooKeeper first: both reads and writes fetch the meta information from ZooKeeper
conf.set("hbase.zookeeper.quorum",
"hadoop2:2181,hadoop3:2181,hadoop4:2181"); // hadoop2/3/4 are my cluster hostnames; plain IPs such as 192.168.0.1:2181 also work
ToolRunner.run(conf, new StuHbaseDriver(), args);
}
public int run(String[] args) throws Exception {
Configuration conf = this.getConf();
// JDBC driver, MySQL URL, user name and password of the target database
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
"jdbc:mysql://localhost:3306/test", "root", "root");
Job job = Job.getInstance(conf);
job.setJarByClass(StuHbaseDriver.class);
Scan scan = new Scan();
scan.setCacheBlocks(false);
scan.setCaching(500);
// Wire up the mapper: HBase table name, scan, mapper class, map output key class, map output value class, job
TableMapReduceUtil.initTableMapperJob("EIIAPASSETSPOJO", scan, HBaseMap.class, Text.class, StuHbase.class, job);
// Set the reducer class
job.setReducerClass(HbaseReducer.class);
// Output goes to a database through DBOutputFormat
job.setOutputFormatClass(DBOutputFormat.class);
// Reducer output key/value classes (the key carries the record; the value is ignored by DBOutputFormat)
job.setOutputKeyClass(StuHbase.class);
job.setOutputValueClass(Text.class);
// Target MySQL table and column list; the column order must match the order used in StuHbase's
// read/write methods, otherwise the values land in the wrong columns
DBOutputFormat.setOutput(job, "your_table_name", "name", "year");
job.waitForCompletion(true);
return 0;
}
}
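To see why that column order matters, here is a small sketch (my own addition, not from the referenced post) that prints the INSERT statement DBOutputFormat generates for the configured columns; "stu" is just a made-up table name. StuHbase.write(PreparedStatement) has to fill the placeholders in exactly this column order.

// Sketch: inspect the SQL that DBOutputFormat will generate for the configured columns
DBOutputFormat<StuHbase, Text> fmt = new DBOutputFormat<StuHbase, Text>();
String sql = fmt.constructQuery("stu", new String[] {"name", "year"});
System.out.println(sql); // something like: INSERT INTO stu (name,year) VALUES (?,?);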