A Comparison of Several HBase Data Loading Methods

1. Loading via pre-generated HFiles

A detailed write-up of this approach is available at http://blog.csdn.net/dajuezhao/archive/2011/04/26/6365053.aspx
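Since the linked post carries the details, here is only a minimal sketch of the pattern, assuming the same 0.90-era HBase APIs used elsewhere in this post and reusing the testKang table and xxx column family from the MapReduce example below (the class name, job name, and input/output paths are placeholders): a map-only job emits (rowkey, Put) pairs, HFileOutputFormat.configureIncrementalLoad wires up the sorting and partitioning so the output matches the table's region boundaries, and LoadIncrementalHFiles moves the finished files into the table.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class HFileLoadSketch {

    // Turns each tab-separated "rowkey<TAB>value" line into a Put.
    public static class LineMapper
            extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] all = value.toString().split("\t");
            if (all.length == 2) {
                byte[] row = Bytes.toBytes(all[0]);
                Put put = new Put(row);
                put.add(Bytes.toBytes("xxx"), Bytes.toBytes("20110313"), Bytes.toBytes(all[1]));
                context.write(new ImmutableBytesWritable(row), put);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "testKang");

        Job job = new Job(conf, "HFileGen");
        job.setJarByClass(HFileLoadSketch.class);
        job.setMapperClass(LineMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Sets the reducer, total-order partitioner and output format so the
        // job writes HFiles sorted to match the table's region boundaries.
        HFileOutputFormat.configureIncrementalLoad(job, table);

        if (job.waitForCompletion(true)) {
            // Moving the finished HFiles into the table is essentially an HDFS mv.
            new LoadIncrementalHFiles(conf).doBulkLoad(new Path(args[1]), table);
        }
    }
}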

2. Loading via MapReduce

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HBaseImport extends Configured implements Tool {

    static final Log LOG = LogFactory.getLog(HBaseImport.class);
    public static final String JOBNAME = "MRImport";

    // Map-only job: each mapper writes Puts straight to HBase, so the job
    // itself emits nothing and uses NullWritable for both output types.
    public static class Map extends Mapper<LongWritable, Text, NullWritable, NullWritable> {

        Configuration configuration = null;
        HTable xTable = null;
        private boolean wal = true;
        static long count = 0;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
            configuration = context.getConfiguration();
            xTable = new HTable(configuration, "testKang");
            xTable.setAutoFlush(false);                  // buffer Puts client-side
            xTable.setWriteBufferSize(12 * 1024 * 1024);
            wal = true;
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] all = value.toString().split("\t");
            if (all.length == 2) {
                Put put = new Put(Bytes.toBytes(all[0]));
                put.add(Bytes.toBytes("xxx"), Bytes.toBytes("20110313"), Bytes.toBytes(all[1]));
                if (!wal) {
                    // Skipping the WAL trades durability for write speed.
                    put.setWriteToWAL(false);
                }
                xTable.put(put);
                if ((++count % 100) == 0) {
                    context.setStatus(count + " DOCUMENTS done!");
                    context.progress();                  // keep the task from timing out
                    System.out.println(count + " DOCUMENTS done!");
                }
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            super.cleanup(context);
            xTable.flushCommits();                       // push any buffered Puts
            xTable.close();
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        String input = args[0];
        Configuration conf = HBaseConfiguration.create(getConf());
        conf.set("hbase.master", "m0:60000");
        Job job = new Job(conf, JOBNAME);
        job.setJarByClass(HBaseImport.class);
        job.setMapperClass(Map.class);
        job.setNumReduceTasks(0);                        // map-only
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.setInputPaths(job, input);
        job.setOutputFormatClass(NullOutputFormat.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        int res = 1;
        try {
            res = ToolRunner.run(conf, new HBaseImport(), otherArgs);
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.exit(res);
    }
}

 

Note the Mapper lifecycle methods being overridden above; from the Mapper Javadoc:

protected void setup(Mapper.Context context)
    Called once at the beginning of the task.
protected void map(KEYIN key, VALUEIN value, Mapper.Context context)
    Called once for each key/value pair in the input split.
protected void cleanup(Mapper.Context context)
    Called once at the end of the task.
void run(Mapper.Context context)
    Expert users can override this method for more complete control over the execution of the Mapper.
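Of these, run() is the expert hook: its default implementation is essentially the loop below, so overriding it (for example inside the Map class above) lets you wrap the per-record processing with custom control flow. This is a sketch of the framework's default behavior, not code the import job needs.

// Essentially what Mapper.run() does by default: setup once, map every
// key/value pair in the split, then cleanup once.
@Override
public void run(Context context) throws IOException, InterruptedException {
    setup(context);
    while (context.nextKeyValue()) {
        map(context.getCurrentKey(), context.getCurrentValue(), context);
    }
    cleanup(context);
}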

3. Loading via a standalone Java client

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;

public class InsertContactJava {

    public static long startTime;
    public static long rowkey = 0;                        // starting row key
    public static final int lineCount = 100000;           // rows per commit
    public static String tableName = "usercontact_kang";  // destination table
    public static int countLie = 8;                       // number of columns in the table

    public static void main(String[] args) throws IOException {
        startTime = System.currentTimeMillis() / 1000;
        System.out.println("start time = " + startTime);
        Thread t1 = new Thread() {
            @Override
            public void run() {
                try {
                    insert_one("/run/jar/123");
                    //loadByLieWithVector("/run/jar/123");
                    //loadByLieWithArrayList("/run/jar/123");
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        };
        t1.start();
    }

    // Returns the next sequential row key. The original post references this
    // helper without showing it; this is a minimal reconstruction.
    private static synchronized String getIncreasRowKey() {
        return String.valueOf(rowkey++);
    }

    public static void insert_one(String path) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, tableName);
        File f = new File(path);
        ArrayList<Put> list = new ArrayList<Put>();
        BufferedReader br = new BufferedReader(new FileReader(f));
        String tmp = br.readLine();
        int count = 0;
        while (tmp != null) {
            // Input format: two "~"-separated contact fields plus a count,
            // separated by tabs.
            String[] arr_value = tmp.split("\t", 10);
            String[] first = arr_value[0].split("~", 5);
            String[] second = arr_value[1].split("~", 5);
            String rowname = getIncreasRowKey();
            String firstaccount = first[0];
            String firstprotocolid = first[1];
            String firstdomain = first[2];
            // The original used a Utils.getToday("yyyyMMdd") helper; inlined here.
            String inserttime = new SimpleDateFormat("yyyyMMdd").format(new Date());
            String secondaccount = second[0];
            String secondprotocolid = second[1];
            String seconddomain = second[2];
            String timescount = Integer.valueOf(arr_value[2]).toString();

            Put p = new Put(rowname.getBytes());
            p.add("ucvalue".getBytes(), "FIRSTACCOUNT".getBytes(), firstaccount.getBytes());
            p.add("ucvalue".getBytes(), "FIRSTDOMAIN".getBytes(), firstdomain.getBytes());
            p.add("ucvalue".getBytes(), "FIRSTPROTOCOLID".getBytes(), firstprotocolid.getBytes());
            p.add("ucvalue".getBytes(), "INSERTTIME".getBytes(), inserttime.getBytes());
            p.add("ucvalue".getBytes(), "SECONDACCOUNT".getBytes(), secondaccount.getBytes());
            p.add("ucvalue".getBytes(), "SECONDDOMAIN".getBytes(), seconddomain.getBytes());
            p.add("ucvalue".getBytes(), "SECONDPROTOCOLID".getBytes(), secondprotocolid.getBytes());
            p.add("ucvalue".getBytes(), "TIMESCOUNT".getBytes(), timescount.getBytes());
            list.add(p);
            count++;

            // Flush a batch every 10000 rows. (The original flushed in an
            // else-branch, which silently dropped the line that triggered the
            // flush; fixed here by always parsing the line first.)
            if (list.size() >= 10000) {
                table.put(list);
                table.flushCommits();
                list.clear();
            }
            tmp = br.readLine();
        }
        br.close();
        if (list.size() > 0) {
            table.put(list);
            table.flushCommits();
        }
        table.close();
        System.out.println("total = " + count);
        long endTime = System.currentTimeMillis() / 1000;
        long costTime = endTime - startTime;
        System.out.println("end time = " + endTime);
        System.out.println(path + ": cost time = " + costTime);
    }
}

4. Comparing the loading methods

• HFile generation:

Generating the HFiles is fairly slow, but once they exist, loading them into HBase is very fast: essentially just an HDFS move. One improvement to this approach is to sort the data first and then generate the HFiles.

Across all of the loading schemes, the HFile approach is the fastest, but with one precondition: the data is being imported for the first time and the table is empty. If the table already holds data, importing HFiles into it triggers split operations, which at their slowest took about an hour.


• MapReduce:

It starts out fast, but because the MR job and HBase compete for resources, it becomes very slow after a certain point.

• Standalone Java client:

Multiple clients writing concurrently with multiple threads currently looks like the best approach: the clients and region servers are separate machines, disk reads and writes are separated, and the bottleneck is only the network and memory. The experienced practitioners I consulted mostly recommended this approach as well, and stressed that it must be multiple clients with multiple threads, as in the sketch below.

Adapted from: http://blog.sina.com.cn/s/blog_6c994d8f01015fdr.html
