HBase bulk import with MapReduce

The listing below reads tab-separated mobile access log lines from HDFS, has the Mapper build a row key of msisdn + formatted timestamp, and lets a TableReducer write each record as a Put into the wlan_log table through TableOutputFormat.

package hbase;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class BatchImport {
    static class BatchImportMapper extends
            Mapper<LongWritable, Text, LongWritable, Text> {
        SimpleDateFormat dateformat1 = new SimpleDateFormat("yyyyMMddHHmmss");// formats the epoch-millisecond timestamp for the row key
        Text v2 = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            final String[] splited = value.toString().split("\t");// fields: [0] timestamp, [1] msisdn, ...
            try {
                final Date date = new Date(Long.parseLong(splited[0].trim()));
                final String dateFormat = dateformat1.format(date);
                String rowKey = splited[1] + ":" + dateFormat;// row key: phone number + datetime
                v2.set(rowKey + "\t" + value.toString());
                context.write(key, v2);
            } catch (NumberFormatException e) {
                final Counter counter = context.getCounter("BatchImport",
                        "ErrorFormat");
                counter.increment(1L);
                System.out.println("出错了" + splited[0] + " " + e.getMessage());
            }
        }
    }
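
    // Note: the map output key is the byte offset of each input line, so the
    // shuffle provides no meaningful grouping. The reducer below simply turns
    // every incoming line into a Put; TableOutputFormat then writes it to the
    // table named by TableOutputFormat.OUTPUT_TABLE in main().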

    static class BatchImportReducer extends
            TableReducer<LongWritable, Text, NullWritable> {
        @Override
        protected void reduce(LongWritable key, Iterable<Text> values,
                Context context) throws java.io.IOException,
                InterruptedException {
            for (Text text : values) {
                final String[] splited = text.toString().split("\t");

                final Put put = new Put(Bytes.toBytes(splited[0]));// field 0: row key
                put.add(Bytes.toBytes("cf"), Bytes.toBytes("date"),
                        Bytes.toBytes(splited[1]));// field 1: timestamp
                put.add(Bytes.toBytes("cf"), Bytes.toBytes("msisdn"),
                        Bytes.toBytes(splited[2]));// field 2: phone number (msisdn)
                // remaining fields omitted; add them with further put.add(...) calls
                context.write(NullWritable.get(), put);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        final Configuration configuration = new Configuration();
        // ZooKeeper quorum used by the HBase client
        configuration.set("hbase.zookeeper.quorum", "hadoop0");
        // target HBase table; create it in the shell first: create 'wlan_log','cf'
        configuration.set(TableOutputFormat.OUTPUT_TABLE, "wlan_log");
        // raise the DFS socket timeout so HBase does not time out during the import
        configuration.set("dfs.socket.timeout", "180000");

        final Job job = new Job(configuration, "HBaseBatchImport");
        job.setJarByClass(BatchImport.class);// ship the job jar so the cluster can find the mapper/reducer classes

        job.setMapperClass(BatchImportMapper.class);
        job.setReducerClass(BatchImportReducer.class);
        // only the map output types need to be set; the reduce output goes to HBase
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // no output path; the job writes through TableOutputFormat instead
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.setInputPaths(job, "hdfs://hadoop0:9000/input");// the access log files must already be uploaded to /input in HDFS

        job.waitForCompletion(true);
    }
}
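
After the job completes, a quick scan confirms that the rows landed in HBase. The sketch below is one way to do that check; it assumes the same pre-1.0 HBase client API the listing above uses (HTable and Put.add were removed in later releases), reuses the table name, column family, and qualifiers from the reducer, and the class name ScanCheck is only for illustration.

package hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanCheck {
    public static void main(String[] args) throws Exception {
        final Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop0");// same ZooKeeper quorum as the import job
        final HTable table = new HTable(conf, "wlan_log");
        try {
            final Scan scan = new Scan();
            scan.setCaching(100);// fetch 100 rows per RPC to keep the scan cheap
            final ResultScanner scanner = table.getScanner(scan);
            for (Result result : scanner) {
                final String rowKey = Bytes.toString(result.getRow());
                final String date = Bytes.toString(result.getValue(
                        Bytes.toBytes("cf"), Bytes.toBytes("date")));
                final String msisdn = Bytes.toString(result.getValue(
                        Bytes.toBytes("cf"), Bytes.toBytes("msisdn")));
                System.out.println(rowKey + "\t" + date + "\t" + msisdn);
            }
            scanner.close();
        } finally {
            table.close();
        }
    }
}

From the HBase shell, scan 'wlan_log' (or count 'wlan_log') gives the same confirmation without any code.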
