Importing HDFS Data into HBase with MapReduce

Dependencies
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.4</version>
        </dependency>
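
The job below writes into the table baizhi:t_user with a single column family cf1, so that namespace and table must exist before the job is submitted. Below is a minimal sketch using the HBase 1.2.x client Admin API (the class name CreateTargetTable is just for illustration, and the ZooKeeper settings mirror those used in the driver further down); the same setup can equally be done from the HBase shell with create_namespace and create.

package hdfs2hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateTargetTable {

    public static void main(String[] args) throws Exception {
        // Same HBase connection settings as the MapReduce driver below.
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "spark");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {

            // Create the 'baizhi' namespace if it does not exist yet.
            boolean namespaceExists = false;
            for (NamespaceDescriptor ns : admin.listNamespaceDescriptors()) {
                if (ns.getName().equals("baizhi")) {
                    namespaceExists = true;
                    break;
                }
            }
            if (!namespaceExists) {
                admin.createNamespace(NamespaceDescriptor.create("baizhi").build());
            }

            // Create 'baizhi:t_user' with the column family 'cf1' used by the reducer.
            TableName tableName = TableName.valueOf("baizhi:t_user");
            if (!admin.tableExists(tableName)) {
                HTableDescriptor descriptor = new HTableDescriptor(tableName);
                descriptor.addFamily(new HColumnDescriptor("cf1"));
                admin.createTable(descriptor);
            }
        }
    }
}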

Code

package hdfs2hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import java.io.IOException;

public class HDFStoHbase {

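    // Mapper: emits every input line unchanged; the NullWritable key groups all records into a single reduce call.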
    public static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            context.write(NullWritable.get(), value);

        }
    }

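    // Reducer: each value is one cleaned access-log line with space-separated fields
    // (uuid, ip, userid, dateString, lvDname, ipAddress, name); field 0 becomes the row key
    // and the remaining fields are written to column family cf1 in a single Put.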
    public static class MyReducer extends TableReducer<NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(NullWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            for (Text log : values) {


                String[] lines = log.toString().split(" ");
                Put put = new Put(lines[0].getBytes()); // uuid

                put.addColumn("cf1".getBytes(), "ip".getBytes(), lines[1].getBytes());
                put.addColumn("cf1".getBytes(), "userid".getBytes(), lines[2].getBytes());
                put.addColumn("cf1".getBytes(), "dateString".getBytes(), lines[3].getBytes());
                put.addColumn("cf1".getBytes(), "lvDname".getBytes(), lines[4].getBytes());
                put.addColumn("cf1".getBytes(), "ipAddress".getBytes(), lines[5].getBytes());
                put.addColumn("cf1".getBytes(), "name".getBytes(), lines[6].getBytes());
                context.write(NullWritable.get(), put);
            }

        }
    }

    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        // 1. Build the Configuration and Job object
        Configuration conf = new Configuration();
        conf.addResource("conf/core-site.xml");
        conf.addResource("conf/hdfs-site.xml");
        conf.addResource("conf/yarn-site.xml");
        conf.addResource("conf/mapred-site.xml");
        conf.set("mapreduce.app-submission.cross-platform", "true");

        conf.set("hbase.zookeeper.quorum", "spark");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

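        // Point the submitter at the packaged job jar so it can be shipped to the cluster when submitting from the IDE.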
        conf.set(MRJobConfig.JAR, "D:\\Program Files\\feiq\\Recv Files\\Hbase\\target\\Hbase-1.0-SNAPSHOT.jar");
        Job job = Job.getInstance(conf);

        // 2. Configure the input format, input path and mapper
        job.setInputFormatClass(TextInputFormat.class);

        TextInputFormat.addInputPath(job, new Path("/access_log/19-07-31clean"));

        job.setMapperClass(MyMapper.class);

        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);


        job.setOutputFormatClass(TableOutputFormat.class);


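        // Bind the reducer to the target HBase table baizhi:t_user; initTableReducerJob also wires the HBase configuration and TableOutputFormat settings into the job.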
        TableMapReduceUtil.initTableReducerJob(
                "baizhi:t_user",
                MyReducer.class,
                job);

        // 3. Submit the job and wait for completion
        //job.submit();
        job.waitForCompletion(true);


    }
}
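
Note that MRJobConfig.JAR above points at the packaged job jar under target/, so the project presumably needs to be built (for example with mvn package) before the driver is run from the IDE; the exact path shown is specific to the author's machine.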

Result

hbase(main):011:0> s = get_table 'baizhi:t_user'
0 row(s) in 0.0030 seconds

=> Hbase::Table - baizhi:t_user
hbase(main):012:0> s.scan
ROW                                      COLUMN+CELL
 1                                       column=cf1:name, timestamp=1565894707617, value=zhangsan
 65343231-e1ad-46e9-a7c0-a8e98e6967f6    column=cf1:dateString, timestamp=1566122938836, value=2019-08-01
 65343231-e1ad-46e9-a7c0-a8e98e6967f6    column=cf1:ip, timestamp=1566122938836, value=193.154.233.15
 65343231-e1ad-46e9-a7c0-a8e98e6967f6    column=cf1:ipAddress, timestamp=1566122938836, value=beijing
 65343231-e1ad-46e9-a7c0-a8e98e6967f6    column=cf1:lvDname, timestamp=1566122938836, value=SheratonHotel
 65343231-e1ad-46e9-a7c0-a8e98e6967f6    column=cf1:name, timestamp=1566122938836, value=wangwu
 65343231-e1ad-46e9-a7c0-a8e98e6967f6    column=cf1:userid, timestamp=1566122938836, value=65343231-e1ad-46e9-a7c0-a8e98e6967f6
2 row(s) in 0.0310 seconds