MapReduce on HBase

 1. Read an HBase table and write the results to HDFS

The task: count how many times each class appears. It is essentially another word-count example, except that the input is read from HBase and the results are written to HDFS.

The changes needed: the map side extends a different base class, and the way the Driver wires up the mapper information changes a bit (see the sample data sketch below for the assumed input).
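For context, the job below assumes a student table whose info:clazz column holds the class name. A minimal sketch of such rows from the HBase shell (the row keys and class names here are made up for illustration):

put 'student','1001','info:clazz','Class01'
put 'student','1002','info:clazz','Class01'
put 'student','1003','info:clazz','Class02'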

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class MRHbase01 {
    /**
     * public abstract class TableMapper<KEYOUT, VALUEOUT> extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT>
     *     TableMapper already fixes the input k-v types (row key and Result), so we only declare the output types.
     */
    // note: nested Mapper/Reducer classes must be static so Hadoop can instantiate them
    public static class ReadHbaseMapper extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            // the row key (student id) is available here but not needed for this count
            String id = Bytes.toString(key.get());
            // we are counting class occurrences, so read the info:clazz cell from the Result
            String clazz = Bytes.toString(value.getValue("info".getBytes(), "clazz".getBytes()));
            context.write(new Text(clazz), new IntWritable(1));
        }
    }

    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum=0;
            for (IntWritable value : values) {
                sum+=value.get();
            }
            context.write(key,new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
      
        // HBaseConfiguration.create() layers hbase-site.xml/hbase-default.xml on top of the Hadoop config
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum","master:2181,node1:2181,node2:2181");
        Job job = Job.getInstance(conf);
        job.setJarByClass(MRHbase01.class);

        job.setReducerClass(MyReducer.class);
        
        // Note: unlike a plain MapReduce job, the mapper is wired up through
        // TableMapReduceUtil (the reduce side has a matching initTableReducerJob).
        // The mapper class and its output k-v types are all given in this one call,
        // whereas a plain MR program sets them separately.
        TableMapReduceUtil.initTableMapperJob(
                TableName.valueOf("student"),
                new Scan(),
                ReadHbaseMapper.class,
                Text.class,
                IntWritable.class,
                job
        );

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        
        
        // delete the output path if it already exists
        Path outputPath = new Path("/output");
        FileOutputFormat.setOutputPath(job,outputPath);
        FileSystem fs=FileSystem.get(conf);
        if(fs.exists(outputPath)){
            fs.delete(outputPath,true);
        }
        
        // submit the job and wait for it to finish
        job.waitForCompletion(true);


    }
}

 Once the program is written it needs to be packaged and shipped to the cluster, but we also have to add a plugin to the pom file so that the HBase classes are bundled into the jar and can be found at runtime:

    <build>
        <plugins>
            <!-- plugin that builds a jar bundling all dependencies -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
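The assembly plugin only bundles what the pom already declares, so the HBase artifacts must be listed as dependencies as well. A minimal sketch, assuming an HBase 2.x cluster (the version number is an assumption; match it to your cluster):

    <dependencies>
        <!-- HBase client API (Put, Scan, Result, ...) -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.2.7</version>
        </dependency>
        <!-- TableMapper/TableReducer/TableMapReduceUtil live here in HBase 2.x -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-mapreduce</artifactId>
            <version>2.2.7</version>
        </dependency>
    </dependencies>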

Packaging produces two jars; I uploaded both (only the larger one, with dependencies bundled, is actually needed).

 

Run command: hadoop jar <jar name (the larger one)> <class name (if the class is in a package, include the package prefix, otherwise the class won't be found)>

 hadoop jar HbaseApi-1.0-SNAPSHOT-jar-with-dependencies.jar MRHbase01

Output:
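Since the driver hard-codes /output as the output directory, the result can be inspected with (part-r-00000 is the standard reducer output file name):

hadoop fs -cat /output/part-r-00000

Each line holds a class name and its count separated by a tab, TextOutputFormat's default separator.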

Supplement:

When I wrote this, probably because of my IDEA version, the Context parameter generated when overriding the TableMapper map method was missing the enclosing Mapper type in front of it; the latest IDEA versions generate it correctly.

We can also add it by hand: Ctrl+click on TableMapper to jump into its source, where you can see it extends Mapper<>.

Copy Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> out of it,

then adjust it to your own input/output types:

public abstract class TableMapper<KEYOUT, VALUEOUT> extends Mapper<ImmutableBytesWritable, Result, KEYOUT, VALUEOUT> {
    public TableMapper() {
    }
}

 2. Read data from HBase and write the results back to HBase

Since the data is written into HBase, the preparation step is to create an empty table to receive it:

create 'clazz_count','info';

Implementation code:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;


public class MR06Hbase {

    /**
     * A MapReduce job that reads the HBase student table,
     * counts the number of students per class, and saves the result back into HBase.
     */
    public static class ReadHBaseMapper extends TableMapper<Text, IntWritable> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value,  Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            String clazz = Bytes.toString(value.getValue("info".getBytes(), "clazz".getBytes()));
            context.write(new Text(clazz),new IntWritable(1));
        }
    }

    /**
     * public abstract class TableReducer<KEYIN, VALUEIN, KEYOUT> extends Reducer<KEYIN, VALUEIN, KEYOUT, Mutation> {
     *     public TableReducer() {
     *     }
     * }
     * TableReducer takes three type parameters: the input k-v types must match the map output,
     * and the fourth Reducer parameter is fixed to Mutation.
     * Put is a subclass of Mutation, so we emit a Put as the value; the output key is not
     * needed, so NullWritable is used for it.
     */

    public static class WriteHbaseReducer extends TableReducer<Text, IntWritable, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            // Text.getBytes() returns the internal buffer, which may be longer than the
            // actual content, so convert via the String to get a clean row key
            Put put = new Put(Bytes.toBytes(key.toString()));
            put.addColumn("info".getBytes(), "counts".getBytes(), Bytes.toBytes(String.valueOf(sum)));
            context.write(NullWritable.get(), put);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "master:2181,node1:2181,node2:2181");

        Job job = Job.getInstance(conf);
        job.setJobName("MR06Hbase");
        job.setJarByClass(MR06Hbase.class);

        // Ctrl+P shows the expected parameters; fill them in one by one
        TableMapReduceUtil.initTableMapperJob(
                TableName.valueOf("student"),
                new Scan(),
                ReadHBaseMapper.class,
                Text.class,
                IntWritable.class,
                job
        );

        // note: here the first parameter is declared as String table, so a plain table-name string is enough
        TableMapReduceUtil.initTableReducerJob(
                "clazz_count",
                WriteHbaseReducer.class,
                job
        );
        job.waitForCompletion(true);

    }
 }

 Package, upload, and run:

 hadoop jar HbaseApi-1.0-SNAPSHOT-jar-with-dependencies.jar MR06Hbase

After the job succeeds, check the contents of the table in HBase.
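For example, from the HBase shell (the actual class names and counts depend entirely on your data):

scan 'clazz_count'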
