编写mapreduce程序从HBase的一张表中求某一列的平均数

 表中的数据

求 HBase 数据库中 data_t 表 Info 列族下 attention 列(即 Info:attention)的均值

package com.hbase.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.List;

public class GetAttentionMean {

	// Column coordinates shared by the driver's Scan and the mapper's lookup,
	// kept in one place so the two cannot drift apart. Bytes.toBytes is used
	// instead of String.getBytes() to avoid the platform-default charset.
	private static final byte[] FAMILY = Bytes.toBytes("Info");
	private static final byte[] QUALIFIER = Bytes.toBytes("attention");

	/**
	 * Driver: configures a table-input MapReduce job that computes the mean of
	 * the Info:attention column of the HBase table data_t and writes the result
	 * to /attention/mean on HDFS.
	 *
	 * @param args unused
	 * @throws Exception if job setup fails or the job does not complete successfully
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();

		FileSystem fs = FileSystem.get(conf);

		Job job = Job.getInstance(conf, "attention-mean");
		job.setJarByClass(GetAttentionMean.class);

		// Restrict the scan to the single column we need so region servers
		// do not ship entire rows to the mappers.
		Scan scan = new Scan();
		scan.addColumn(FAMILY, QUALIFIER);

		TableMapReduceUtil.initTableMapperJob(
				"data_t",             // source table name
				scan,                 // scan restricted to Info:attention
				MyMapper.class,       // mapper class
				Text.class,           // mapper output key type
				DoubleWritable.class, // mapper output value type
				job);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);
		// Every mapper emits the same constant key, so a single reducer sees
		// all values; additional reducers would only produce empty part files.
		job.setNumReduceTasks(1);

		// MapReduce refuses to start if the output path already exists, so
		// remove stale results from a previous run.
		Path outputPath = new Path("/attention/mean");
		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}
		FileOutputFormat.setOutputPath(job, outputPath);

		boolean isSuccess = job.waitForCompletion(true);
		if (!isSuccess) {
			throw new IOException("任务运行错误!");
		}
		// Failure already threw above, so reaching here means success.
		System.exit(0);
	}

	/**
	 * Emits one ("attention_mean", value) pair per row that carries the
	 * Info:attention column; rows missing the column or holding a
	 * non-numeric value are skipped.
	 */
	public static class MyMapper extends TableMapper<Text, DoubleWritable> {

		// Writables are reused across map() calls to avoid per-row allocation.
		private final Text outKey = new Text("attention_mean");
		private final DoubleWritable outValue = new DoubleWritable();

		@Override
		protected void map(ImmutableBytesWritable key, Result value, Context context)
				throws IOException, InterruptedException {

			// Newest cell for the column, or null when the row lacks it.
			Cell cell = value.getColumnLatestCell(FAMILY, QUALIFIER);
			if (cell == null) {
				return;
			}

			// NOTE(review): assumes attention is stored as a decimal string
			// (the original parsed it with Double.valueOf) — confirm against
			// the writer. Malformed cells are skipped instead of failing the
			// whole job.
			String text = Bytes.toString(CellUtil.cloneValue(cell));
			double attention;
			try {
				attention = Double.parseDouble(text);
			} catch (NumberFormatException ignored) {
				return;
			}

			outValue.set(attention);
			context.write(outKey, outValue);
		}
	}

	/**
	 * Averages all attention values gathered under the single constant key and
	 * writes one ("attention_mean", mean) record.
	 */
	public static class MyReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

		private final DoubleWritable outValue = new DoubleWritable();

		@Override
		protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
				throws IOException, InterruptedException {

			long count = 0;
			double sum = 0;
			for (DoubleWritable value : values) {
				count++;
				sum += value.get();
			}

			// reduce() is only invoked when at least one value exists for the
			// key, so count is never zero here.
			outValue.set(sum / count);
			context.write(key, outValue);
		}
	}
}

结果:

 

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.hbase</groupId>
  <artifactId>demo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>demo</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

 <build>
        <plugins>
            <!-- Shade plugin builds a fat jar at `package` so the job and all
                 HBase/Hadoop client dependencies ship as one runnable jar.
                 NOTE(review): version 1.4 is very old; consider a current
                 maven-shade-plugin release. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>1.4</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    
  <dependencies>
    <!-- HBase client API: Configuration, Scan, Result, etc. -->
    <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.6.1</version>
        </dependency>
        <!-- System-scope workaround pulling tools.jar from the local JDK 8;
             requires JAVA_HOME to be set and breaks on JDK 9+ where
             tools.jar no longer exists. -->
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
        <!-- HBase MapReduce API: TableMapper, TableMapReduceUtil live in
             hbase-server, not hbase-client. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.6.1</version>
        </dependency>
  </dependencies>
</project>

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值