编写mapreduce程序从HBase的一张表中求某一列的平均数

最新推荐文章于 2022-09-24 18:32:56 发布

云飞扬°

最新推荐文章于 2022-09-24 18:32:56 发布

阅读量744

点赞数 1

分类专栏：大数据

本文链接：https://blog.csdn.net/weixin_44706512/article/details/107973128

版权

大数据专栏收录该内容

2 篇文章 0 订阅

订阅专栏

表中的数据

求 HBase 数据库中 data_t 表的 Info 列族下 attention 列的均值（算术平均数）

package com.hbase.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.List;

public class GetAttentionMean {

	public static void main(String[] args) throws Exception {
		Configuration conf = HBaseConfiguration.create();

		FileSystem fs = FileSystem.get(conf);

		Job job = Job.getInstance(conf);

		job.setJarByClass(GetAttentionMean.class);

		Scan scan = new Scan();
		scan.addColumn("Info".getBytes(), "attention".getBytes());

		TableMapReduceUtil.initTableMapperJob(
				"data_t".getBytes(), // 指定表名
				scan, // 指定扫描数据的条件
				MyMapper.class, // 指定mapper class
				Text.class, // mapper阶段的输出的key的类型
				DoubleWritable.class, // mapper阶段的输出的value的类型
				job // job对象
		);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);

		Path outputPath = new Path("/attention/mean");

		if (fs.exists(outputPath)) {
			fs.delete(outputPath, true);
		}

		FileOutputFormat.setOutputPath(job, outputPath);

		boolean isSuccess = job.waitForCompletion(true);
		
		if (!isSuccess) {
			throw new IOException("任务运行错误！");
		}

		System.exit(isSuccess ? 0 : 1);
	}

	public static class MyMapper extends TableMapper<Text, DoubleWritable> {

		Text outKey = new Text("attention_mean");
		DoubleWritable outValue = new DoubleWritable();

		@Override
		protected void map(ImmutableBytesWritable key, Result value, Context context)
				throws IOException, InterruptedException {

			boolean isContainsColumn = value.containsColumn("Info".getBytes(), "attention".getBytes());

			if (isContainsColumn) {
				List<Cell> listCells = value.getColumnCells("Info".getBytes(), "attention".getBytes());
				Cell cell = listCells.get(0);
				byte[] cloneValue = CellUtil.cloneValue(cell);
				double attention = Double.valueOf(Bytes.toString(cloneValue));
				outValue.set(attention);
				context.write(outKey, outValue);
			}

		}

	}

	public static class MyReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {

		DoubleWritable outValue = new DoubleWritable();

		@Override
		protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
				throws IOException, InterruptedException {

			int count = 0;
			double sum = 0;
			for (DoubleWritable value : values) {
				count++;
				sum += value.get();
			}

			double attention_mean = sum / count;
			outValue.set(attention_mean);
			context.write(key, outValue);
		}
	}
}

结果：

pom.xml

<!-- Maven build for the com.hbase:demo artifact, packaged as a jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.hbase</groupId>
  <artifactId>demo</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>demo</name>
  <url>http://maven.apache.org</url>

  <properties>
    <!-- Source files are read as UTF-8. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

 <build>
        <plugins>
            <!-- Runs the shade goal during the package phase, producing a jar that
                 bundles the project's dependencies. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>1.4</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    
  <dependencies>
    <!-- HBase client API. -->
    <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.6.1</version>
        </dependency>
        <!-- System-scoped dependency resolved from the local JDK's tools.jar.
             NOTE(review): presumably present to satisfy a transitive jdk.tools
             requirement; tools.jar is not shipped with JDK 9+, so confirm it is
             still needed for the target JDK. -->
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
        <!--HBase MapReduce API-->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.6.1</version>
        </dependency>
  </dependencies>
</project>

云飞扬°

关注

1
点赞
踩
6

收藏

觉得还不错? 一键收藏
0
评论
编写mapreduce程序从HBase的一张表中求某一列的平均数

表中的数据求HBase数据库中data_t表中的attention列的均值package com.hbase.demo;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.hbase.Cell;import org.apache.hadoop.hbase.
复制链接

扫一扫