Big Data MapReduce

This post shows how to initialize a project with Maven and then walks through a small Hadoop MapReduce job: the WordCountMapper, WordCountReducer and WordCountReducerMax classes, the driver class, and the pom.xml. The Mapper parses each input record and the Reducers aggregate the values per key (average or maximum).
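The contents of the input file are not listed here; judging from the Mapper's parsing logic, each line of the HDFS file /score.txt is assumed to hold a name and a numeric score separated by a single space, for example a hypothetical record like Tom 90.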

Maven


How Maven works

Creating a new Maven project

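The project uses groupId hadoop03, artifactId hadoop03 and version 0.0.1-SNAPSHOT, matching the pom.xml listed at the end of this post.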

Creating WordCountMapper

package hadoop03;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

	@Override
	protected void map(LongWritable keyin, Text valuein, Context context)
			throws IOException, InterruptedException {
		// Each input line is expected to look like "<name> <score>".
		String[] fields = valuein.toString().split(" ");
		// Emit the name as the key and the numeric score as the value.
		context.write(new Text(fields[0]), new LongWritable(Long.parseLong(fields[1])));
	}
}
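A minimal standalone sketch (not part of the MapReduce job) of what the map() method above does with one record; the sample line and the class name MapParseSketch are hypothetical:

package hadoop03;

// Standalone illustration of the parsing performed inside map().
public class MapParseSketch {
	public static void main(String[] args) {
		String line = "Tom 90";                     // assumed input format: "<name> <score>"
		String[] fields = line.split(" ");
		String name = fields[0];                    // becomes the map output key
		long score = Long.parseLong(fields[1]);     // becomes the map output value
		System.out.println(name + " -> " + score);  // prints: Tom -> 90
	}
}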

WordCountReducer

package hadoop03;

import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, DoubleWritable> {

	@Override
	protected void reduce(Text keyout, Iterable<LongWritable> times, Context context)
			throws IOException, InterruptedException {
		// Sum all scores for this key and count how many values there are.
		long sum = 0;
		int count = 0;
		for (LongWritable time : times) {
			sum += time.get();
			count++;
		}
		// Write the average score for this key as a double.
		double average = ((double) sum) / count;
		context.write(keyout, new DoubleWritable(average));
	}
}
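For example, if the grouped values for the key Tom were the hypothetical scores 90, 80 and 70, this reducer would write (Tom, 80.0), the arithmetic mean (90 + 80 + 70) / 3.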

WordCountReducerMax

package hadoop03;

import java.io.IOException;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducerMax extends Reducer<Text, LongWritable, Text, DoubleWritable> {

	@Override
	protected void reduce(Text keyout, Iterable<LongWritable> times, Context context)
			throws IOException, InterruptedException {
		// Keep the largest score seen for this key.
		long max = Long.MIN_VALUE;
		for (LongWritable time : times) {
			if (time.get() > max) {
				max = time.get();
			}
		}
		context.write(keyout, new DoubleWritable(max));
	}
}
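With the same hypothetical scores 90, 80 and 70 for Tom, this reducer would write (Tom, 90.0), the largest value in the group.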

job

package hadoop03;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class job {

	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
		// "max" as the first argument selects the max reducer; otherwise the average reducer is used.
		boolean isMax = args != null && args.length > 0 && "max".equals(args[0]);

		Job job = Job.getInstance();
		job.setJarByClass(job.class);

		// Mapper and its intermediate output types.
		job.setMapperClass(WordCountMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);
		FileInputFormat.setInputPaths(job, new Path("/score.txt"));

		// Reducer and the final output types.
		if (isMax) {
			job.setReducerClass(WordCountReducerMax.class);
		} else {
			job.setReducerClass(WordCountReducer.class);
		}
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);
		FileOutputFormat.setOutputPath(job, new Path("/out/hadoop05"));

		job.waitForCompletion(true);
	}

}
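After packaging, the job would typically be submitted with the hadoop jar command, for example hadoop jar hadoop03-0.0.1-SNAPSHOT.jar hadoop03.job max (the jar name here is an assumption based on the pom.xml coordinates); passing max as the first argument selects WordCountReducerMax, anything else falls back to the averaging WordCountReducer.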

pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>hadoop03</groupId>
  <artifactId>hadoop03</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <properties>
    <maven.compiler.target>1.8</maven.compiler.target>
    <maven.compiler.source>1.8</maven.compiler.source>
  </properties>
  <dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.7.1</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>2.7.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-common -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-yarn-common</artifactId>
      <version>2.7.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>2.7.1</version>
    </dependency>
  </dependencies>
</project>
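Assuming Maven and a JDK 8 are available, the project can be packaged with mvn package. All Hadoop artifacts are pinned to 2.7.1, so they should be kept in line with the Hadoop version running on the cluster.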