MapReduce Basics: Review Exercises

Data (columns: week, product ID, unit price, units sold):
week1,A0001,10,20
week1,A0002,8.5,15
week1,A0003,9.2,30
week1,B0001,10.5,50
week2,A0001,11,30
week2,A0002,8,20
week2,A0003,9.2,20
week2,B0001,10,55
week3,A0001,9.5,10
week3,A0002,8.8,30
week3,A0003,9.8,30
week3,B0001,9,58
week4,A0001,9.2,14
week4,A0002,8.5,22
week4,A0003,10.3,45
week4,B0001,7,12

Task: using the data above, compute the following with MapReduce:
1. The total sales revenue of each product, sorted in descending order
2. For each product, the three weeks with the highest sales revenue

Problem 1: group and sum, then sort the totals in descending order.
This problem is solved with two chained MapReduce jobs: the first computes each product's total sales revenue, and the second sorts those totals in descending order.
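
As a quick hand check before the code, job 1 should produce these per-product totals (sum of unit price * quantity over the four weeks), which job 2 then sorts in descending order:

A0001	753.8	(200 + 330 + 95 + 128.8)
A0002	738.5	(127.5 + 160 + 264 + 187)
A0003	1217.5	(276 + 184 + 294 + 463.5)
B0001	1681.0	(525 + 550 + 522 + 84)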

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
 * 1. Total sales revenue of each product, sorted in descending order
 *    Columns: week, product ID, unit price, units sold
 *    Sample record: week1	A0001  10    20
 * 2. For each product, the three weeks with the highest sales revenue
 *
 */
public class QuestionMR_1_1 {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		
		/**
		 * Keep the two lines below commented out for local runs;
		 * uncomment them to run against HDFS on the cluster
		 */
		//conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
		//System.setProperty("HADOOP_USER_NAME", "hadoop");
		
		
		Job job = Job.getInstance(conf);
		job.setJarByClass(QuestionMR_1_1.class);
		
		job.setMapperClass(MRMapper.class);
		job.setReducerClass(MRReducer.class);
		
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(DoubleWritable.class);
		
		/**
		 * Custom partitioner (not needed for this job)
		 */
		//job.setPartitionerClass(Object.class);
		
		/**
		 * Custom grouping comparator (not needed for this job)
		 */
		//job.setGroupingComparatorClass(Object.class);
		
		/**
		 * Custom combiner, where appropriate
		 */
		//job.setCombinerClass(MRReducer.class);
		
	
		Path inputPath = new Path("G:/test/q1/input");
		Path outputPath = new Path("G:/test/q1/output_1_1");
		if(fs.exists(outputPath)){
			fs.delete(outputPath, true);
		}
		
		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);
		
		
		
		Job job2 = Job.getInstance(conf);
		job2.setJarByClass(QuestionMR_1_1.class);
		
		job2.setMapperClass(MRMapper2.class);
		job2.setReducerClass(MRReducer2.class);
		
		job2.setOutputKeyClass(Product.class);
		job2.setOutputValueClass(NullWritable.class);
		
		
	
		Path inputPath2 = new Path("G:/test/q1/output_1_1");
		Path outputPath2 = new Path("G:/test/q1/output_1_2");
		if(fs.exists(outputPath2)){
			fs.delete(outputPath2, true);
		}
		
		FileInputFormat.setInputPaths(job2, inputPath2);
		FileOutputFormat.setOutputPath(job2, outputPath2);
		// chain the two MapReduce jobs with JobControl: job2 depends on job1's output
		JobControl control = new JobControl("CF");
		
		ControlledJob conjob1 = new ControlledJob(job.getConfiguration());
		ControlledJob conjob2 = new ControlledJob(job2.getConfiguration());
		
		conjob2.addDependingJob(conjob1);
		
		control.addJob(conjob1);
		control.addJob(conjob2);
		
		Thread t = new Thread(control);
		t.start();
		
		while(!control.allFinished()){
			Thread.sleep(1000);
		}
		// shut the JobControl thread down once both jobs have finished
		control.stop();
		System.exit(0);
		
	}
	
	// input record: week1, A0001 (product ID), 10 (unit price), 20 (quantity)
	// job 1: total sales revenue per product, to be sorted descending by job 2
	public static class MRMapper extends Mapper<LongWritable, Text, Text, DoubleWritable>{
		Text k = new Text();
		DoubleWritable v = new DoubleWritable();
		
		@Override
		protected void map(LongWritable key, Text value,Context context)
				throws IOException, InterruptedException {
			
			String[] line = value.toString().split(",");
			// key by product ID so that the reduce phase can accumulate each product's revenue
			k.set(line[1]);
			// revenue of this record = unit price * quantity
			double sum = Double.parseDouble(line[2])*Double.parseDouble(line[3]);
			v.set(sum);
			context.write(k, v);

		}
	}
	
	public static class MRReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable>{
		DoubleWritable v = new DoubleWritable();
		@Override
		protected void reduce(Text key, Iterable<DoubleWritable> values, Context context)
				throws IOException, InterruptedException {
			
			double sum = 0D;
			// all records for the same product arrive at the same reduce call,
			// so each product's revenue can be accumulated here
			for (DoubleWritable num : values) {
				sum += num.get(); 
			}
			v.set(sum);
			context.write(key,v);
		}
	}
	
	/**
	 * The sort in job 2 is driven by the custom writable Product:
	 * its compareTo method defines the ordering of the shuffle output
	 */
	public static class MRMapper2 extends Mapper<LongWritable, Text, Product, NullWritable>{
		@Override
		protected void map(LongWritable key, Text value,Context context)
				throws IOException, InterruptedException {
			
			// job 1 wrote its output as key<TAB>value (TextOutputFormat default),
			// so split on the tab character here
			String[] line = value.toString().split("\t");
			
			Product p = new Product(line[0], Double.parseDouble(line[1]),"");
			context.write(p, NullWritable.get());

		}
	}
	
	public static class MRReducer2 extends Reducer<Product, NullWritable, Product, NullWritable>{
		@Override
		protected void reduce(Product key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			
			context.write(key, NullWritable.get());
			
		}
	}
}
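
One caveat with problem 1 as written: Product.compareTo (in the Product class shown further below) compares the product ID before the revenue, so job 2 actually orders its output by product ID descending rather than by total sales; with the sample data it would emit A0002 (738.5) ahead of A0001 (753.8). Problem 2's secondary sort depends on compareTo staying ID-first, so one way to fix problem 1 without touching the bean is to register a dedicated sort comparator on job2 via job2.setSortComparatorClass(MoneyDescComparator.class). A minimal sketch, where MoneyDescComparator is a name introduced here and not part of the original code:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class MoneyDescComparator extends WritableComparator {

	protected MoneyDescComparator() {
		// `true` makes WritableComparator deserialize Product instances for compare()
		super(Product.class, true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		Product pa = (Product) a;
		Product pb = (Product) b;
		// comparing b against a yields descending order by total revenue
		return Double.compare(pb.getTotalMoney(), pa.getTotalMoney());
	}
}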
Problem 2: finding each product's three best-selling weeks is a simple grouped top-N.
Group the records by product, sort each group by revenue in descending order, and take the top 3.
A custom grouping comparator is used here, which cuts down the number of iterations on the reduce side.

The ordering itself is controlled by the compareTo method of the custom writable Product.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Sample record: week1	A0001  10  20
 * 2. For each product, the three weeks with the highest sales revenue
 *
 */
public class QuestionMR_1_2 {
	
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		
		/**
		 * Keep the two lines below commented out for local runs;
		 * uncomment them to run against HDFS on the cluster
		 */
		//conf.set("fs.defaultFS", "hdfs://hadoop01:9000");
		//System.setProperty("HADOOP_USER_NAME", "hadoop");
		
		
		Job job = Job.getInstance(conf);
		job.setJarByClass(QuestionMR_1_2.class);
		
		job.setMapperClass(MRMapper.class);
		job.setReducerClass(MRReducer.class);
		
		job.setOutputKeyClass(Product.class);
		job.setOutputValueClass(NullWritable.class);
		
		/**
		 * Custom partitioner (not needed for this job)
		 */
		//job.setPartitionerClass(Object.class);
		
		/**
		 * Custom grouping comparator: groups records by product ID (see ProductGC)
		 */
		job.setGroupingComparatorClass(ProductGC.class);
		
		/**
		 * Custom combiner, where appropriate
		 */
		//job.setCombinerClass(MRReducer.class);
		
	
		Path inputPath = new Path("G:/exam/q1/input");
		Path outputPath = new Path("G:/exam/q1/output_2_1");
		if(fs.exists(outputPath)){
			fs.delete(outputPath, true);
		}
		
		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);
		
		boolean isDone = job.waitForCompletion(true);
		System.exit(isDone?0:1);
		
	}
	
	// sample record: week1	A0001  10  20 (week, product ID, unit price, quantity)
	public static class MRMapper extends Mapper<LongWritable, Text, Product, NullWritable>{
		// build the custom writable (bean) directly from each input record;
		// the shuffle sorts records by Product.compareTo (product ID first,
		// then revenue descending), and ProductGC groups them by product ID
		@Override
		protected void map(LongWritable key, Text value,Context context)
				throws IOException, InterruptedException {
			
			String[] line = value.toString().split(",");
			// revenue of this record = unit price * quantity
			double sum = Double.parseDouble(line[2])*Double.parseDouble(line[3]);
			Product p = new Product(line[1],sum,line[0]);
			context.write(p,NullWritable.get());

		}
	}
	
	public static class MRReducer extends Reducer<Product, NullWritable, Product, NullWritable>{
		@Override
		protected void reduce(Product key, Iterable<NullWritable> values, Context context)
				throws IOException, InterruptedException {
			
			// values arrive sorted by revenue descending within this product's group;
			// the framework refills `key` on each iteration, so every write emits
			// the current record's week and revenue, not the first one's
			int count = 0;
			for (NullWritable nv : values) {
				if(count >= 3){
					break;
				}
				context.write(key, nv);
				count++;
			}
		}
	}
}
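
As a sanity check, here is problem 2's expected output worked out by hand from the sample data. Groups appear in descending product-ID order because compareTo reverses the natural order of namenum; each line is Product.toString(), i.e. namenum, week, totalMoney:

B0001, week2, 550.0
B0001, week1, 525.0
B0001, week3, 522.0
A0003, week4, 463.5
A0003, week3, 294.0
A0003, week1, 276.0
A0002, week3, 264.0
A0002, week4, 187.0
A0002, week2, 160.0
A0001, week2, 330.0
A0001, week1, 200.0
A0001, week4, 128.8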

Custom grouping comparator: ProductGC

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class ProductGC extends WritableComparator{

	public ProductGC() {
		// the `true` flag tells WritableComparator to instantiate and deserialize
		// Product objects, so compare() below receives fully-built instances
		super(Product.class,true);
	}

	@Override
	public int compare(WritableComparable a, WritableComparable b) {
		Product pa = (Product) a;	
		Product pb = (Product) b;	
		
		// group solely by product ID, ignoring revenue and week
		return pa.getNamenum().compareTo(pb.getNamenum());
	}
	
}
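
This is the standard secondary-sort pattern: the sort comparator (Product.compareTo) orders the shuffled records by product ID first and revenue second, and the grouping comparator above then treats all records with the same product ID as a single reduce group, so the reducer receives each product's weeks already sorted by revenue.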

Custom writable type: Product

package cn.zhao.exam.mapreduce.q1;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class Product implements WritableComparable<Product>{
	private String namenum;
	private double totalMoney;
	private String week;
	
	
	public String getWeek() {
		return week;
	}
	public void setWeek(String week) {
		this.week = week;
	}
	public String getNamenum() {
		return namenum;
	}
	public void setNamenum(String namenum) {
		this.namenum = namenum;
	}
	public double getTotalMoney() {
		return totalMoney;
	}
	public void setTotalMoney(double totalMoney) {
		this.totalMoney = totalMoney;
	}
	public Product() {
		// no-arg constructor required by Hadoop's Writable deserialization
	}
	
	public Product(String namenum, double totalMoney, String week) {
		this.namenum = namenum;
		this.totalMoney = totalMoney;
		this.week = week;
	}
	@Override
	public String toString() {
		if("".equals(week)){
			return  namenum +", "+ totalMoney;
		}else{
			return  namenum +", " + week +", " + totalMoney;
		}
	}
	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(namenum);
		out.writeDouble(totalMoney);
		out.writeUTF(week);
	}
	@Override
	public void readFields(DataInput in) throws IOException {
		namenum = in.readUTF();
		totalMoney = in.readDouble();
		week = in.readUTF();
		
	}
	
	@Override
	public int compareTo(Product o) {
		// both fields take part in the comparison because problem 2 reuses this bean:
		// primary key is the product ID, secondary key is the revenue
		int byId = o.getNamenum().compareTo(this.getNamenum());
		if(byId != 0){
			// comparing o against this (rather than this against o) reverses
			// the natural order, i.e. descending by product ID
			return byId;
		}
		// likewise reversed: descending by total revenue
		return Double.compare(o.totalMoney, this.totalMoney);
	}
	
	
}
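
One further caveat: both jobs here run with the default single reducer, which is why they work as written. With multiple reducers, the default HashPartitioner would call Product.hashCode(), which is not overridden, so records of the same product could be scattered across reducers and break both the grouping and the top-3 logic. A minimal sketch of a partitioner that fixes this, where ProductPartitioner is a name introduced here and not part of the original code:

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class ProductPartitioner extends Partitioner<Product, NullWritable> {

	@Override
	public int getPartition(Product key, NullWritable value, int numPartitions) {
		// partition on the product ID only, so all of one product's
		// records meet in the same reducer
		return (key.getNamenum().hashCode() & Integer.MAX_VALUE) % numPartitions;
	}
}

It would be enabled with job.setPartitionerClass(ProductPartitioner.class) in QuestionMR_1_2's main method.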