Lab: Compute the Yearly Sales Count and Yearly Total Sales Amount
Raw data:
The fields of each record are described below:
Field | Type | Nullable | Description |
---|---|---|---|
PROD_ID | int | No | Product ID |
CUST_ID | int | No | Customer ID |
TIME | Date | No | Date of sale |
CHANNEL_ID | int | No | Channel ID |
PROMO_ID | int | No | Promotion ID |
QUANTITY_SOLD | int | No | Quantity sold (units) |
AMOUNT_SOLD | float(10,2) | No | Total sale amount (yuan) |

Map phase: read the input; k1 is the byte offset and v1 is one line of text. The sales count and the sales amount are wrapped into a Java bean object as v2, and the year (1998, 1999, 2000, or 2001) is used as k2.
Shuffle phase: there are four years, so four partitions are configured; sorting, combining, and grouping use the framework defaults.
Reduce phase: for each key, the sales counts and sales amounts of all bean objects in the incoming v2 collection are summed, giving v3, i.e., that year's sales count and total sales amount; k3 is the same as k2 and stays unchanged.
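To make the flow concrete, take a hypothetical input line (tab-separated, in the field order of the table above): `13  987  1998-01-10  3  999  1  1232.16`. The Map phase emits k2 = "1998" and v2 = (QUANTITY_SOLD = 1, AMOUNT_SOLD = 1232.16); the Shuffle phase routes the pair to partition 0 (the 1998 partition); and the Reduce phase adds its two fields into that year's running sums.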

Results:

Code for this lab:
sale_bean code:
package lhr.word_count.homework;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class sale_bean implements WritableComparable<sale_bean> {
    int QUANTITY_SOLD;
    float AMOUNT_SOLD;

    public void setQUANTITY_SOLD(int QUANTITY_SOLD) {
        this.QUANTITY_SOLD = QUANTITY_SOLD;
    }

    public int getQUANTITY_SOLD() {
        return QUANTITY_SOLD;
    }

    public float getAMOUNT_SOLD() {
        return AMOUNT_SOLD;
    }

    public void setAMOUNT_SOLD(float AMOUNT_SOLD) {
        this.AMOUNT_SOLD = AMOUNT_SOLD;
    }

    @Override
    public String toString() {
        return "销售笔数:" + QUANTITY_SOLD + "\t" + "销售总额:" + AMOUNT_SOLD;
    }

    // Serialization: write the two fields in a fixed order.
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(this.QUANTITY_SOLD);
        dataOutput.writeFloat(this.AMOUNT_SOLD);
    }

    // Deserialization: read the fields back in the same order they were written.
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.QUANTITY_SOLD = dataInput.readInt();
        this.AMOUNT_SOLD = dataInput.readFloat();
    }

    // The bean is only used as a value, so no real ordering is needed.
    @Override
    public int compareTo(sale_bean o) {
        return 0;
    }
}
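Note that sale_bean only ever appears as a value (v2/v3), never as a key, so compareTo is left as a stub that returns 0; implementing the plain Writable interface would have been sufficient here, since WritableComparable is Writable plus Comparable.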
Mapper code:
package lhr.word_count.homework;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class sale_Mapper extends Mapper<LongWritable, Text, Text, sale_bean> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        /*
         * Read the input: k1 is the byte offset, v1 is one line of text.
         * Wrap the sales count and the sales amount into a bean object as v2,
         * and use the year (1998, 1999, 2000, 2001) as k2.
         */
        String[] mes = value.toString().split("\t");

        // Fields 5 and 6 are QUANTITY_SOLD and AMOUNT_SOLD (see the field table above).
        sale_bean bean = new sale_bean();
        bean.setQUANTITY_SOLD(Integer.parseInt(mes[5]));
        bean.setAMOUNT_SOLD(Float.parseFloat(mes[6]));

        // Field 2 is TIME; the part before the first '-' or '/' is the year.
        context.write(new Text(mes[2].split("[-/]")[0]), bean);
    }
}
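The split on "[-/]" keeps whatever precedes the first '-' or '/' in the TIME field, so the year is extracted whether dates are written as 1998-01-10 or 1998/01/10 (both layouts are assumed to be possible here). If the input file could contain a header row or empty lines, a simple length check on mes before parsing would be needed to avoid an ArrayIndexOutOfBoundsException or NumberFormatException.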
Partitioner code:
package lhr.word_count.homework;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class sale_Partitioner extends Partitioner<Text, sale_bean> {
    @Override
    public int getPartition(Text text, sale_bean sale_bean, int i) {
        /*
         * There are four years, so four partitions are used: one per year.
         */
        int year = Integer.parseInt(text.toString());
        if (year == 1998) {
            return 0;
        } else if (year == 1999) {
            return 1;
        } else if (year == 2000) {
            return 2;
        } else {
            return 3;
        }
    }
}
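Because the four years are consecutive, the same routing can also be written arithmetically. Below is a minimal sketch of such a variant, assuming the same package and key format; the class name sale_Partitioner_ByOffset is hypothetical and is not used by sale_Main (it would have to be registered via job.setPartitionerClass(...) instead of sale_Partitioner).

package lhr.word_count.homework;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical alternative (not part of the lab code): since the four years are
// consecutive, the partition index can be computed as year - 1998 and clamped.
public class sale_Partitioner_ByOffset extends Partitioner<Text, sale_bean> {
    @Override
    public int getPartition(Text text, sale_bean bean, int numPartitions) {
        int year = Integer.parseInt(text.toString());
        int partition = year - 1998;          // 1998 -> 0, 1999 -> 1, 2000 -> 2, 2001 -> 3
        if (partition < 0 || partition >= numPartitions) {
            return numPartitions - 1;         // anything unexpected goes to the last partition
        }
        return partition;
    }
}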
Reducer code:
package lhr.word_count.homework;

import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class sale_Reducer extends Reducer<Text, sale_bean, Text, sale_bean> {
    @Override
    protected void reduce(Text key, Iterable<sale_bean> values, Context context) throws IOException, InterruptedException {
        /*
         * For the shuffled v2 collection of one key, add up the sales count and the
         * sales amount of every bean to get v3: that year's sales count and total amount.
         * k3 is the same as k2 and stays unchanged.
         */
        int count1 = 0;    // running total of QUANTITY_SOLD
        float count2 = 0;  // running total of AMOUNT_SOLD
        for (sale_bean value : values) {
            count1 += value.getQUANTITY_SOLD();
            count2 += value.getAMOUNT_SOLD();
        }

        sale_bean sale_bean = new sale_bean();
        sale_bean.setQUANTITY_SOLD(count1);
        sale_bean.setAMOUNT_SOLD(count2);
        context.write(key, sale_bean);
    }
}
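Since the reducer's input and output types are identical (Text, sale_bean) and the two sums are associative and commutative, the same class could in principle also be registered as a combiner with job.setCombinerClass(sale_Reducer.class) to pre-aggregate on the map side; as stated in the Shuffle phase description, this lab keeps the framework default and does not set one.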
Main (driver) code:
package lhr.word_count.homework;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.net.URI;

public class sale_Main extends Configured implements Tool {
    @Override
    public int run(String[] strings) throws Exception {
        Job job = Job.getInstance(super.getConf(), "sale");
        job.setJarByClass(sale_Main.class);

        // Input: plain text, one sale record per line.
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("file:///D:\\input3"));
        // TextInputFormat.addInputPath(job, new Path("hdfs://hadoop11:8020/sale_count"));

        // Map phase: year (Text) -> sale_bean.
        job.setMapperClass(sale_Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(sale_bean.class);

        // Specify the partitioner class: one partition per year.
        job.setPartitionerClass(sale_Partitioner.class);

        // Reduce phase: sum the beans of each year.
        job.setReducerClass(sale_Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(sale_bean.class);

        // Four partitions, so four reduce tasks.
        job.setNumReduceTasks(4);

        job.setOutputFormatClass(TextOutputFormat.class);
        // Path path = new Path("hdfs://hadoop11:8020/sale_count_result");
        Path path = new Path("file:///D:\\output3");
        TextOutputFormat.setOutputPath(job, path);

        // For cluster runs: delete the output directory if it already exists.
        // FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop11:8020"), super.getConf(), "root");
        // if (fileSystem.exists(path)) {
        //     fileSystem.delete(path, true);
        // }

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int run = ToolRunner.run(configuration, new sale_Main(), args);
        System.exit(run);
    }
}
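As written, the job reads from file:///D:\input3 and writes to file:///D:\output3, so it can be run directly from the IDE in local mode. The commented-out lines switch the input and output to HDFS and delete an existing output directory before a cluster run; this matters because TextOutputFormat refuses to start if the output directory already exists. For a cluster run the project would also need to be packaged and submitted with something like `hadoop jar sale.jar lhr.word_count.homework.sale_Main` (the jar name here is hypothetical).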