UserOrder.java
package com.igeekhome.mapreduce.model;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Model class for a user's purchase order, used as a Hadoop MapReduce value.
 *
 * <p>Steps for making a model class serializable by Hadoop:
 * <ol>
 *   <li>the class implements {@link Writable};</li>
 *   <li>the class explicitly declares a no-argument constructor;</li>
 *   <li>it overrides the serialization method {@link #write(DataOutput)};</li>
 *   <li>it overrides the deserialization method {@link #readFields(DataInput)},
 *       reading the fields in exactly the order they were written.</li>
 * </ol>
 */
public class UserOrder implements Writable {
    // Order id.
    private Integer orderId;
    // Name of the user who placed the order.
    private String username;
    // Sex of the user.
    private String sex;
    // Name of the ordered goods.
    private String goodsName;
    // Unit price of the goods.
    private Integer price;
    // Number of items ordered.
    private Integer saleCount;
    // Total price of the order.
    private Integer totalPrice;

    /** No-argument constructor (step 2 of the class-level list). */
    public UserOrder() {
    }

    /**
     * Serializes all seven fields in a fixed order.
     *
     * <p>Unset (null) fields are written as {@code 0} for numbers and {@code ""} for
     * strings, so a partially populated object can still be serialized. The byte
     * layout for fully populated objects is unchanged.
     *
     * @param out sink to write the fields to
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(orZero(orderId));
        out.writeUTF(orEmpty(username));
        out.writeUTF(orEmpty(sex));
        out.writeUTF(orEmpty(goodsName));
        out.writeInt(orZero(price));
        out.writeInt(orZero(saleCount));
        out.writeInt(orZero(totalPrice));
    }

    /**
     * Deserializes all seven fields.
     *
     * @param in source to read the fields from
     * @throws IOException if the underlying stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        // Fields must be read in the same order in which write() emitted them.
        this.orderId = in.readInt();
        this.username = in.readUTF();
        this.sex = in.readUTF();
        this.goodsName = in.readUTF();
        this.price = in.readInt();
        this.saleCount = in.readInt();
        this.totalPrice = in.readInt();
    }

    /**
     * Returns the sex and the total price separated by a single space.
     * Unset fields are rendered as {@code "null"} instead of throwing.
     */
    @Override
    public String toString() {
        return this.sex + " " + this.totalPrice;
    }

    public Integer getOrderId() {
        return orderId;
    }

    public void setOrderId(Integer orderId) {
        this.orderId = orderId;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public String getGoodsName() {
        return goodsName;
    }

    public void setGoodsName(String goodsName) {
        this.goodsName = goodsName;
    }

    public Integer getPrice() {
        return price;
    }

    public void setPrice(Integer price) {
        this.price = price;
    }

    public Integer getSaleCount() {
        return saleCount;
    }

    public void setSaleCount(Integer saleCount) {
        this.saleCount = saleCount;
    }

    public Integer getTotalPrice() {
        return totalPrice;
    }

    public void setTotalPrice(Integer totalPrice) {
        this.totalPrice = totalPrice;
    }

    /**
     * Sets the total price to {@code price * saleCount}.
     * Both {@code price} and {@code saleCount} must have been set beforehand;
     * the multiplication uses plain {@code int} arithmetic.
     */
    public void setTotalPrice() {
        this.totalPrice = this.price * this.saleCount;
    }

    /** Maps an unset numeric field to 0 so it can be written as a primitive int. */
    private static int orZero(Integer value) {
        return value == null ? 0 : value;
    }

    /** Maps an unset text field to the empty string, since writeUTF rejects null. */
    private static String orEmpty(String value) {
        return value == null ? "" : value;
    }
}
SexPartitioner.java
package com.igeekhome.mapreduce.order;
import com.igeekhome.mapreduce.model.UserOrder;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
// Partitioner that routes records by the user's sex.
// The type parameters of Partitioner are the key/value types emitted by the map phase.
public class SexPartitioner extends Partitioner<Text, UserOrder> {
    private static final String MALE = "男";
    private static final String FEMALE = "女";

    /**
     * Chooses a partition from the order's sex: 0 for "男", 1 for "女", 2 for anything
     * else (including an unset sex). Per the original author's note, the partition
     * number becomes the suffix of the result file (part-r-0000 + partition number).
     *
     * @param text         map output key (not used for the decision)
     * @param userOrder    map output value whose sex selects the partition
     * @param numPartitons number of partitions configured for the job
     * @return a partition index; folded into {@code [0, numPartitons)} when the job
     *         has fewer than three partitions
     */
    @Override
    public int getPartition(Text text, UserOrder userOrder, int numPartitons) {
        // Read the user's sex; constant-first equals() below tolerates null.
        String sex = userOrder.getSex();

        int partition;
        if (MALE.equals(sex)) {
            partition = 0;
        } else if (FEMALE.equals(sex)) {
            partition = 1;
        } else {
            partition = 2;
        }

        // Never hand back an index outside the configured range: with fewer than
        // three partitions the raw value would be an illegal partition number.
        if (numPartitons > 0 && partition >= numPartitons) {
            return partition % numPartitons;
        }
        return partition;
    }
}
UserOrderDriver.java
package com.igeekhome.mapreduce.order;
import com.igeekhome.mapreduce.model.UserOrder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver that configures and submits the user-order MapReduce job.
 *
 * <p>Usage: {@code UserOrderDriver [inputPath [outputPath]]}. When an argument is
 * omitted, the corresponding built-in default path is used.
 */
public class UserOrderDriver {
    private static final String DEFAULT_INPUT_PATH = "D:\\bigdata\\sale_details.txt";
    private static final String DEFAULT_OUTPUT_PATH = "D:\\bigdata\\sale_details_output";

    /**
     * Builds the job, waits for it to finish and prints whether it succeeded.
     * Terminates the JVM with exit status 1 when the job fails.
     *
     * @param args optional input path (index 0) and output path (index 1)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String inputLocation = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputLocation = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        // 1. Obtain the configuration object and the job object.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Associate the driver class.
        job.setJarByClass(UserOrderDriver.class);

        // 3. Set the mapper and reducer classes.
        job.setMapperClass(UserOrderMapper.class);
        job.setReducerClass(UserOrderReducer.class);

        // 4. Set the key/value types emitted by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(UserOrder.class);

        // 5. Set the final (reducer) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(UserOrder.class);

        // 6. Set the input path and the output path (the output path must not exist yet).
        FileInputFormat.setInputPaths(job, new Path(inputLocation));
        FileOutputFormat.setOutputPath(job, new Path(outputLocation));

        // Use the custom partitioner.
        job.setPartitionerClass(SexPartitioner.class);
        // One reduce task per expected result file (SexPartitioner yields partitions 0, 1 and 2).
        job.setNumReduceTasks(3);

        // 7. Submit the job and wait for it to complete.
        boolean result = job.waitForCompletion(true);
        System.out.println(result ? "执行成功" : "执行失败");

        // Report failure to the calling shell/scheduler through the exit status.
        if (!result) {
            System.exit(1);
        }
    }
}
UserOrderMapper.java
package com.igeekhome.mapreduce.order;
import com.igeekhome.mapreduce.model.UserOrder;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that parses one comma-separated order line into a {@link UserOrder} and
 * emits it keyed by user name.
 *
 * <p>Expected field order: orderId, username, sex, goodsName, price, saleCount.
 * Lines that have fewer than six fields, or whose numeric fields cannot be parsed,
 * are skipped and counted instead of failing the task.
 */
public class UserOrderMapper extends Mapper<LongWritable, Text, Text, UserOrder> {
    private static final String FIELD_SEPARATOR = ",";
    private static final int EXPECTED_FIELDS = 6;
    private static final String COUNTER_GROUP = "UserOrder";
    private static final String MALFORMED_COUNTER = "MALFORMED_LINES";

    // Key of the map output (the user name); reused across map() calls.
    private final Text keyOut = new Text();
    // Value of the map output; reused across map() calls.
    private final UserOrder valueOut = new UserOrder();

    /**
     * Parses a single input line and writes (username, order) to the context.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one line of the input file
     * @param context sink for the map output and for the malformed-line counter
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line on the field separator.
        String[] orderData = value.toString().split(FIELD_SEPARATOR);
        if (orderData.length < EXPECTED_FIELDS) {
            // Blank or truncated line: nothing sensible to emit.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }

        // Parse the numeric fields first so that valueOut is only touched for valid lines.
        int orderId;
        int price;
        int saleCount;
        try {
            orderId = Integer.parseInt(orderData[0].trim());
            price = Integer.parseInt(orderData[4].trim());
            saleCount = Integer.parseInt(orderData[5].trim());
        } catch (NumberFormatException e) {
            // E.g. a header row or corrupted number; count it and move on.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            return;
        }
        String userName = orderData[1].trim();

        // Populate the reusable UserOrder object.
        valueOut.setOrderId(orderId);
        valueOut.setUsername(userName);
        valueOut.setSex(orderData[2].trim());
        valueOut.setGoodsName(orderData[3].trim());
        valueOut.setPrice(price);
        valueOut.setSaleCount(saleCount);
        // Derive the order total from price and quantity.
        valueOut.setTotalPrice();

        // Emit the order keyed by user name.
        keyOut.set(userName);
        context.write(keyOut, valueOut);
    }
}
UserOrderReducer.java
package com.igeekhome.mapreduce.order;
import com.igeekhome.mapreduce.model.UserOrder;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Reducer that sums the total price of all orders belonging to one user and emits
 * a single {@link UserOrder} carrying that user's sex and the summed total.
 */
public class UserOrderReducer extends Reducer<Text, UserOrder, Text, UserOrder> {
    // Object written to the output; reused across reduce() calls.
    private final UserOrder valueOut = new UserOrder();

    /**
     * Iterates over one user's orders and accumulates their total prices.
     *
     * @param key     the user name
     * @param values  all orders emitted for that user
     * @param context sink for the reduce output
     */
    @Override
    protected void reduce(Text key, Iterable<UserOrder> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulator: avoids boxing a new Integer on every iteration.
        int userTotalPrice = 0;
        // Sex taken from the first order seen for this user.
        String sex = null;

        for (UserOrder userOrder : values) {
            userTotalPrice += userOrder.getTotalPrice();
            if (sex == null) {
                sex = userOrder.getSex();
            }
        }

        // Fill the output object and emit it.
        valueOut.setSex(sex);
        valueOut.setTotalPrice(userTotalPrice);
        context.write(key, valueOut);
    }
}
sale_details.txt
运行结果: