mapreduce小demo_自定义分组聚合规则

mapreduce小demo

1.单词分类

a.txt    
hello tom   
hello jim   
hello kitty     
hello rose  

b.txt   
hello jerry     
hello jim   
hello kitty     
hello jack  

c.txt       
hello jerry     
hello java      
hello c++       
hello c++       

需要得到以下结果:   
hello  a.txt-->4  b.txt-->4  c.txt-->4      
java   c.txt-->1        
jerry  b.txt-->1  c.txt-->1 

1.1 思路

1 第一步:以“单词-文件名”作为 key,统计每个单词在每个文件中出现的次数
2 第二步:以单词作为 key,将同一个单词在各个文件中的统计结果合并为一条数据

1.2 代码

得到文件名
从输入切片信息中获取当前正在处理的一行数据所属的文件
InputSplit 的实现类 FileSplit inputSplit = (FileSplit)context.getInputSplit();

1. 第一步

  1. IndexesMapper
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

/**
 * Step-one mapper of the inverted-index demo.
 *
 * <p>For every word of an input line it emits the key {@code word-fileName}
 * with the count {@code 1}, where {@code fileName} is the name of the file the
 * current input split belongs to.
 */
public class IndexesMapper extends Mapper<LongWritable, Text, Text , IntWritable> {
    /** Count emitted for each single word occurrence. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key instance reused for every emitted record. */
    private final Text outKey = new Text();

    /**
     * Splits the line into words and emits one {@code (word-fileName, 1)} pair per word.
     *
     * @param key     input key of the record (not used)
     * @param value   one line of input text
     * @param context task context used to look up the input split and to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        FileSplit inputSplit = (FileSplit) context.getInputSplit();
        String fileName = inputSplit.getPath().getName();

        // Split on runs of whitespace so repeated separators do not create empty words.
        for (String word : value.toString().split("\\s+")) {
            // A blank line or leading whitespace still yields one empty token; emitting
            // it would create a meaningless "-fileName" key.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word + "-" + fileName);
            context.write(outKey, ONE);
        }
    }
}


  1. IndexesReduce
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Step-one reducer of the inverted-index demo.
 *
 * <p>Adds up all counts received for one {@code word-fileName} key and writes
 * the key together with the total.
 */
public class IndexesReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Sums the counts of one key.
     *
     * @param key     the {@code word-fileName} key
     * @param values  the counts emitted for that key
     * @param context task context used to emit the total
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable value : values) {
            // Add the carried count rather than 1 per element, so the result stays
            // correct when the incoming values are already partial sums.
            total += value.get();
        }
        context.write(key, new IntWritable(total));
    }
}

  1. IndexesSub

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for step one of the inverted-index demo: wires {@link IndexesMapper}
 * and {@link IndexesReduce} into a job and runs it on the hard-coded paths.
 */
public class IndexesSub {
    /**
     * Configures and runs the step-one job, waiting until it has finished.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Path inputDir = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\索引数据");
        Path outputDir = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\output");

        Job indexJob = Job.getInstance(new Configuration());
        indexJob.setJarByClass(IndexesSub.class);

        // Map side: (word-fileName, 1)
        indexJob.setMapperClass(IndexesMapper.class);
        indexJob.setMapOutputKeyClass(Text.class);
        indexJob.setMapOutputValueClass(IntWritable.class);

        // Reduce side: (word-fileName, count)
        indexJob.setReducerClass(IndexesReduce.class);
        indexJob.setOutputKeyClass(Text.class);
        indexJob.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(indexJob, inputDir);
        FileOutputFormat.setOutputPath(indexJob, outputDir);

        indexJob.waitForCompletion(true);
    }
}

1. 第二步

1 IndexSecMapper

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Step-two mapper of the inverted-index demo.
 *
 * <p>Each input line is cut at its first {@code '-'}: the part before it
 * becomes the output key and the rest of the line becomes the output value.
 */
public class IndexSecMapper extends Mapper<LongWritable, Text, Text, Text> {
    /** Output key instance reused for every emitted record. */
    private final Text outKey = new Text();

    /** Output value instance reused for every emitted record. */
    private final Text outValue = new Text();

    /**
     * Emits {@code (textBeforeDash, textAfterDash)} for one input line.
     *
     * @param key     input key of the record (not used)
     * @param value   one line of input text
     * @param context task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Limit the split to two parts so any further '-' stays inside the value
        // instead of being silently dropped.
        String[] parts = value.toString().split("-", 2);
        if (parts.length < 2) {
            // No separator on this line: skip it rather than fail the task with
            // an ArrayIndexOutOfBoundsException.
            return;
        }
        outKey.set(parts[0]);
        outValue.set(parts[1]);
        context.write(outKey, outValue);
    }
}

2 IndexSecReduce

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Step-two reducer of the inverted-index demo: joins all values of one key
 * into a single output line.
 */
public class IndexSecReduce extends Reducer<Text, Text, Text, Text> {
    /**
     * Concatenates every value of the key, each prefixed with {@code "  >"},
     * and writes the key with the joined text.
     *
     * @param key     the grouping key
     * @param values  the values collected for that key
     * @param context task context used to emit the joined line
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        for (Text entry : values) {
            joined.append("  >").append(entry.toString());
        }
        Text merged = new Text(joined.toString());
        context.write(key, merged);
    }
}

3 IndexSecSub

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for step two of the inverted-index demo: reads the output directory
 * of step one and runs {@link IndexSecMapper} / {@link IndexSecReduce} on it.
 */
public class IndexSecSub {
    /**
     * Configures and runs the step-two job, waiting until it has finished.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Path stepOneOutput = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\output");
        Path stepTwoOutput = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\output2");

        Job mergeJob = Job.getInstance(new Configuration());
        mergeJob.setJarByClass(IndexSecSub.class);

        // Map side
        mergeJob.setMapperClass(IndexSecMapper.class);
        mergeJob.setMapOutputKeyClass(Text.class);
        mergeJob.setMapOutputValueClass(Text.class);

        // Reduce side
        mergeJob.setReducerClass(IndexSecReduce.class);
        mergeJob.setOutputKeyClass(Text.class);
        mergeJob.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(mergeJob, stepOneOutput);
        FileOutputFormat.setOutputPath(mergeJob, stepTwoOutput);

        mergeJob.waitForCompletion(true);
    }
}

2.需要求出每一个订单中成交金额最大的三笔

order001,u001,小米6,1999.9,2
order001,u001,雀巢咖啡,99.0,2
order001,u001,安慕希,250.0,2
order001,u001,经典红双喜,200.0,4
order001,u001,防水电脑包,400.0,2
order002,u002,小米手环,199.0,3
order002,u002,榴莲,15.0,10
order002,u002,苹果,4.5,20
order002,u002,肥皂,10.0,40
order003,u001,小米6,1999.9,2
order003,u001,雀巢咖啡,99.0,2
order003,u001,安慕希,250.0,2
order003,u001,经典红双喜,200.0,4
order003,u001,防水电脑包,400.0,2

2.1 代码

2.1.1 OrderMapper

/**
 * Mapper of the "top three per order" demo: parses one comma-separated order
 * line into an {@link OrderBean} and emits it keyed by the order id.
 */
public class OrderMapper extends Mapper<LongWritable, Text, Text, OrderBean> {
    /**
     * Parses a line of the form {@code orderId,userId,name,price,count} and
     * emits {@code (orderId, bean)}.
     *
     * @param key     input key of the record (not used)
     * @param value   one comma-separated order line
     * @param context task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");

        Text orderKey = new Text(fields[0]);
        OrderBean bean = new OrderBean(
                fields[0],
                fields[1],
                fields[2],
                Float.parseFloat(fields[3]),
                Integer.parseInt(fields[4]));

        context.write(orderKey, bean);
    }
}

2.1.2 OrderReduce

/**
 * Reducer of the "top three per order" demo.
 *
 * <p>Collects every {@link OrderBean} of one order, sorts them with the bean's
 * own {@code compareTo}, and writes at most the first three.
 */
public class OrderReduce extends Reducer<Text, OrderBean, OrderBean, NullWritable> {
    /** Maximum number of records written per order. */
    private static final int TOP_N = 3;

    /**
     * Writes up to {@link #TOP_N} beans of one order in sorted order.
     *
     * @param key     the order id
     * @param values  all beans belonging to that order
     * @param context task context used to emit output
     * @throws IOException          if writing the output fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<OrderBean> values, Context context) throws IOException, InterruptedException {
        List<OrderBean> orderBeans = new ArrayList<OrderBean>();

        for (OrderBean value : values) {
            // Store a copy of each bean rather than the iterated instance itself.
            orderBeans.add(new OrderBean(value));
        }

        Collections.sort(orderBeans);

        // An order may contain fewer than TOP_N lines; never index past the list end.
        int limit = Math.min(TOP_N, orderBeans.size());
        for (int i = 0; i < limit; i++) {
            context.write(orderBeans.get(i), NullWritable.get());
        }
    }
}

2.1.3 OrderSub

/**
 * Driver of the "top three per order" demo: wires {@link OrderMapper} and
 * {@link OrderReduce} into a job and runs it on the hard-coded paths.
 */
public class OrderSub {
    /**
     * Configures and runs the job, waiting until it has finished.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Path inputDir = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\交金额最大的三笔\\input");
        Path outputDir = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\交金额最大的三笔\\output");

        Job orderJob = Job.getInstance(new Configuration());
        orderJob.setJarByClass(OrderSub.class);

        // Map side: (orderId, bean)
        orderJob.setMapperClass(OrderMapper.class);
        orderJob.setMapOutputKeyClass(Text.class);
        orderJob.setMapOutputValueClass(OrderBean.class);

        // Reduce side: (bean, null)
        orderJob.setReducerClass(OrderReduce.class);
        orderJob.setOutputKeyClass(OrderBean.class);
        orderJob.setOutputValueClass(NullWritable.class);

        FileInputFormat.setInputPaths(orderJob, inputDir);
        FileOutputFormat.setOutputPath(orderJob, outputDir);

        orderJob.waitForCompletion(true);
    }
}

2.1.4 OrderBean

public class OrderBean implements WritableComparable<OrderBean> {
    private String orderId;
    private String userId;
    private String comName;
    private Float price;
    private Integer nub;
    private Float subPrice;

    @Override
    public String toString() {
        return orderId+"\t"+userId+"\t"+comName+"\t"+price+"\t"+nub+subPrice;
    }


    public int compareTo(OrderBean o) {
        Float temp = o.getSubPrice() - this.getSubPrice();
        if (temp == 0){
            return this.getComName().compareTo(o.getComName());
        }else if (temp>0){
            return 1;
        }else{
            return 0;
        }
    }

    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeUTF(this.orderId);
        dataOutput.writeUTF(this.userId);
        dataOutput.writeUTF(this.comName);
        dataOutput.writeFloat(this.price);
        dataOutput.writeInt(this.nub);
        dataOutput.writeFloat(this.subPrice);
    }

    public void readFields(DataInput dataInput) throws IOException {
        this.orderId = dataInput.readUTF();
        this.userId = dataInput.readUTF();
        this.comName = dataInput.readUTF();
        this.price = dataInput.readFloat();
        this. nub = dataInput.readInt();
        this.subPrice = price*nub;
    }

    public OrderBean() {}

    public OrderBean(OrderBean ob){
        this.orderId = ob.getOrderId();
        this.userId = ob.getUserId();
        this.comName = ob.getComName();
        this.price = ob.getPrice();
        this.nub = ob.getNub();
        this.subPrice = ob.getSubPrice();
    }

    public OrderBean(String orderId, String userId, String comName, Float price, Integer nub) {
        this.orderId = orderId;
        this.userId = userId;
        this.comName = comName;
        this.price = price;
        this.nub = nub;
        this.subPrice = price*nub;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public String getUserId() {
        return userId;
    }

    public void setUserId(String userId) {
        this.userId = userId;
    }

    public String getComName() {
        return comName;
    }

    public void setComName(String comName) {
        this.comName = comName;
    }

    public Float getPrice() {
        return price;
    }

    public void setPrice(Float price) {
        this.price = price;
    }

    public Integer getNub() {
        return nub;
    }

    public void setNub(Integer nub) {
        this.nub = nub;
    }

    public Float getSubPrice() {
        return subPrice;
    }

    public void setSubPrice(Float subPrice) {
        this.subPrice = subPrice;
    }
}

2.2 解释

  1. 排序
    1. bean 实现 WritableComparable 接口,并重写 compareTo 来定义排序规则
          // Sorts by line total, largest first; equal totals fall back to the product name.
          public int compareTo(OrderBean o) {
                // Arguments are swapped on purpose to get descending order.
                int byTotal = Float.compare(o.getSubPrice(), this.getSubPrice());
                if (byTotal != 0) {
                    // Must be negative when this bean sorts first; returning 0 here
                    // would break the Comparable contract and the sort result.
                    return byTotal;
                }
                return this.getComName().compareTo(o.getComName());
            }
    
    1. OrderReduce中使用
         // Buffer for the beans of the current order so they can be sorted as a whole.
         List<OrderBean> orderBeans = new ArrayList<OrderBean>();
        
        for (OrderBean value : values) {
            // Store a copy made with the copy constructor rather than the iterated object
            // itself (presumably because the framework reuses that instance - TODO confirm).
            orderBeans.add(new OrderBean(value));
        }
    
        // Sorts with the natural ordering defined by OrderBean.compareTo.
        Collections.sort(orderBeans);
    

2.2 输出

order001	u001	雀巢咖啡	99.0	2198.0
order001	u001	安慕希	250.0	2500.0
order001	u001	小米6	1999.9	23999.8
order002	u002	榴莲	15.0	10150.0
order002	u002	肥皂	10.0	40400.0
order002	u002	苹果	4.5	2090.0
order003	u001	小米6	1999.9	23999.8
order003	u001	雀巢咖啡	99.0	2198.0
order003	u001	安慕希	250.0	2500.0

2.3 优化

自定义分发规则

  1. 自定义 mapper分发的规则
/**
 * Partitioner that routes records by order id only, so every bean of one
 * order gets the same partition number.
 */
public class OptimizeOrderPartitione extends Partitioner<OrderBean, NullWritable> {

    /**
     * Computes the partition from the hash of the order id.
     *
     * @param orderBean    the map output key
     * @param nullWritable the map output value (not used)
     * @param i            the number of partitions
     * @return the non-negative order-id hash modulo {@code i}
     */
    @Override
    public int getPartition(OrderBean orderBean, NullWritable nullWritable, int i) {
        // Clear the sign bit so the remainder can never be negative.
        final int nonNegativeHash = orderBean.getOrderId().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % i;
    }
}
  1. 自定义 reduce对比的规则(分组比较器)
/**
 * Grouping comparator that treats two {@link OrderBean} keys as belonging to
 * the same group when their order ids are equal.
 */
public class OptimizeOrderGroupingComparator extends WritableComparator {

    /** Registers {@link OrderBean} as the key class and asks the base class to create key instances. */
    public OptimizeOrderGroupingComparator(){
        super(OrderBean.class,true);
    }

    /**
     * Compares two keys by order id only.
     *
     * @param a first key, expected to be an {@link OrderBean}
     * @param b second key, expected to be an {@link OrderBean}
     * @return the result of comparing the two order ids
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final OrderBean left = (OrderBean) a;
        final OrderBean right = (OrderBean) b;

        // A result of zero means both keys belong to the same group.
        final String leftId = left.getOrderId();
        return leftId.compareTo(right.getOrderId());
    }
}

  1. bean 中的compareTo
/**
 * Orders beans by order id ascending; within the same order, by line total
 * with the largest first.
 */
public int compareTo(OrderBean o) {
   int byOrderId = this.getOrderId().compareTo(o.getOrderId());
   if (byOrderId != 0) {
      return byOrderId;
   }
   // Same order: swapped arguments give descending line totals.
   return Float.compare(o.getSubPrice(), this.getSubPrice());
}

代码

/**
 * Optimized "top three per order" job: the {@link OrderBean} itself is the map
 * output key, combined with a custom partitioner and grouping comparator, and
 * the reducer writes the first three records of each group.
 */
public class OptimizeOrder {
    /** Parses each order line into an {@link OrderBean} and emits it as the key. */
    public static class OpOrdMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
        NullWritable v = NullWritable.get();

        /**
         * Emits {@code (bean, null)} for one comma-separated order line.
         *
         * @param key     input key of the record (not used)
         * @param value   one comma-separated order line
         * @param context task context used to emit output
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");

            OrderBean bean = new OrderBean(
                    fields[0],
                    fields[1],
                    fields[2],
                    Float.parseFloat(fields[3]),
                    Integer.parseInt(fields[4]));
            context.write(bean, v);
        }
    }

    /** Writes at most the first three records of every group. */
    public static class OpOrdReduce extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable>{
        /**
         * Writes the key once per iterated value and stops after three writes.
         *
         * @param key     the group key
         * @param values  the values of the group
         * @param context task context used to emit output
         */
        @Override
        protected void reduce(OrderBean key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            int emitted = 0;
            for (NullWritable current : values) {
                context.write(key, current);
                emitted++;
                if (emitted == 3) {
                    break;
                }
            }
        }
    }

    /**
     * Configures and runs the job, waiting until it has finished.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Path inputDir = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\交金额最大的三笔\\input");
        Path outputDir = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\交金额最大的三笔\\output2");

        Job topJob = Job.getInstance(new Configuration());
        topJob.setJarByClass(OptimizeOrder.class);

        // Map side
        topJob.setMapperClass(OpOrdMapper.class);
        topJob.setMapOutputKeyClass(OrderBean.class);
        topJob.setMapOutputValueClass(NullWritable.class);

        // Reduce side
        topJob.setReducerClass(OpOrdReduce.class);
        topJob.setOutputKeyClass(OrderBean.class);
        topJob.setOutputValueClass(NullWritable.class);

        // Custom partitioner
        topJob.setPartitionerClass(OptimizeOrderPartitione.class);
        // Custom grouping comparator
        topJob.setGroupingComparatorClass(OptimizeOrderGroupingComparator.class);

        FileInputFormat.setInputPaths(topJob, inputDir);
        FileOutputFormat.setOutputPath(topJob, outputDir);

        topJob.waitForCompletion(true);
    }
}

3.共同好友分析

A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J

3.1 代码

  1. 第一步
/**
 * Step one of the mutual-friend analysis.
 *
 * <p>Input lines have the form {@code person:friend1,friend2,...}. The mapper
 * inverts them to {@code (friend, person)}; the reducer then emits, for each
 * friend, every pair of persons that list that friend.
 */
public class MutualFriend {
    /** Inverts {@code person:friends} lines into {@code (friend, person)} records. */
    public static class MutualFriendMapper extends Mapper<LongWritable, Text, Text, Text>{
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits one {@code (friend, person)} record per friend on the line.
         *
         * @param key     input key of the record (not used)
         * @param value   one {@code person:friend1,friend2,...} line
         * @param context task context used to emit output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().split(":");
            if (split.length < 2) {
                // Blank line or a person without a friend list: nothing to emit,
                // and reading split[1] would throw.
                return;
            }
            String[] split1 = split[1].split(",");
            v.set(split[0]);
            for (String s : split1) {
                k.set(s);
                context.write(k,v);
            }
        }
    }


    /** Emits every pair of persons sharing the friend given by the key. */
    public static class MutualFriendReduce extends Reducer<Text,Text,Text,Text>{
        Text k = new Text();
        Text v = new Text();

        /**
         * Writes {@code (personA-personB, friend)} for every unordered pair of
         * persons received for this friend.
         *
         * @param key     the shared friend
         * @param values  the persons that list this friend
         * @param context task context used to emit output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            List<String> list = new ArrayList<String>();
            for (Text value : values) {
                list.add(value.toString());
            }

            v.set(key);
            for (int i =0 ;i<list.size()-1;i++){
                for (int j = i+1; j<list.size();j++){
                    String first = list.get(i);
                    String second = list.get(j);
                    // Always put the lexicographically smaller name first. Otherwise the
                    // same pair shows up as both "A-G" and "G-A", and step two aggregates
                    // the two spellings as different keys.
                    if (first.compareTo(second) <= 0) {
                        k.set(first+"-"+second);
                    } else {
                        k.set(second+"-"+first);
                    }
                    context.write(k,v);
                }
            }

        }
    }

    /**
     * Configures and runs the step-one job, waiting until it has finished.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(MutualFriend.class);
        job.setMapperClass(MutualFriendMapper.class);
        job.setReducerClass(MutualFriendReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\共同好友\\input"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\共同好友\\output"));

        job.waitForCompletion(true);
    }
}

结果:

I-K	A
I-C	A
I-B	A
I-G	A
I-F	A
I-H	A
I-O	A
I-D	A
K-C	A
K-B	A
K-G	A
K-F	A
K-H	A
K-O	A
K-D	A
C-B	A
C-G	A
C-F	A
C-H	A
C-O	A
C-D	A
B-G	A
B-F	A
B-H	A
B-O	A
B-D	A
G-F	A
G-H	A
G-O	A
G-D	A
F-H	A
F-O	A
F-D	A
H-O	A
H-D	A
O-D	A
A-F	B
A-J	B
A-E	B
F-J	B
F-E	B
J-E	B
A-E	C
A-B	C
A-H	C
A-F	C
A-G	C
A-K	C
E-B	C
E-H	C
E-F	C
E-G	C
E-K	C
B-H	C
B-F	C
B-G	C
B-K	C
H-F	C
H-G	C
H-K	C
F-G	C
F-K	C
G-K	C
G-C	D
G-K	D
G-A	D
G-L	D
G-F	D
G-E	D
G-H	D
C-K	D
C-A	D
C-L	D
C-F	D
C-E	D
C-H	D
K-A	D
K-L	D
K-F	D
K-E	D
K-H	D
A-L	D
A-F	D
A-E	D
A-H	D
L-F	D
L-E	D
L-H	D
F-E	D
F-H	D
E-H	D
G-M	E
G-L	E
G-H	E
G-A	E
G-F	E
G-B	E
G-D	E
M-L	E
M-H	E
M-A	E
M-F	E
M-B	E
M-D	E
L-H	E
L-A	E
L-F	E
L-B	E
L-D	E
H-A	E
H-F	E
H-B	E
H-D	E
A-F	E
A-B	E
A-D	E
F-B	E
F-D	E
B-D	E
L-M	F
L-D	F
L-C	F
L-G	F
L-A	F
M-D	F
M-C	F
M-G	F
M-A	F
D-C	F
D-G	F
D-A	F
C-G	F
C-A	F
G-A	F
O-C	I
D-E	L
E-F	M
A-H	O
A-I	O
A-J	O
A-F	O
H-I	O
H-J	O
H-F	O
I-J	O
I-F	O
J-F	O
  1. 第二步
/**
 * Step two of the mutual-friend analysis: groups the {@code pair<TAB>friend}
 * lines produced by step one by pair and lists all friends of each pair on
 * one line.
 */
public class MutualFriend2 {
    /** Splits each {@code pair<TAB>friend} line into key and value. */
    public static class MutualFriend2Mapper extends Mapper<LongWritable, Text, Text, Text>{
        Text k = new Text();
        Text v = new Text();

        /**
         * Emits {@code (pair, friend)} for one tab-separated input line.
         *
         * @param key     input key of the record (not used)
         * @param value   one {@code pair<TAB>friend} line
         * @param context task context used to emit output
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] columns = value.toString().split("\t");
            String pair = columns[0];
            k.set(pair);
            String friend = columns[1];
            v.set(friend);
            context.write(k,v);
        }
    }

    /** Joins all friends of one pair into a single tab-separated value. */
    public static class MutualFriend2Reduce extends Reducer<Text,Text,Text,Text>{
        Text k = new Text();
        Text v = new Text();

        /**
         * Writes {@code pair>} as the key and every friend followed by a tab as the value.
         *
         * @param key     the pair of persons
         * @param values  the friends shared by that pair
         * @param context task context used to emit output
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            k.set(key+">");

            StringBuilder friends = new StringBuilder();
            for (Text friend : values) {
                friends.append(friend.toString()).append("\t");
            }
            v.set(friends.toString());

            context.write(k, v);
        }
    }

    /**
     * Configures and runs the step-two job, waiting until it has finished.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Path stepOneOutput = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\共同好友\\output");
        Path stepTwoOutput = new Path("E:\\soft\\java\\ideaProject\\hadoop\\file\\倒排索引demo\\共同好友\\output2");

        Job mergeJob = Job.getInstance(new Configuration());
        mergeJob.setJarByClass(MutualFriend2.class);

        mergeJob.setMapperClass(MutualFriend2Mapper.class);
        mergeJob.setMapOutputKeyClass(Text.class);
        mergeJob.setMapOutputValueClass(Text.class);

        mergeJob.setReducerClass(MutualFriend2Reduce.class);
        mergeJob.setOutputKeyClass(Text.class);
        mergeJob.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(mergeJob, stepOneOutput);
        FileOutputFormat.setOutputPath(mergeJob, stepTwoOutput);

        mergeJob.waitForCompletion(true);
    }
}

结果:

A-B>	E	C	
A-D>	E	
A-E>	D	C	B	
A-F>	C	D	B	O	E	
A-G>	C	
A-H>	D	O	C	
A-I>	O	
A-J>	O	B	
A-K>	C	
A-L>	D	
B-D>	E	A	
B-F>	C	A	
B-G>	C	A	
B-H>	C	A	
B-K>	C	
B-O>	A	
C-A>	D	F	
C-B>	A	
C-D>	A	
C-E>	D	
C-F>	D	A	
C-G>	F	A	
C-H>	A	D	
C-K>	D	
C-L>	D	
C-O>	A	
D-A>	F	
D-C>	F	
D-E>	L	
D-G>	F	
E-B>	C	
E-F>	M	C	
E-G>	C	
E-H>	C	D	
E-K>	C	
F-B>	E	
F-D>	E	A	
F-E>	B	D	
F-G>	C	
F-H>	D	A	
F-J>	B	
F-K>	C	
F-O>	A	
G-A>	E	D	F	
G-B>	E	
G-C>	D	
G-D>	E	A	
G-E>	D	
G-F>	D	E	A	
G-H>	E	A	D	
G-K>	D	C	
G-L>	D	E	
G-M>	E	
G-O>	A	
H-A>	E	
H-B>	E	
H-D>	A	E	
H-F>	C	E	O	
H-G>	C	
H-I>	O	
H-J>	O	
H-K>	C	
H-O>	A	
I-B>	A	
I-C>	A	
I-D>	A	
I-F>	A	O	
I-G>	A	
I-H>	A	
I-J>	O	
I-K>	A	
I-O>	A	
J-E>	B	
J-F>	O	
K-A>	D	
K-B>	A	
K-C>	A	
K-D>	A	
K-E>	D	
K-F>	D	A	
K-G>	A	
K-H>	D	A	
K-L>	D	
K-O>	A	
L-A>	F	E	
L-B>	E	
L-C>	F	
L-D>	E	F	
L-E>	D	
L-F>	E	D	
L-G>	F	
L-H>	E	D	
L-M>	F	
M-A>	F	E	
M-B>	E	
M-C>	F	
M-D>	F	E	
M-F>	E	
M-G>	F	
M-H>	E	
M-L>	E	
O-C>	I	
O-D>	A	
  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值