【云星数据---Apache Flink实战系列(精品版)】:Apache Flink批处理API详解与编程实战026--DataSet实用API详解026

183 篇文章 0 订阅
86 篇文章 57 订阅

Reduce

以element为粒度,对element逐个进行合并操作,最后只会形成一个结果。
执行程序:
package code.book.batch.dataset.advance.api;

import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

/**
 * Demonstrates {@code DataSet.reduce}: the elements of a data set are merged
 * pairwise by a {@link ReduceFunction} until a single value remains.
 *
 * <p>Four reducers are applied to the same input {@code 1..7}: a sum, a
 * product, a parity-dependent mix of both, and a sum that logs every step.
 */
public class ReduceFunction001java {

    /** Merges two values by adding them. */
    private static final class SumReducer implements ReduceFunction<Integer> {
        @Override
        public Integer reduce(Integer accumulated, Integer element) throws Exception {
            return accumulated + element;
        }
    }

    /** Merges two values by multiplying them. */
    private static final class ProductReducer implements ReduceFunction<Integer> {
        @Override
        public Integer reduce(Integer accumulated, Integer element) throws Exception {
            return accumulated * element;
        }
    }

    /**
     * Multiplies when the running result is odd, adds when it is even; shows
     * that the merge logic may be arbitrarily involved.
     *
     * <p>NOTE(review): this operation is not associative, so its result
     * presumably depends on the order in which elements are merged — confirm
     * against the ReduceFunction contract before relying on a fixed output.
     */
    private static final class ParitySwitchingReducer implements ReduceFunction<Integer> {
        @Override
        public Integer reduce(Integer accumulated, Integer element) throws Exception {
            if (accumulated % 2 != 0) {
                return accumulated * element;
            }
            return accumulated + element;
        }
    }

    /**
     * Adds like {@link SumReducer} but prints both arguments first, making it
     * visible that the first argument is the running result and the second
     * one is the next element.
     */
    private static final class TracingSumReducer implements ReduceFunction<Integer> {
        @Override
        public Integer reduce(Integer accumulated, Integer element) throws Exception {
            System.out.println("intermediateResult=" + accumulated + " ,next=" + element);
            return accumulated + element;
        }
    }

    /**
     * Builds the input data set and runs the four reductions one after another.
     *
     * @param args unused
     * @throws Exception if obtaining the environment or running a job fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Obtain the execution environment and prepare the input data.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5, 6, 7);

        // 2. Running sum of all elements.
        DataSet<Integer> total = numbers.reduce(new SumReducer());
        total.print();

        // 3. Running product of all elements.
        DataSet<Integer> product = numbers.reduce(new ProductReducer());
        product.print();

        // 4. A reduction whose rule depends on the intermediate result.
        DataSet<Integer> mixed = numbers.reduce(new ParitySwitchingReducer());
        mixed.print();

        // 5. Same sum as step 2, but every merge step is logged; the result
        //    itself is collected rather than printed.
        DataSet<Integer> traced = numbers.reduce(new TracingSumReducer());
        traced.collect();
    }
}
执行结果:
text2.print()
28

text3.print()
5040

text4.print()
157

text5.collect()
intermediateResult=1 ,next=2
intermediateResult=3 ,next=3
intermediateResult=6 ,next=4
intermediateResult=10 ,next=5
intermediateResult=15 ,next=6
intermediateResult=21 ,next=7

reduceGroup

对每一组的元素分别进行合并操作。与reduce类似,不过它能为每一组产生一个结果。
如果没有分组,就当作一个分组,此时和reduce一样,只会产生一个结果。
执行程序:
package code.book.batch.dataset.advance.api;

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;
import java.util.Iterator;

/**
 * Demonstrates {@code reduceGroup}: a {@link GroupReduceFunction} receives all
 * elements of a group at once and may emit results through a {@link Collector}.
 *
 * <p>Three cases are shown: summing an ungrouped data set, computing the even
 * and odd sums of an ungrouped data set in one pass, and summing salaries per
 * name after {@code groupBy(0)}.
 */
public class GroupReduceFunction001java {

    /**
     * Builds the sample data sets and runs the three group reductions.
     *
     * @param args unused
     * @throws Exception if obtaining the environment or running a job fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Obtain the execution environment and prepare the input data.
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Integer> text = env.fromElements(1, 2, 3, 4, 5, 6, 7);

        // 2. No grouping: the function sees every element and emits their sum.
        DataSet<Integer> text2 = text.reduceGroup(new GroupReduceFunction<Integer, Integer>() {
            @Override
            public void reduce(Iterable<Integer> iterable,
            Collector<Integer> collector) throws Exception {
                int sum = 0;
                for (int value : iterable) {
                    sum += value;
                }
                collector.collect(sum);
            }
        });
        text2.print();

        // 3. Still no groupBy: one pass over all elements that accumulates the
        //    even values and the odd values separately, emitted as
        //    (evenSum, oddSum).
        DataSet<Tuple2<Integer, Integer>> text3 = text.reduceGroup(
        new GroupReduceFunction<Integer, Tuple2<Integer, Integer>>() {
            @Override
            public void reduce(Iterable<Integer> iterable,
            Collector<Tuple2<Integer, Integer>> collector) throws Exception {
                int evenSum = 0;
                int oddSum = 0;
                for (int value : iterable) {
                    if (value % 2 == 0) {
                        evenSum += value;
                    } else {
                        oddSum += value;
                    }
                }
                collector.collect(new Tuple2<Integer, Integer>(evenSum, oddSum));
            }
        });
        text3.print();

        // 4. Grouped reduction: total salary per person, one result per group.
        //    Tuples are created with explicit type arguments instead of the raw
        //    Tuple2 type so no unchecked conversion is needed.
        DataSet<Tuple2<String, Integer>> data = env.fromElements(
        new Tuple2<String, Integer>("zhangsan", 1000), new Tuple2<String, Integer>("lisi", 1001),
        new Tuple2<String, Integer>("zhangsan", 3000), new Tuple2<String, Integer>("lisi", 1002));
        // 4.1 Group by tuple field 0 (the name).
        DataSet<Tuple2<String, Integer>> data2 = data.groupBy(0).reduceGroup(
        new GroupReduceFunction<Tuple2<String, Integer>, Tuple2<String, Integer>>() {
            @Override
            public void reduce(Iterable<Tuple2<String, Integer>> iterable,
            Collector<Tuple2<String, Integer>> collector) throws Exception {
                int salary = 0;
                String name = "";
                // 4.2 Add up the salaries in this group; the name is taken from
                //     the records themselves (the last one seen wins).
                for (Tuple2<String, Integer> record : iterable) {
                    name = record.f0;
                    salary += record.f1;
                }
                collector.collect(new Tuple2<String, Integer>(name, salary));
            }
        });
        data2.print();
    }
}
执行结果:
text2.print()
28

text3.print()
(12,16)

data2.print()
(lisi,2003)
(zhangsan,4000)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值