Flink 在 Windows 环境下的 WordCount demo(离线批处理和实时流处理)

1.新建maven项目,pom文件新增如下代码

 <dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>1.10.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.12</artifactId>
        <version>1.10.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka-0.10_2.12</artifactId>
        <version>1.10.1</version>
    </dependency>
 </dependencies>

Flink 程序的步骤较为明确:构建执行环境 → 读取数据源 → 对数据做转换处理(切分、分组、聚合)→ 输出结果;流处理程序最后还需要调用 env.execute() 来触发执行。

2. 批处理 demo,数据源使用集合来模拟。新建类 WordCountWithBatch

package com.flinkTest.wordCount;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

import java.util.Arrays;

/**
 * Batch word count built on the Flink DataSet API.
 *
 * <p>Reads a small hard-coded collection of lines, splits each line into words,
 * groups the words and sums their occurrences, then prints the per-word totals
 * to standard output.
 */
public class WordCountWithBatch {
    /**
     * Runs the batch word-count job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if building or executing the Flink job fails
     */
    public static void main(String[] args) throws Exception {

        // 1. Create the batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // 2. Read the batch input; hard-coded in this example.
        DataSource<String> stringDataSource = env.fromCollection(Arrays.asList(
                "hello java",
                "hello spark",
                "hello flink",
                "hello scala"));

        // 3. Process the data: split into words (flatMap), group by word (groupBy),
        //    and accumulate the counts (sum).
        AggregateOperator<Tuple2<String, Integer>> resultOut = stringDataSource
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Split on runs of whitespace so that leading, trailing or
                        // repeated spaces do not produce empty "words".
                        for (String word : s.trim().split("\\s+")) {
                            // A blank line still yields a single empty token; skip it.
                            if (!word.isEmpty()) {
                                out.collect(new Tuple2<String, Integer>(word, 1));
                            }
                        }
                    }
                })
                .groupBy(0)   // group on tuple field 0 (the word)
                .sum(1);      // sum tuple field 1 (the count)

        // 4. Emit the result to standard output. For a DataSet, print() itself
        //    triggers job execution, so no explicit env.execute() call is needed.
        resultOut.print();
    }
}

3. 实时流处理 demo,使用 nc 来模拟实时流。新建类 WordCountWithSocket

package com.flinkTest.wordCount;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * Streaming word count built on the Flink DataStream API.
 *
 * <p>Reads newline-delimited text from a TCP socket (for example one served by
 * {@code nc -l -p 9000}), splits each line into words, keys the stream by word
 * and prints a running count per word to standard output.
 */
public class WordCountWithSocket {
    /**
     * Runs the streaming word-count job.
     *
     * @param args optional: {@code args[0]} is the socket host (default
     *             {@code "localhost"}), {@code args[1]} is the socket port
     *             (default {@code 9000})
     * @throws NumberFormatException if {@code args[1]} is present but not an integer
     * @throws Exception if building or executing the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Host/port default to the values used throughout this example.
        final String host = args.length > 0 ? args[0] : "localhost";
        final int port = args.length > 1 ? Integer.parseInt(args[1]) : 9000;

        // 1. Create the streaming execution environment.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Open the source: text lines read from the socket, delimited by '\n'.
        DataStream<String> socketDS = env.socketTextStream(host, port, "\n");

        // 3. Process the data: split into words (flatMap), key by word (keyBy),
        //    and keep a running total (sum).
        SingleOutputStreamOperator<Tuple2<String, Integer>> resultOut = socketDS
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Split on runs of whitespace so that leading, trailing or
                        // repeated spaces do not produce empty "words".
                        for (String word : s.trim().split("\\s+")) {
                            // A blank line still yields a single empty token; skip it.
                            if (!word.isEmpty()) {
                                out.collect(new Tuple2<String, Integer>(word, 1));
                            }
                        }
                    }
                })
                .keyBy(0)   // key on tuple field 0 (the word)
                .sum(1);    // running sum of tuple field 1 (the count)

        // 4. Emit the result to standard output.
        resultOut.print();

        // 5. Streaming jobs are lazy: nothing runs until execute() is called.
        env.execute("WordCountWithSocket");
    }
}

  3.1- 下载nc 工具

     url:https://eternallybored.org/misc/netcat/

     打开下载的压缩包,找到nc.exe,复制粘贴到 c盘 users/当前登录用户目录下即可(简单粗暴的方式)

     也可以自己配置环境变量

3.2- 运行 nc 命令,注意端口号要和程序中的保持一致,本例是 9000

nc -l -p 9000

3.3- 运行程序 WordCountWithSocket(必须先完成 3.2 启动 nc 监听,否则程序会因为连接不上端口而报错退出)。程序启动后,在 nc 窗口中输入以空格分隔的单词并回车,即可在程序控制台看到实时累加的词频结果。

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值