2. Flink -- Common API Practice

1. A Simple DataSet API Example


1. pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>flink-tutorial</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>flink-tutorial</name>
    <description>Flink common API practice project</description>
    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <flink-version>1.13.0</flink-version>
        <scala.binary.version>2.12</scala.binary.version>
        <slf4j.version>1.7.30</slf4j.version>
    </properties>
    <dependencies>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink-version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>2.14.0</version>
        </dependency>

    </dependencies>


</project>

2. resources/log4j.properties

log4j.rootLogger = error,stdout

### Log output to the console ###
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n

3. words.txt

hello world
hello flink
hello java
4. BatchWordCount.java
package com.example.flinktutorial.wordcount;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;


/**
 * This example uses the DataSet API for batch processing; this API will not be used going forward.
 *
 * Flink itself is a unified stream/batch framework, and batch processing is essentially just a
 * special case of streaming, so starting with Flink 1.12 the DataStream API is used for unified
 * stream/batch processing.
 *
 * bin/flink run -Dexecution.runtime-mode=BATCH BatchWordCount.jar
 * selects batch execution mode when the job is submitted.
 */
public class BatchWordCount {
    public static void main(String[] args) throws Exception {
        //1. Create the execution environment
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        //2. Read the data from a file
        DataSource<String> lineDataSource = env.readTextFile("input/words.txt");

        //3. Split each line into words and convert them into (word, 1) tuples
        FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple = lineDataSource.flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
            String[] words = line.split(" ");
            for (String word : words) {
                out.collect(Tuple2.of(word, 1L));
            }
        }).returns(Types.TUPLE(Types.STRING, Types.LONG));

        //4. Group by word (tuple field 0)
        UnsortedGrouping<Tuple2<String, Long>> wordAndOneGroup = wordAndOneTuple.groupBy(0);

        //5. Aggregate within each group (sum tuple field 1)
        AggregateOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);

        //6. Print the result
        sum.print();


    }
}

Output:

(flink,1)
(world,1)
(hello,3)
(java,1)
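A note on the .returns(Types.TUPLE(Types.STRING, Types.LONG)) call: the Java compiler erases the generic types of a lambda, so Flink cannot infer the Tuple2 field types on its own and needs this explicit hint. As a minimal alternative sketch (not part of the original example), an anonymous FlatMapFunction preserves the type information in its signature, so the hint can be dropped:

// Alternative to the lambda + .returns(...) type hint: an anonymous inner class.
// Requires: import org.apache.flink.api.common.functions.FlatMapFunction;
FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple =
        lineDataSource.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Long>> out) {
                for (String word : line.split(" ")) {
                    out.collect(Tuple2.of(word, 1L));
                }
            }
        });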

This example uses the DataSet API for batch processing; this API will not be used going forward.
Flink itself is a unified stream/batch framework, and batch processing is essentially just a special case of streaming, so starting with Flink 1.12 the DataStream API is used for unified stream/batch processing.
bin/flink run -Dexecution.runtime-mode=BATCH BatchWordCount.jar
selects batch execution mode when the job is submitted.
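Besides the command-line flag, the execution mode can also be set in code. A minimal sketch, assuming Flink 1.12+ where the RuntimeExecutionMode enum was introduced (hard-coding the mode is usually discouraged, since the command-line flag keeps it configurable per submission):

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Execute this DataStream program with batch semantics.
env.setRuntimeMode(RuntimeExecutionMode.BATCH);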

2. DataStream API Example: Bounded Stream Processing


import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;


/**
 * Bounded stream processing: the same word count, this time on the DataStream API.
 */
public class BoundedStreamWordCount {
    public static void main(String[] args) throws Exception {
        //1. Create the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        //2. Read the file
        DataStreamSource<String> lineDataStreamSource = env.readTextFile("input/words.txt");

        //3. Transform: split each line into (word, 1) tuples
        SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOneTuple = lineDataStreamSource.flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
            String[] words = line.split(" ");
            for (String word : words) {
                out.collect(Tuple2.of(word, 1L));
            }
        }).returns(Types.TUPLE(Types.STRING, Types.LONG));

        //4. Key the stream by word
        KeyedStream<Tuple2<String, Long>, String> wordAndOneGroup = wordAndOneTuple.keyBy(data -> data.f0);

        //5. Aggregate within each key group
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);
        //6. Print
        sum.print();

        //7. Trigger execution: the job keeps listening and computing
        env.execute();
    }
}

IDEA simulates Flink's distributed execution with multiple threads; the number in front of each output line is the thread number, i.e. the index of the parallel subtask that produced the record.
Output:

3> (hello,1)
5> (world,1)
3> (hello,2)
2> (java,1)
3> (hello,3)
7> (flink,1)

How many parallel subtasks can there be? That is determined by the parallelism, which by default equals the number of CPU cores of the machine running the job. My machine has 8 cores, so the prefixes fall between 1 and 8.

Records with the same key are always handled by the same subtask, i.e. the same thread.
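To make this visible, you can set the parallelism yourself instead of relying on the CPU-core default. A minimal sketch against the example above (the env-level call caps the whole job; a per-operator call overrides it for that operator only):

// Cap the whole job at 2 parallel subtasks, so the output
// prefixes can only be 1> and 2>.
env.setParallelism(2);

// Or override a single operator, e.g. print with one subtask:
sum.print().setParallelism(1);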

3. DataStream API Example: Unbounded Stream Processing

On the node4 virtual machine, install netcat:

yum install nmap-ncat -y    # CentOS

After a successful install, start a listener (-l listens on the given port, -k keeps it open across connections):

[root@node4 ~]# nc -lk 7777

Then start the Java program:


import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class StreamWordCount {
    public static void main(String[] args) throws Exception {
        //1. Create the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        //2. Read the text stream from a socket
        DataStreamSource<String> lineDataStream = env.socketTextStream("your-ip-address", 7777);

        //3. Transform: split each line into (word, 1) tuples
        SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOneTuple = lineDataStream.flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
            String[] words = line.split(" ");
            for (String word : words) {
                out.collect(Tuple2.of(word, 1L));
            }
        }).returns(Types.TUPLE(Types.STRING, Types.LONG));

        //4. Key the stream by word
        KeyedStream<Tuple2<String, Long>, String> wordAndOneGroup = wordAndOneTuple.keyBy(data -> data.f0);

        //5. Aggregate within each key group
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);
        //6. Print
        sum.print();

        //7. Trigger execution: the job keeps listening and computing
        env.execute();
    }
}

Type into the Linux terminal:

[root@node4 ~]# nc -lk 7777
hello word

Log output:

3> (hello,1)
6> (word,1)

Improving the Java program: read the host and port from the program arguments instead of hard-coding them.

        //Read the host and port from the program arguments
        //(requires: import org.apache.flink.api.java.utils.ParameterTool;)
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        String ipaddr = parameterTool.get("ipaddr");
        Integer port = parameterTool.getInt("port");

        //2. Read the text stream from the socket
        DataStreamSource<String> lineDataStream = env.socketTextStream(ipaddr, port);
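The job can then be started with the host and port as program arguments; in IDEA the same string goes into Run Configuration > Program arguments. ParameterTool also has two-argument overloads that supply defaults, as in this small sketch (node4 is just the example host from above):

// Submit with explicit arguments:
//   bin/flink run StreamWordCount.jar --ipaddr node4 --port 7777

// Optional: fall back to defaults when no arguments are supplied.
String ipaddr = parameterTool.get("ipaddr", "localhost");
Integer port = parameterTool.getInt("port", 7777);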

