Flink SQL 案例Word Count

最新推荐文章于 2024-08-06 10:35:08 发布

不看白不看，看了不白看

最新推荐文章于 2024-08-06 10:35:08 发布

阅读量555

点赞数

分类专栏： Flink系列文章标签： flink

本文链接：https://blog.csdn.net/test_test111/article/details/119154367

版权

Flink系列专栏收录该内容

14 篇文章 0 订阅

订阅专栏

之前写过一篇 Word Count，下面介绍基于 Flink SQL 的 Word Count。

Flink SQL简介

Flink SQL 支持的语法

Flink SQL 核心算子的语义设计参考了 1992、2011 等 ANSI-SQL 标准，Flink 使用 Apache Calcite 解析 SQL ，Calcite 支持标准的 ANSI SQL。

Flink Table & SQL API是在DataStream和DataSet之上封装的一层高级API，看下图：

基于 Flink SQL 编写的 Flink 程序也离不开读取原始数据，计算逻辑和写入计算结果数据三部分。

一个完整的 Flink SQL 编写的程序包括如下三部分：

Source Operator：Source operator 是对外部数据源的抽象，目前 Apache Flink 内置了很多常用的数据源实现，例如 MySQL、Kafka 等；

Transformation Operators：算子操作主要完成例如查询、聚合操作等，目前 Flink SQL 支持了 Union、Join、Projection、Difference、Intersection 及 window 等大多数传统数据库支持的操作；

Sink Operator：Sink operator 是对外结果表的抽象，目前 Apache Flink 也内置了很多常用的结果表的抽象，比如 Kafka Sink 等。

我们通过用一个最经典的 WordCount 程序作为入门，看一下传统的基于 DataSet/DataStream API 开发和基于 SQL 开发有哪些不同？

示例一

pom.xml 引入下面的包

<!-- Versions shared by every Flink artifact declared below. -->
<properties>
    <flink.version>1.13.0</flink.version>
    <scala.binary.version>2.11</scala.binary.version>
    <java.version>1.8</java.version>
</properties>

<dependencies>
    <!-- Table API bridges. The Java bridge is declared once, at the default (compile) scope,
         so that the example's main() can be run locally. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-scala-bridge_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <!-- Planners -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <!-- Table connectors and formats -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-csv</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- NOTE(review): flink-table appears to be published as a parent POM rather than a jar
         in recent Flink releases; confirm that this dependency resolves for ${flink.version}. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java</artifactId>
        <version>${flink.version}</version>
    </dependency>

    <!-- Core, DataSet/DataStream APIs and the client needed to execute jobs -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-core</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>

上java代码

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.BatchTableEnvironment;

import static org.apache.flink.table.api.Expressions.$;

/**
 * Minimal batch word count expressed as a SQL query.
 *
 * <p>The program builds a small in-memory {@code DataSet} of {@link WC} records, exposes it to
 * the table environment as the view {@code WordCount}, sums the frequencies per word with a
 * {@code GROUP BY} query, converts the resulting table back into a {@code DataSet} and prints it.
 */
public class WordCountSQL {

    /**
     * Runs the example end to end and prints the aggregated word counts.
     *
     * @param args command-line arguments; not read by this program
     * @throws Exception propagated from the Flink calls made while building or printing the result
     */
    public static void main(String[] args) throws Exception {
        // Batch execution environment plus the table environment layered on top of it.
        ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
        BatchTableEnvironment tableEnv = BatchTableEnvironment.create(batchEnv);

        // Three sample records; "Hello" appears twice so the aggregation has something to sum.
        DataSet<WC> words =
                batchEnv.fromElements(new WC("Hello", 1), new WC("Ciao", 1), new WC("Hello", 1));

        // Expose the DataSet to SQL under the view name "WordCount".
        tableEnv.createTemporaryView("WordCount", words, $("word"), $("frequency"));

        // Aggregate per word; the column aliases line up with the public fields of WC.
        final String countQuery =
                "SELECT word, SUM(frequency) as frequency FROM WordCount GROUP BY word";
        Table counts = tableEnv.sqlQuery(countQuery);

        // Map each result row back onto the POJO and print it.
        tableEnv.toDataSet(counts, WC.class).print();
    }

    /**
     * Word/frequency pair. The fields are public and a public no-argument constructor is present,
     * which the original example notes is what makes this a Flink POJO.
     */
    public static class WC {
        /** The word being counted. */
        public String word;

        /** Number of occurrences recorded for {@link #word}. */
        public long frequency;

        /** No-argument constructor kept so the class qualifies as a Flink POJO. */
        public WC() {}

        /**
         * Creates a record for the given word.
         *
         * @param word the word
         * @param frequency the occurrence count for the word
         */
        public WC(String word, long frequency) {
            this.word = word;
            this.frequency = frequency;
        }

        /** Renders the record as {@code WC <word> <frequency>}. */
        @Override
        public String toString() {
            return new StringBuilder("WC ")
                    .append(word)
                    .append(' ')
                    .append(frequency)
                    .toString();
        }
    }
}

查看运行结果：程序输出 WC Hello 2 和 WC Ciao 1（输出顺序可能不同），与对输入数据按单词汇总后的预期一致。

示例二

/**
 * Registers a CSV-file-backed table named {@code Orders} through a DDL statement, then prints
 * the sum of {@code age} per {@code name} for the rows whose name matches {@code '%zhang%'}.
 *
 * @throws Exception propagated from the Flink calls made while registering, querying or printing
 */
private static void sql() throws Exception {
    ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
    BatchTableEnvironment tables = BatchTableEnvironment.create(batchEnv);

    // Register the table "Orders" so that it can be queried with SQL.
    // The field delimiter is set explicitly to ','.
    // NOTE(review): the original comment was cut off before naming the default delimiter;
    // confirm the default against the Flink CSV format documentation.
    final String createOrdersDdl =
            "CREATE TABLE Orders (`id` BIGINT, name STRING, age INT) WITH ('connector.type' = 'filesystem',\n"
                    + "    'connector.path' = 'D:\\student.csv',\n"
                    + "    'format.type' = 'csv',\n"
                    + "    'format.field-delimiter' = ',')";
    tables.executeSql(createOrdersDdl);

    // Run the aggregation on the registered table and obtain the outcome as a new table.
    final String ageByNameQuery =
            "SELECT name, sum(age) FROM Orders WHERE name LIKE '%zhang%' group by name";
    Table ageByName = tables.sqlQuery(ageByNameQuery);

    // Convert the table to a DataSet of generic rows and print it.
    tables.toDataSet(ageByName, Row.class).print();
}