################################################################################
# Copyright 2019 Ververica GmbH
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

# This file defines the default environment for Flink's SQL Client.
# Defaults might be overwritten by a session specific environment.

#==============================================================================
# Table Sources
#==============================================================================

# Define table sources here. See the Table API & SQL documentation for details.
tables:
# "Rides" source table: taxi ride events consumed from Kafka.
# A 'source' table holds no data itself; it reads from an external system
# (Kafka here), whereas a 'sink' table writes to one. update-mode 'append'
# means rows are only ever added, never updated (append streams can go to
# any storage; update streams need updatable stores such as MySQL/HBase).
- name: Rides
  type: source
  update-mode: append
  # Logical schema of the table; fields/types must correspond to the fields
  # declared in the 'format' section below.
  schema:
  - name: rideId
    type: LONG
  - name: taxiId
    type: LONG
  - name: isStart
    type: BOOLEAN
  - name: lon
    type: FLOAT
  - name: lat
    type: FLOAT
  # 'rideTime' is exposed instead of the raw 'eventTime' field and declared
  # as an event-time (rowtime) attribute.
  - name: rideTime
    type: TIMESTAMP
    rowtime:
      timestamps:
        type: "from-field"          # take the timestamp from a record field
        from: "eventTime"           # source field holding the event timestamp
      watermarks:
        type: "periodic-bounded"    # periodic watermark with a fixed bound
        delay: "60000"              # maximum out-of-orderness in milliseconds
  - name: psgCnt
    type: INT
  # Connector: where the data physically comes from.
  connector:
    property-version: 1
    type: kafka
    version: universal              # 'universal' for Kafka 0.11+
    topic: Rides                    # Kafka topic to consume
    startup-mode: earliest-offset   # 'earliest-offset' reads the topic from
                                    # the beginning, 'latest-offset' only new data
    properties:                     # ZooKeeper / Kafka broker addresses
    - key: zookeeper.connect
      value: zookeeper:2181
    - key: bootstrap.servers
      value: kafka:9092
    - key: group.id                 # Kafka consumer group
      value: testGroup
  # Format: how raw Kafka bytes are parsed (JSON); the ROW schema must match
  # the table schema and the rowtime source field above.
  format:
    property-version: 1
    type: json
    schema: "ROW(rideId LONG, isStart BOOLEAN, eventTime TIMESTAMP, lon FLOAT, lat FLOAT, psgCnt INT, taxiId LONG)"
# "Fares" source table: taxi fare/payment events consumed from Kafka.
- name: Fares
  type: source
  update-mode: append
  schema:
  - name: rideId
    type: LONG
  # 'payTime' is derived from the JSON field 'eventTime' and declared as an
  # event-time (rowtime) attribute with a periodic bounded watermark.
  - name: payTime
    type: TIMESTAMP
    rowtime:
      timestamps:
        type: "from-field"          # take the timestamp from a record field
        from: "eventTime"           # source field holding the event timestamp
      watermarks:
        type: "periodic-bounded"    # periodic watermark with a fixed bound
        delay: "60000"              # maximum out-of-orderness in milliseconds
  - name: payMethod
    type: STRING
  - name: tip
    type: FLOAT
  - name: toll
    type: FLOAT
  - name: fare
    type: FLOAT
  connector:
    property-version: 1
    type: kafka
    version: universal              # 'universal' for Kafka 0.11+
    topic: Fares                    # Kafka topic to consume
    startup-mode: earliest-offset   # read the topic from the beginning
    properties:
    - key: zookeeper.connect
      value: zookeeper:2181
    - key: bootstrap.servers
      value: kafka:9092
    - key: group.id                 # Kafka consumer group
      value: testGroup
  format:
    property-version: 1
    type: json
    schema: "ROW(rideId LONG, eventTime TIMESTAMP, payMethod STRING, tip FLOAT, toll FLOAT, fare FLOAT)"
# "DriverChanges" source table: driver shift-change events consumed from
# Kafka. Serves as the history table backing the "Drivers" temporal table.
- name: DriverChanges
  type: source
  update-mode: append
  schema:
  - name: taxiId
    type: LONG
  - name: driverId
    type: LONG
  # 'usageStartTime' is derived from the JSON field 'eventTime' and declared
  # as an event-time (rowtime) attribute with a periodic bounded watermark.
  - name: usageStartTime
    type: TIMESTAMP
    rowtime:
      timestamps:
        type: "from-field"          # take the timestamp from a record field
        from: "eventTime"           # source field holding the event timestamp
      watermarks:
        type: "periodic-bounded"    # periodic watermark with a fixed bound
        delay: "60000"              # maximum out-of-orderness in milliseconds
  connector:
    property-version: 1
    type: kafka
    version: universal              # 'universal' for Kafka 0.11+
    topic: DriverChanges            # Kafka topic to consume
    startup-mode: earliest-offset   # read the topic from the beginning
    properties:
    - key: zookeeper.connect
      value: zookeeper:2181
    - key: bootstrap.servers
      value: kafka:9092
    - key: group.id                 # Kafka consumer group
      value: testGroup
  format:
    property-version: 1
    type: json
    schema: "ROW(eventTime TIMESTAMP, taxiId LONG, driverId LONG)"
# "Drivers" temporal-table view over the DriverChanges history table:
# versions rows per 'taxiId' by the 'usageStartTime' time attribute, so a
# temporal join can look up which driver was using a taxi at a given time.
- name: Drivers
  type: temporal-table
  history-table: DriverChanges      # source table providing the history
  primary-key: taxiId               # key the versions are tracked by
  time-attribute: usageStartTime    # rowtime attribute used for versioning
# "Sink_TenMinPsgCnt" sink table: results are written to the external system
# (here a Kafka topic; for other sinks this would be e.g. a MySQL table).
- name: Sink_TenMinPsgCnt
  type: sink-table                  # sink tables receive query output
  # Output fields and types; the query must produce columns matching this
  # schema (aliases in the SQL map query columns to these names).
  schema:
  - name: cntStart
    type: STRING
  - name: cntEnd
    type: STRING
  - name: cnt
    type: INT
  update-mode: append
  connector:
    property-version: 1
    type: kafka
    version: universal              # 'universal' for Kafka 0.11+
    topic: Sink_TenMinPsgCnt        # Kafka topic the results are written to
    properties:
    - key: zookeeper.connect
      value: zookeeper:2181
    - key: bootstrap.servers
      value: kafka:9092
    - key: group.id
      value: testGroup
  format:
    property-version: 1
    type: json
    # Serialized record layout; field count, types, and order must match the
    # 'schema' section above exactly.
    schema: "ROW(cntStart STRING,cntEnd STRING,cnt INT)"
# User-defined functions, loaded from classes on the SQL Client classpath.
functions:
- name: isInNYC
  from: class
  class: com.ververica.sql_training.udfs.IsInNYC
- name: toAreaId
  from: class
  class: com.ververica.sql_training.udfs.ToAreaId
- name: toCoords
  from: class
  class: com.ververica.sql_training.udfs.ToCoords
#==============================================================================
# Execution properties
#==============================================================================

# Execution properties allow for changing the behavior of a table program.
execution:
  planner: blink                 # using the Blink planner
  type: streaming                # 'batch' or 'streaming' execution
  result-mode: table             # 'changelog' or 'table' presentation of results
  parallelism: 1                 # parallelism of the program
  max-parallelism: 128           # maximum parallelism
  min-idle-state-retention: 0    # minimum idle state retention in ms
  max-idle-state-retention: 0    # maximum idle state retention in ms
#==============================================================================
# Deployment properties
#==============================================================================

# Deployment properties allow for describing the cluster to which table
# programs are submitted to.
deployment:
  type: standalone               # only the 'standalone' deployment is supported
  response-timeout: 5000         # general cluster communication timeout in ms
  gateway-address: ""            # (optional) address from cluster to gateway
  gateway-port: 0                # (optional) port from cluster to gateway