一、FlinkSQL output tables: writing to Kafka
### --- Writing to Kafka
~~~ # Write a table out to a Kafka topic
// Custom source that emits "name1", "name2", ... once per second
DataStreamSource<String> data = env.addSource(new SourceFunction<String>() {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        int num = 0;
        while (running) {
            num++;
            ctx.collect("name" + num);
            Thread.sleep(1000);
        }
    }

    @Override
    public void cancel() {
        // Stop the emitting loop when the job is cancelled
        running = false;
    }
});
// Convert the stream into a Table with a single column named "name"
Table name = tEnv.fromDataStream(data, $("name"));
ConnectTableDescriptor descriptor = tEnv.connect(
        // declare the external system to connect to
        new Kafka()
                .version("universal")
                .topic("animal")
                .startFromEarliest()
                .property("bootstrap.servers", "hdp-2:9092")
)
        // declare a format for this system
        .withFormat(
                // new Json()
                new Csv()
        )
        // declare the schema of the table
        .withSchema(
                new Schema()
                        // .field("rowtime", DataTypes.TIMESTAMP(3))
                        //         .rowtime(new Rowtime()
                        //                 .timestampsFromField("timestamp")
                        //                 .watermarksPeriodicBounded(60000)
                        //         )
                        // .field("user", DataTypes.BIGINT())
                        .field("message", DataTypes.STRING())
        );

// create a table with the given name, then write the stream table into it
descriptor.createTemporaryTable("MyUserTable");
name.executeInsert("MyUserTable");
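
To check the sink end to end, you can read the `animal` topic back with the Kafka console consumer. A usage sketch: it assumes `kafka-console-consumer.sh` is on the PATH (it ships in Kafka's bin directory) and reuses the broker address from the code above; adjust both for your installation.
~~~ # Consume the topic from the beginning to inspect the emitted CSV rows
kafka-console-consumer.sh --bootstrap-server hdp-2:9092 --topic animal --from-beginning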
### --- Writing to MySQL (for reference only)
CREATE TABLE MyUserTable (
  ...
) WITH (
  -- required: specify this table type is jdbc
  'connector.type' = 'jdbc',
  -- required: JDBC DB url
  'connector.url' = 'jdbc:mysql://localhost:3306/flink-test',
  -- required: jdbc table name
  'connector.table' = 'jdbc_table_name',

  -- optional: the class name of the JDBC driver to use to connect to this URL.
  -- If not set, it will automatically be derived from the URL.
  'connector.driver' = 'com.mysql.jdbc.Driver',
  -- optional: jdbc user name and password
  'connector.username' = 'name',
  'connector.password' = 'password',

  -- **followings are scan options, optional, used when reading from a table**

  -- optional: SQL query / prepared statement.
  -- If set, this will take precedence over the 'connector.table' setting
  'connector.read.query' = 'SELECT * FROM sometable',

  -- These options must all be specified if any of them is specified. In addition,
  -- "partition.num" must be specified. They describe how to partition the table when
  -- reading in parallel from multiple tasks. "partition.column" must be a numeric,
  -- date, or timestamp column from the table in question. Notice that lowerBound and
  -- upperBound are just used to decide the partition stride, not for filtering the
  -- rows in the table. So all rows in the table will be partitioned and returned.

  -- optional: the column name used for partitioning the input.
  'connector.read.partition.column' = 'column_name',
  -- optional: the number of partitions.
  'connector.read.partition.num' = '50',
  -- optional: the smallest value of the first partition.
  'connector.read.partition.lower-bound' = '500',
  -- optional: the largest value of the last partition.
  'connector.read.partition.upper-bound' = '1000',

  -- optional: gives the reader a hint as to the number of rows that should be fetched
  -- from the database per round trip when reading. If the value specified is zero,
  -- the hint is ignored. The default value is zero.
  'connector.read.fetch-size' = '100',

  -- **followings are lookup options, optional, used in temporary join**

  -- optional: max number of rows of lookup cache; over this value, the oldest rows will
  -- be eliminated. "cache.max-rows" and "cache.ttl" options must both be specified if
  -- any of them is specified. Cache is not enabled by default.
  'connector.lookup.cache.max-rows' = '5000',
  -- optional: the max time to live for each row in lookup cache; after this time, the
  -- oldest rows will be expired. "cache.max-rows" and "cache.ttl" options must both be
  -- specified if any of them is specified. Cache is not enabled by default.
  'connector.lookup.cache.ttl' = '10s',
  -- optional: max retry times if lookup database failed
  'connector.lookup.max-retries' = '3',

  -- **followings are sink options, optional, used when writing into table**

  -- optional: flush max size (includes all append, upsert and delete records);
  -- over this number of records, data will be flushed. The default value is "5000".
  'connector.write.flush.max-rows' = '5000',
  -- optional: flush interval in millis; over this time, asynchronous threads will flush
  -- data. The default value is "0s", which means no asynchronous flush thread will be
  -- scheduled.
  'connector.write.flush.interval' = '2s',
  -- optional: max retry times if writing records to database failed
  'connector.write.max-retries' = '3'
)
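
A minimal Java sketch of driving this sink from the Table API, not part of the original lesson: it assumes a Flink 1.11-era setup with the flink-connector-jdbc jar and the MySQL driver on the classpath, and it abridges the DDL above to a single placeholder column (`message`); the table and column names are illustrative only.
~~~ # Register the JDBC table via executeSql, then insert into it
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class JdbcSinkSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

        // Register the MySQL-backed sink table (DDL abridged from the section above;
        // connection properties are placeholder assumptions)
        tEnv.executeSql(
                "CREATE TABLE MyUserTable (message STRING) WITH (" +
                "  'connector.type' = 'jdbc'," +
                "  'connector.url' = 'jdbc:mysql://localhost:3306/flink-test'," +
                "  'connector.table' = 'jdbc_table_name'," +
                "  'connector.username' = 'name'," +
                "  'connector.password' = 'password'" +
                ")");

        // Write two literal rows into MySQL through the registered table
        tEnv.executeSql("INSERT INTO MyUserTable VALUES ('hello'), ('world')");
    }
}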
二、Code implementation
### --- Code implementation: writing a table to Kafka
package com.yanqi.tableql;
import org.apache.flink.api.java.tu