Flink 1.12 JOINS<文件连接器 filesystem>

1 Left Join 普通
Orders.txt

2020-04-15 08:05,4.00,supplier1
2020-04-15 08:06,4.00,supplier2
2020-04-15 08:07,2.00,supplier1
2020-04-15 08:08,2.00,supplier3
2020-04-15 08:09,5.00,supplier4
2020-04-15 08:11,2.00,supplier3
2020-04-15 08:13,1.00,supplier1
2020-04-15 08:15,3.00,supplier2
2020-04-15 08:17,6.00,supplier5
2020-04-15 08:25,6.00,supplier5
2020-04-15 08:30,6.00,supplier5

Product.txt

supplier1,A,S
supplier2,B,S
supplier3,C,N
supplier4,D,S
supplier5,E,N

package com.cn.sql;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

/**
 * Demo of a regular LEFT JOIN between two tables backed by the filesystem connector.
 *
 * <p>The job registers the {@code Orders} and {@code Product} tables (both read as CSV from
 * local files), joins them on {@code id}, converts the result to a retract stream and prints it.
 */
public class SqlJoins {

    /** DDL for the {@code Orders} table: three STRING columns read from a local CSV file. */
    private static final String ORDERS_DDL =
            "CREATE TABLE Orders ("
                    + "  user_id STRING,\n"
                    + "  order_amount STRING,\n"
                    + "  id STRING\n"
                    + ") \n "
                    + "WITH (\n"
                    + "  'connector'='filesystem',\n"
                    + "  'path'='file:///E:/大数据相关-学员参考/flinkdemo/src/main/resources/o1/Orders.txt',\n"
                    + "  'format'='csv'\n"
                    + ")";

    /** DDL for the {@code Product} table: three STRING columns read from a local CSV file. */
    private static final String PRODUCT_DDL =
            "CREATE TABLE Product( \n"
                    + "id STRING,\n"
                    + "S STRING ,\n"
                    + "D STRING\n)"
                    + "WITH (\n"
                    + "  'connector'='filesystem',\n"
                    + "  'path'='file:///E:/大数据相关-学员参考/flinkdemo/src/main/resources/o2/Product.txt',\n"
                    + "  'format'='csv'\n"
                    + ")";

    /** Query that keeps every {@code Orders} row and attaches the {@code Product} row with the same id. */
    private static final String LEFT_JOIN_QUERY =
            "SELECT * FROM Orders LEFT JOIN Product ON Orders.id=Product.id";

    /**
     * Registers both tables, runs the LEFT JOIN and prints the resulting retract stream.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Register the two source tables.
        tableEnv.executeSql(ORDERS_DDL);
        tableEnv.executeSql(PRODUCT_DDL);

        // Run the join and expose it as a stream of (Boolean flag, Row) pairs.
        final Table joined = tableEnv.sqlQuery(LEFT_JOIN_QUERY);
        final DataStream<Tuple2<Boolean, Row>> retractStream =
                tableEnv.toRetractStream(joined, Row.class);

        // Every printed record is prefixed with "join".
        retractStream.print("join");
        env.execute("test join");
    }
}

注意:如果读取的是本地文件,则不需要指定分区;并且路径的写法也与 HDFS 不同(官网没有说明),需要使用 file:/// 前缀,可参照代码中的写法:

file:///E:/大数据相关-学员参考/flinkdemo/src/main/resources/o1/Orders.txt

另外,将表转换为流时需要使用 toRetractStream,因为 join 的结果不是简单的追加,而是需要修改(撤回)之前已经输出的值。同时还需要设置状态的过期时间:如果不设置,状态会一直保留以等待 join,从而造成状态积压。

(true,2020-04-15 08:08,2.00,supplier3,null,null,null)
(true,2020-04-15 08:13,1.00,supplier1,null,null,null)
(true,2020-04-15 08:17,6.00,supplier5,supplier5,E,N)
(true,2020-04-15 08:15,3.00,supplier2,null,null,null)
(false,2020-04-15 08:13,1.00,supplier1,null,null,null)
(true,2020-04-15 08:25,6.00,supplier5,supplier5,E,N)
(true,2020-04-15 08:13,1.00,supplier1,supplier1,A,S)
(false,2020-04-15 08:15,3.00,supplier2,null,null,null)
(true,2020-04-15 08:15,3.00,supplier2,supplier2,B,S)
(true,2020-04-15 08:05,4.00,supplier1,supplier1,A,S)
(true,2020-04-15 08:06,4.00,supplier2,supplier2,B,S)
(true,2020-04-15 08:07,2.00,supplier1,supplier1,A,S)
(true,2020-04-15 08:11,2.00,supplier3,null,null,null)
(true,2020-04-15 08:09,5.00,supplier4,supplier4,D,S)
(true,2020-04-15 08:30,6.00,supplier5,supplier5,E,N)
(false,2020-04-15 08:11,2.00,supplier3,null,null,null)
(false,2020-04-15 08:08,2.00,supplier3,null,null,null)
(true,2020-04-15 08:11,2.00,supplier3,supplier3,C,N)
(true,2020-04-15 08:08,2.00,supplier3,supplier3,C,N)

这里先输出 true(此时右表字段为 null);当出现可与之 JOIN 的数据后,先输出 false 撤回原值,再输出 true 更新数据,返回 join 后的结果。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值