Flink-Table StreamTableEnvironment实践编程(四)下

七、实现UpsertStreamTableSink操作

import com.springk.flink.bean.StudentInfo;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.util.Collector;

/**
 * Demo job that reads comma-separated student records from a socket, registers them as the
 * table {@code studentInfo}, sums the score per student name with SQL, and writes the
 * continuously updated result into a {@link StreamUpsertStreamTableSink}.
 *
 * <p>Each input line is expected to have exactly four comma-separated fields in the order
 * {@code name,sex,course,score}. Lines that do not match are skipped.
 */
public class TableStreamFlinkStudentUpsertTest {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);

        // Source: read newline-delimited text from a socket.
        DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 9999, "\n");

        // Convert each input line into a StudentInfo so later steps can work with typed fields.
        SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
            @Override
            public void flatMap(String s, Collector<StudentInfo> collector) {
                // Check for blank input before splitting so empty lines are rejected up front.
                if (StringUtils.isBlank(s)) {
                    return;
                }
                String[] infos = s.split(",");
                if (infos.length != 4) {
                    return;
                }

                float score;
                try {
                    score = Float.parseFloat(infos[3]);
                } catch (NumberFormatException ignored) {
                    // A non-numeric score is treated like any other malformed line and skipped,
                    // so a single bad record does not fail the whole streaming job.
                    return;
                }

                StudentInfo studentInfo = new StudentInfo();
                studentInfo.setName(infos[0]);
                studentInfo.setSex(infos[1]);
                studentInfo.setCourse(infos[2]);
                studentInfo.setScore(score);
                collector.collect(studentInfo);
            }
        });

        // Register the dataStreamStudent stream as a table named "studentInfo".
        streamTableEnvironment.registerDataStream("studentInfo", dataStreamStudent, "name,sex,course,score");

        // Run the aggregation and route its result into the registered upsert sink.
        Table rTable = streamTableEnvironment.sqlQuery("select name,sum(score) as sum_total_score from studentInfo group by name");
        streamTableEnvironment.registerTableSink("upsertStream", new StreamUpsertStreamTableSink(rTable.getSchema()));
        rTable.insertInto("upsertStream");

        env.execute("studentScoreAnalyse");
    }
}

        

import com.springk.flink.bean.StudentScoreResult;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.sinks.TableSink;
import org.apache.flink.table.sinks.UpsertStreamTableSink;

public class StreamUpsertStreamTableSink implements UpsertStreamTableSink<StudentScoreResult> {

    private TableSchema tableSchema;

    public StreamUpsertStreamTableSink(TableSchema schema) {
        this.tableSchema = schema;
    }

    @Override
    public void setKeyFields(String[] keys) {

    }

    @Override
    public void setIsAppendOnly(Boolean isAppendOnly) {

    }

    @Override
    public TypeInformation<StudentScoreResult> getRecordType() {
        return TypeInformation.of(new TypeHint<StudentScoreResult>(){});
    }

    @Override
    public void emitDataStream(DataStream<Tuple2<Boolean, StudentScoreResult>> dataStream) {

    }

    @Override
    public DataStreamSink<?> consumeDataStream(DataStream<Tuple2<Boolean, StudentScoreResult>> dataStream) {
        return dataStream.addSink(new SinkFunction<Tuple2<Boolean, StudentScoreResult>>() {
            @Override
            public void invoke(Tuple2<Boolean, StudentScoreResult> value){
                System.out.println(value);
            }
        });
    }

    @Override
    public TableSchema getTableSchema() {
        return tableSchema;
    }

    @Override
    public TableSink<Tuple2<Boolean, StudentScoreResult>> configure(String[] strings, TypeInformation<?>[] typeInformations) {
        return null;
    }
}

    命令行中输入如下:
        
    打印输出内容如下:
        
    可以看到,每次更新只输出一条数据。

    RetractStreamTableSink中:Insert被编码成一条Add消息;Delete被编码成一条Retract消息;Update被编码成两条消息(先是一条Retract消息,再是一条Add消息),即先删除再增加。
    UpsertStreamTableSink中:Insert和Update均被编码成一条消息(Upsert消息);Delete被编码成一条Delete消息。
    UpsertStreamTableSink和RetractStreamTableSink最大的不同在于:Update只被编码成一条消息,因此效率上比RetractStreamTableSink高。

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值