Flink 的 DataStream 没有原生的 distinct 方法,下面使用 ValueState 实现一个:
// De-duplicates the stream: every record is used as its own key, and only the
// first record observed for a given key is forwarded downstream.
// NOTE(review): keying by the whole record presumably relies on PO providing
// stable, value-based hashCode()/equals() — confirm against PO's definition.
// NOTE(review): the per-key marker is never cleared in this code and no state
// TTL is configured on the descriptor, so the state grows with the number of
// distinct records — confirm this is acceptable for the expected key space.
SingleOutputStreamOperator<PO> newStream = stream
        .keyBy(new KeySelector<PO, PO>() {
            @Override
            public PO getKey(PO value) throws Exception {
                // The record itself is the key.
                return value;
            }
        })
        .process(new KeyedProcessFunction<PO, PO, PO>() {
            // Per-key "already seen" marker: null until the first record for the
            // current key arrives, non-null afterwards.
            // Declared transient because the handle is obtained in open() at
            // runtime and must not be part of the serialized function object.
            private transient ValueState<Integer> state;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                ValueStateDescriptor<Integer> descriptor =
                        new ValueStateDescriptor<>("distinct", Integer.class);
                state = getRuntimeContext().getState(descriptor);
            }

            @Override
            public void processElement(PO value,
                    KeyedProcessFunction<PO, PO, PO>.Context ctx,
                    Collector<PO> out) throws Exception {
                // Emit only on first sight of this key; later duplicates are dropped.
                if (state.value() == null) {
                    state.update(1);
                    out.collect(value);
                }
            }
        });