1. ValueState
ValueState<T>: keeps a single value that can be updated and retrieved. The state is scoped to the key of the input element, so the operator may keep one value per key it sees. The value is set with update(T) and read with T value().
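As a minimal sketch of just this API surface (the class and state names here are illustrative, and the imports are the same as in the example below): note that value() returns null for a key that has no state yet unless a default value is configured.
public class ValueStateSketch extends RichFlatMapFunction<String, Long> {
    private transient ValueState<Long> count;

    @Override
    public void open(Configuration config) {
        count = getRuntimeContext().getState(
                new ValueStateDescriptor<>("count", TypeInformation.of(new TypeHint<Long>() {})));
    }

    @Override
    public void flatMap(String in, Collector<Long> out) throws Exception {
        Long current = count.value();                     // null if nothing was stored for this key yet
        long next = (current == null ? 0L : current) + 1; // explicit null handling, no default configured
        count.update(next);                               // write the new value back
        out.collect(next);
        if (next >= 2) {
            count.clear();                                // drop the state for the current key
        }
    }
}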
1.1 Java version
ValueState example: count the incoming records per key; when the count reaches 2, emit the result and clear the state, then continue counting.
package javaState.valuestate;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
public class CountWindowAverage extends RichFlatMapFunction<Tuple3<String, Long, String>, Tuple2<String, Long>> {

    private transient ValueState<Tuple2<String, Long>> sum;

    @Override
    public void flatMap(Tuple3<String, Long, String> input, Collector<Tuple2<String, Long>> out) throws Exception {
        // access the state value
        Tuple2<String, Long> currentSum = sum.value();
        // remember the current record's f0
        currentSum.f0 = input.f0;
        // add the second field of the input (always 1 here, so f1 is the record count)
        currentSum.f1 += input.f1;
        // write the state back
        sum.update(currentSum);
        // once two records have been seen for this key, emit the count and reset the state
        if (currentSum.f1 >= 2) {
            out.collect(new Tuple2<>(input.f0, currentSum.f1));
            sum.clear();
        }
    }

    @Override
    public void open(Configuration config) {
        ValueStateDescriptor<Tuple2<String, Long>> descriptor =
                new ValueStateDescriptor<>(
                        "average", // the state name
                        TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {}), // the type information
                        Tuple2.of(null, 0L)); // default value of the state, if nothing was set
        sum = getRuntimeContext().getState(descriptor);
    }
}
Custom MapFunction: maps each input record to a Tuple3<String, Long, String>; f0 is the original record, f1 is initialized to 1 to make counting easy, and f2 is an extra constant field added to make keyBy convenient.
package javaState.valuestate;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
public class MySelfFunction implements MapFunction<String, Tuple3<String, Long, String>> {
    @Override
    public Tuple3<String, Long, String> map(String value) throws Exception {
        Tuple3<String, Long, String> tuple3 = new Tuple3<>();
        tuple3.f0 = value; // the original record
        tuple3.f1 = 1L;    // per-record count contribution
        tuple3.f2 = "a";   // constant field used as the keyBy key
        return tuple3;
    }
}
Flink program: read data from Kafka, keep a stateful count, and print the output whenever the count reaches 2.
package javaState.valuestate;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class ValueStateDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "CentOS:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> streamSource = new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties);

        DataStreamSource<String> source = env.addSource(streamSource);
        SingleOutputStreamOperator<Tuple3<String, Long, String>> process = source.process(new ProcessFunction<String, Tuple3<String, Long, String>>() {
            @Override
            public void processElement(String value, Context ctx, Collector<Tuple3<String, Long, String>> out) throws Exception {
                out.collect(new MySelfFunction().map(value));
            }
        });

        process.keyBy(2).flatMap(new CountWindowAverage()).print();
        env.execute("test");
    }
}
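A hypothetical run, assuming the messages hello and then world are produced to the Kafka topic: each record is mapped to (text, 1, "a") and keyed by the constant f2, so nothing is printed for the first record, and the second one brings the per-key count to 2:
(world,2)
After the output the state is cleared, so a third message would start a new count of 1.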
1.2 Scala version
Program: a stateful word count that prints the running counts.
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 9999)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(0)
  .map(new RichMapFunction[(String, Int), (String, Int)] {
    var vs: ValueState[Int] = _

    override def open(parameters: Configuration): Unit = {
      val vsd = new ValueStateDescriptor[Int]("valueCount", createTypeInformation[Int])
      vs = getRuntimeContext.getState[Int](vsd)
    }

    override def map(value: (String, Int)): (String, Int) = {
      val historyCount = vs.value()
      val currentCount = historyCount + value._2
      vs.update(currentCount)
      (value._1, currentCount)
    }
  }).print()
env.execute("wordcount")
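A hypothetical session, assuming a socket server such as nc -lk 9999 is running on centos (print() may prefix each line with a subtask index):
input:  hello hello world
output: (hello,1)
        (hello,2)
        (world,1)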
2. ReducingState
ReducingState<T>: keeps a single value that represents the aggregation of all values added to the state. The interface is similar to ListState, but elements added with add(T) are reduced to one aggregate using the supplied ReduceFunction.
2.1 Java version
ReducingState example: count the number of records Flink has processed.
package javaState.reducingState;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.*;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
public class CountWindowAverage extends RichFlatMapFunction<Tuple3<String, Long, String>, Tuple2<String, Long>> {

    private transient ReducingState<Tuple2<String, Long>> sum;

    @Override
    public void flatMap(Tuple3<String, Long, String> input, Collector<Tuple2<String, Long>> out) throws Exception {
        // fold the current record into the state; the ReduceFunction does the aggregation
        Tuple2<String, Long> tuple2 = new Tuple2<>();
        tuple2.f0 = input.f0;
        tuple2.f1 = input.f1;
        sum.add(tuple2);
        // emit the current record together with the running count
        out.collect(new Tuple2<>(input.f0, sum.get().f1));
    }

    @Override
    public void open(Configuration config) {
        ReducingStateDescriptor<Tuple2<String, Long>> descriptor =
                new ReducingStateDescriptor<>(
                        "average",
                        new ReduceFunction<Tuple2<String, Long>>() {
                            @Override
                            public Tuple2<String, Long> reduce(Tuple2<String, Long> value1, Tuple2<String, Long> value2) throws Exception {
                                Tuple2<String, Long> tuple2 = new Tuple2<>();
                                tuple2.f0 = value1.f0;             // keep the first key seen
                                tuple2.f1 = value1.f1 + value2.f1; // accumulate the counts
                                return tuple2;
                            }
                        }, TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {}));
        sum = getRuntimeContext().getReducingState(descriptor);
    }
}
Custom MapFunction: maps each input record to a Tuple3<String, Long, String>; f0 is the original record, f1 is initialized to 1 to make counting easy, and f2 is an extra constant field added to make keyBy convenient.
package javaState.reducingState;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
public class MySelfFunction implements MapFunction<String, Tuple3<String, Long, String>> {
    @Override
    public Tuple3<String, Long, String> map(String value) throws Exception {
        Tuple3<String, Long, String> tuple3 = new Tuple3<>();
        tuple3.f0 = value; // the original record
        tuple3.f1 = 1L;    // per-record count contribution
        tuple3.f2 = "a";   // constant field used as the keyBy key
        return tuple3;
    }
}
Flink program: read data from Kafka and print each record together with the running record count.
package javaState.reducingState;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class ReducingStateDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "CentOS:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> streamSource = new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties);

        DataStreamSource<String> source = env.addSource(streamSource);
        SingleOutputStreamOperator<Tuple3<String, Long, String>> process = source.process(new ProcessFunction<String, Tuple3<String, Long, String>>() {
            @Override
            public void processElement(String value, Context ctx, Collector<Tuple3<String, Long, String>> out) throws Exception {
                out.collect(new MySelfFunction().map(value));
            }
        });

        process.keyBy(2).flatMap(new CountWindowAverage()).print();
        env.execute("test");
    }
}
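A hypothetical run: producing x and then y to the topic prints
(x,1)
(y,2)
since both records share the constant key "a", the ReduceFunction keeps summing the f1 fields (1 per record), and the emitted tuple always carries the current record's f0.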
2.2 Scala version
package com.reducingState.demo02
import org.apache.flink.api.common.functions.{ReduceFunction, RichMapFunction}
import org.apache.flink.api.common.state.{ReducingState, ReducingStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
object ReducingStateDemo {
  def main(args: Array[String]): Unit = {
    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    fsEnv.socketTextStream("CentOS", 9999)
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        var reducingState: ReducingState[(String, Int)] = _

        override def open(parameters: Configuration): Unit = {
          val rsd = new ReducingStateDescriptor[(String, Int)]("reducingState", new ReduceFunction[(String, Int)] {
            override def reduce(t: (String, Int), t1: (String, Int)): (String, Int) = {
              (t._1, t._2 + t1._2)
            }
          }, createTypeInformation[(String, Int)])
          reducingState = getRuntimeContext.getReducingState(rsd)
        }

        override def map(in: (String, Int)): (String, Int) = {
          reducingState.add(in)
          reducingState.get()
        }
      }).print()
    fsEnv.execute("reducingState")
  }
}
3. ListState
ListState<T>: keeps a list of elements. You can append elements and retrieve an Iterable over all currently stored elements. Elements are added with add(T) or addAll(List<T>), the Iterable is retrieved with Iterable<T> get(), and the existing list can be overwritten with update(List<T>).
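A minimal sketch that also exercises addAll and update, which the example below does not use (the class and state names are illustrative; imports as in the example below):
public class ListStateSketch extends RichFlatMapFunction<Tuple2<String, Long>, Long> {
    private transient ListState<Long> items;

    @Override
    public void open(Configuration config) {
        items = getRuntimeContext().getListState(
                new ListStateDescriptor<>("items", TypeInformation.of(new TypeHint<Long>() {})));
    }

    @Override
    public void flatMap(Tuple2<String, Long> in, Collector<Long> out) throws Exception {
        items.add(in.f1);                                    // append a single element
        items.addAll(java.util.Arrays.asList(in.f1, in.f1)); // append several elements at once
        long total = 0L;
        for (Long v : items.get()) {                         // iterate everything stored for this key
            total += v;
        }
        items.update(java.util.Collections.singletonList(total)); // overwrite the whole list
        out.collect(total);
    }
}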
3.1 Java version
ListState example: count the number of records Flink has processed.
package javaState.listState;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.*;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
import java.util.Iterator;
public class CountWindowAverage extends RichFlatMapFunction<Tuple3<String, Long, String>, Tuple2<String, Long>> {

    private transient ListState<Long> sum;

    @Override
    public void flatMap(Tuple3<String, Long, String> input, Collector<Tuple2<String, Long>> out) throws Exception {
        // append the current record's count contribution (always 1) to the state
        sum.add(input.f1);
        Tuple2<String, Long> tuple2 = new Tuple2<>();
        tuple2.f0 = input.f0;
        // sum all stored elements; since each one is 1, this is the record count
        long count = 0L;
        Iterator<Long> iterator = sum.get().iterator();
        while (iterator.hasNext()) {
            count += iterator.next();
        }
        tuple2.f1 = count;
        out.collect(tuple2);
    }

    @Override
    public void open(Configuration config) {
        ListStateDescriptor<Long> descriptor =
                new ListStateDescriptor<>(
                        "average",
                        TypeInformation.of(new TypeHint<Long>() {})
                );
        sum = getRuntimeContext().getListState(descriptor);
    }
}
Custom MapFunction: maps each input record to a Tuple3<String, Long, String>; f0 is the original record, f1 is initialized to 1 to make counting easy, and f2 is an extra constant field added to make keyBy convenient.
package javaState.listState;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
public class MySelfFunction implements MapFunction<String, Tuple3<String, Long, String>> {
    @Override
    public Tuple3<String, Long, String> map(String value) throws Exception {
        Tuple3<String, Long, String> tuple3 = new Tuple3<>();
        tuple3.f0 = value; // the original record
        tuple3.f1 = 1L;    // per-record count contribution
        tuple3.f2 = "a";   // constant field used as the keyBy key
        return tuple3;
    }
}
Flink program: read data from Kafka and print each record together with the running record count.
package javaState.listState;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class ListStateDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "CentOS:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> streamSource = new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties);

        DataStreamSource<String> source = env.addSource(streamSource);
        SingleOutputStreamOperator<Tuple3<String, Long, String>> process = source.process(new ProcessFunction<String, Tuple3<String, Long, String>>() {
            @Override
            public void processElement(String value, Context ctx, Collector<Tuple3<String, Long, String>> out) throws Exception {
                out.collect(new MySelfFunction().map(value));
            }
        });

        process.keyBy(2).flatMap(new CountWindowAverage()).print();
        env.execute("test");
    }
}
3.2 Scala version
package com.baizhi.demo02

import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

import scala.collection.JavaConverters._

object ListStateDemo {
  def main(args: Array[String]): Unit = {
    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    fsEnv.socketTextStream("CentOS", 9999)
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        var listState: ListState[Int] = _

        override def open(parameters: Configuration): Unit = {
          val lsd = new ListStateDescriptor[Int]("listState", createTypeInformation[Int])
          listState = getRuntimeContext.getListState(lsd)
        }

        override def map(in: (String, Int)): (String, Int) = {
          listState.add(in._2)
          // sum every element stored for this key; with 1 per record this is the word count
          (in._1, listState.get().asScala.sum)
        }
      }).print()
    fsEnv.execute("listState")
  }
}
4. FoldingState
FoldingState<T, ACC>: keeps a single value that represents the aggregation of all values added to the state. In contrast to ReducingState, the aggregate type may differ from the type of elements added to the state. The interface is similar to ListState, but elements added with add(T) are folded into an aggregate using the supplied FoldFunction. (Note that FoldingState is deprecated in recent Flink versions in favor of AggregatingState.)
4.1 Java version
FoldingState example: count the number of records Flink has processed.
package javaState.foldState;
import org.apache.flink.api.common.functions.FoldFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.FoldingState;
import org.apache.flink.api.common.state.FoldingStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public class CountWindowAverage extends RichFlatMapFunction<Tuple3<String, Long, String>, Tuple2<String, Long>> {

    private transient FoldingState<Long, Long> sum;

    @Override
    public void flatMap(Tuple3<String, Long, String> input, Collector<Tuple2<String, Long>> out) throws Exception {
        // fold the current record's count contribution into the state
        sum.add(input.f1);
        // emit the record together with the running count
        out.collect(new Tuple2<>(input.f0, sum.get()));
    }

    @Override
    public void open(Configuration config) {
        FoldingStateDescriptor<Long, Long> descriptor =
                new FoldingStateDescriptor<>(
                        "average", 0L, // state name and initial accumulator value
                        new FoldFunction<Long, Long>() {
                            @Override
                            public Long fold(Long accumulator, Long value) throws Exception {
                                return accumulator + value;
                            }
                        }, TypeInformation.of(new TypeHint<Long>() {})
                );
        sum = getRuntimeContext().getFoldingState(descriptor);
    }
}
Custom MapFunction: maps each input record to a Tuple3<String, Long, String>; f0 is the original record, f1 is initialized to 1 to make counting easy, and f2 is an extra constant field added to make keyBy convenient.
package javaState.foldState;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
public class MySelfFunction implements MapFunction<String, Tuple3<String, Long, String>> {
    @Override
    public Tuple3<String, Long, String> map(String value) throws Exception {
        Tuple3<String, Long, String> tuple3 = new Tuple3<>();
        tuple3.f0 = value; // the original record
        tuple3.f1 = 1L;    // per-record count contribution
        tuple3.f2 = "a";   // constant field used as the keyBy key
        return tuple3;
    }
}
Flink program: read data from Kafka and print each record together with the running record count.
package javaState.foldState;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class FoldStateDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "CentOS:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> streamSource = new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties);

        DataStreamSource<String> source = env.addSource(streamSource);
        SingleOutputStreamOperator<Tuple3<String, Long, String>> process = source.process(new ProcessFunction<String, Tuple3<String, Long, String>>() {
            @Override
            public void processElement(String value, Context ctx, Collector<Tuple3<String, Long, String>> out) throws Exception {
                out.collect(new MySelfFunction().map(value));
            }
        });

        process.keyBy(2).flatMap(new CountWindowAverage()).print();
        env.execute("test");
    }
}
4.2 Scala version
package com.baizhi.demo02

import org.apache.flink.api.common.functions.{FoldFunction, RichMapFunction}
import org.apache.flink.api.common.state.{FoldingState, FoldingStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

object FoldState {
  def main(args: Array[String]): Unit = {
    val fsEnv = StreamExecutionEnvironment.getExecutionEnvironment
    fsEnv.socketTextStream("CentOS", 9999)
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .keyBy(0)
      .map(new RichMapFunction[(String, Int), (String, Int)] {
        var foldState: FoldingState[Int, Int] = _

        override def open(parameters: Configuration): Unit = {
          val fsd = new FoldingStateDescriptor[Int, Int]("foldingState", 0, new FoldFunction[Int, Int] {
            override def fold(t: Int, o: Int): Int = {
              t + o
            }
          }, createTypeInformation[Int])
          foldState = getRuntimeContext.getFoldingState(fsd)
        }

        override def map(in: (String, Int)): (String, Int) = {
          foldState.add(in._2)
          (in._1, foldState.get())
        }
      }).print()
    fsEnv.execute("foldState")
  }
}
5. MapState
MapState<UK, UV>: keeps a map of key/value mappings. You can put key/value pairs into the state and retrieve an Iterable over all currently stored mappings. Mappings are added with put(UK, UV) or putAll(Map<UK, UV>), and the value associated with a user key is retrieved with get(UK). Iterable views of the mappings, keys, and values can be retrieved with entries(), keys(), and values() respectively. You can also use isEmpty() to check whether the map contains any key/value mappings.
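A minimal sketch of the lookup and iteration calls (names are illustrative; imports as in the example below, plus java.util.Map for Map.Entry):
public class MapStateSketch extends RichFlatMapFunction<Tuple2<String, Long>, String> {
    private transient MapState<String, Long> hits;

    @Override
    public void open(Configuration config) {
        hits = getRuntimeContext().getMapState(new MapStateDescriptor<>(
                "hits",
                TypeInformation.of(new TypeHint<String>() {}),
                TypeInformation.of(new TypeHint<Long>() {})));
    }

    @Override
    public void flatMap(Tuple2<String, Long> in, Collector<String> out) throws Exception {
        boolean firstForKey = hits.isEmpty();             // any mappings stored for this key yet?
        Long old = hits.get(in.f0);                       // null if the user key is absent
        hits.put(in.f0, old == null ? in.f1 : old + in.f1);           // add or replace one mapping
        hits.putAll(java.util.Collections.singletonMap("total", 0L)); // add several mappings at once
        for (Map.Entry<String, Long> e : hits.entries()) {            // iterate all mappings
            out.collect(e.getKey() + "=" + e.getValue() + (firstForKey ? " (new key)" : ""));
        }
        // hits.keys() and hits.values() give iterable views of just the keys / values
    }
}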
5.1 Java version
MapState example: count the number of records Flink has processed.
package javaState.mapstate;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.*;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
public class CountWindowAverage extends RichFlatMapFunction<Tuple3<String, Long, String>, Tuple2<String, Long>> {

    private transient MapState<String, Long> sum;

    @Override
    public void flatMap(Tuple3<String, Long, String> input, Collector<Tuple2<String, Long>> out) throws Exception {
        // read the current count for this record's value; get() returns null if the key is absent
        Long currentSum = sum.get(input.f0);
        if (currentSum == null) {
            currentSum = 0L;
        }
        currentSum += input.f1;
        // write the updated count back into the map state
        sum.put(input.f0, currentSum);
        out.collect(new Tuple2<>(input.f0, currentSum));
    }

    @Override
    public void open(Configuration config) {
        MapStateDescriptor<String, Long> descriptor =
                new MapStateDescriptor<>(
                        "average",
                        TypeInformation.of(new TypeHint<String>() {}),
                        TypeInformation.of(new TypeHint<Long>() {})
                );
        sum = getRuntimeContext().getMapState(descriptor);
    }
}
Custom MapFunction: maps each input record to a Tuple3<String, Long, String>; f0 is the original record, f1 is initialized to 1 to make counting easy, and f2 is an extra constant field added to make keyBy convenient.
package javaState.mapstate;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
public class MySelfFunction implements MapFunction<String, Tuple3<String, Long, String>> {
    @Override
    public Tuple3<String, Long, String> map(String value) throws Exception {
        Tuple3<String, Long, String> tuple3 = new Tuple3<>();
        tuple3.f0 = value; // the original record
        tuple3.f1 = 1L;    // per-record count contribution
        tuple3.f2 = "a";   // constant field used as the keyBy key
        return tuple3;
    }
}
Flink program: read data from Kafka and print each record together with the running record count.
package javaState.mapstate;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class MapStateDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "CentOS:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> streamSource = new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties);

        DataStreamSource<String> source = env.addSource(streamSource);
        SingleOutputStreamOperator<Tuple3<String, Long, String>> process = source.process(new ProcessFunction<String, Tuple3<String, Long, String>>() {
            @Override
            public void processElement(String value, Context ctx, Collector<Tuple3<String, Long, String>> out) throws Exception {
                out.collect(new MySelfFunction().map(value));
            }
        });

        process.keyBy(2).flatMap(new CountWindowAverage()).print();
        env.execute("test");
    }
}
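A hypothetical run: unlike the earlier demos, the map state here is keyed by the record text itself (input.f0), so the printed count tracks repetitions of identical messages:
kafka> hello   ->  (hello,1)
kafka> hello   ->  (hello,2)
kafka> world   ->  (world,1)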
5.2 Scala version
import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{MapState, MapStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

import scala.collection.JavaConverters._

// record layout of one input line, e.g. "001 zs 202.15.10.12 Japan 2019-10-10"
case class Login(id: String, name: String, ip: String, city: String, loginTime: String)

val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 9999)
  .map(_.split("\\s+"))
  .map(ts => Login(ts(0), ts(1), ts(2), ts(3), ts(4)))
  .keyBy("id", "name")
  .map(new RichMapFunction[Login, String] {
    var vs: MapState[String, String] = _

    override def open(parameters: Configuration): Unit = {
      val msd = new MapStateDescriptor[String, String]("mapstate", createTypeInformation[String], createTypeInformation[String])
      vs = getRuntimeContext.getMapState(msd)
    }

    override def map(value: Login): String = {
      println("previous login")
      for (k <- vs.keys().asScala) {
        println(k + " " + vs.get(k))
      }
      var result = ""
      if (vs.keys().iterator().asScala.isEmpty) {
        // first login for this user: nothing to compare against
        result = "ok"
      } else {
        // flag the login if the city differs from the last recorded one
        if (!value.city.equalsIgnoreCase(vs.get("city"))) {
          result = "error"
        } else {
          result = "ok"
        }
      }
      vs.put("ip", value.ip)
      vs.put("city", value.city)
      vs.put("loginTime", value.loginTime)
      result
    }
  }).print()
env.execute("loginCheck")
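A hypothetical session (input fields: id name ip city loginTime); the first login of a key prints ok, and a later login from a different city prints error, interleaved with the "previous login" debug output:
001 zs 202.15.10.12 Japan 2019-10-10   ->  ok
001 zs 202.15.10.99 China 2019-10-11   ->  error
001 zs 202.15.10.99 China 2019-10-12   ->  ok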
6. AggregatingState
AggregatingState<IN, OUT>: keeps a single value that represents the aggregation of all values added to the state. In contrast to ReducingState, the aggregate type may differ from the type of elements added to the state. The interface is the same as for ListState, but elements added with add(IN) are aggregated using the supplied AggregateFunction.
6.1 Java version
AggregatingState example: count the number of records Flink has processed.
package javaState.aggregatingState;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.AggregatingState;
import org.apache.flink.api.common.state.AggregatingStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;

public class CountWindowAverage extends RichFlatMapFunction<Tuple3<String, Long, String>, Tuple2<String, Long>> {

    private transient AggregatingState<Long, Long> sum;

    @Override
    public void flatMap(Tuple3<String, Long, String> input, Collector<Tuple2<String, Long>> out) throws Exception {
        // aggregate the current record's count contribution into the state
        sum.add(input.f1);
        Tuple2<String, Long> tuple2 = new Tuple2<>();
        tuple2.f0 = input.f0;
        tuple2.f1 = sum.get();
        out.collect(tuple2);
    }

    @Override
    public void open(Configuration config) {
        // the accumulator keeps (sum of added values, number of added elements)
        AggregatingStateDescriptor<Long, Tuple2<Long, Long>, Long> descriptor =
                new AggregatingStateDescriptor<>(
                        "average",
                        new AggregateFunction<Long, Tuple2<Long, Long>, Long>() {
                            @Override
                            public Tuple2<Long, Long> createAccumulator() {
                                // the fields must be initialized, otherwise add() would hit nulls
                                return Tuple2.of(0L, 0L);
                            }

                            @Override
                            public Tuple2<Long, Long> add(Long value, Tuple2<Long, Long> accumulator) {
                                return Tuple2.of(accumulator.f0 + value, accumulator.f1 + 1);
                            }

                            @Override
                            public Long getResult(Tuple2<Long, Long> accumulator) {
                                // each added value is 1, so the running sum is the record count;
                                // returning accumulator.f0 / accumulator.f1 would give the average instead
                                return accumulator.f0;
                            }

                            @Override
                            public Tuple2<Long, Long> merge(Tuple2<Long, Long> a, Tuple2<Long, Long> b) {
                                return Tuple2.of(a.f0 + b.f0, a.f1 + b.f1);
                            }
                        },
                        TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() {})
                );
        sum = getRuntimeContext().getAggregatingState(descriptor);
    }
}
Custom MapFunction: maps each input record to a Tuple3<String, Long, String>; f0 is the original record, f1 is initialized to 1 to make counting easy, and f2 is an extra constant field added to make keyBy convenient.
package javaState.aggregatingState;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple3;
public class MySelfFunction implements MapFunction<String, Tuple3<String, Long, String>> {
    @Override
    public Tuple3<String, Long, String> map(String value) throws Exception {
        Tuple3<String, Long, String> tuple3 = new Tuple3<>();
        tuple3.f0 = value; // the original record
        tuple3.f1 = 1L;    // per-record count contribution
        tuple3.f2 = "a";   // constant field used as the keyBy key
        return tuple3;
    }
}
Flink program: read data from Kafka and print each record together with the running record count.
package javaState.aggregatingState;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class AggregatingStateDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "CentOS:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> streamSource = new FlinkKafkaConsumer011<String>("topic", new SimpleStringSchema(), properties);

        DataStreamSource<String> source = env.addSource(streamSource);
        SingleOutputStreamOperator<Tuple3<String, Long, String>> process = source.process(new ProcessFunction<String, Tuple3<String, Long, String>>() {
            @Override
            public void processElement(String value, Context ctx, Collector<Tuple3<String, Long, String>> out) throws Exception {
                out.collect(new MySelfFunction().map(value));
            }
        });

        process.keyBy(2).flatMap(new CountWindowAverage()).print();
        env.execute("test");
    }
}
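A hypothetical run: producing x and then y prints (x,1) and (y,2); both records share the constant key "a", and each add(1) moves the accumulator from (0,0) to (1,1) to (2,2), whose first field is emitted as the count.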
6.2 Scala version
import org.apache.flink.api.common.functions.{AggregateFunction, RichMapFunction}
import org.apache.flink.api.common.state.{AggregatingState, AggregatingStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 9999)
  .map(_.split("\\s+"))
  .map(ts => (ts(0), ts(1).toInt))
  .keyBy(0)
  .map(new RichMapFunction[(String, Int), (String, Double)] {
    var vs: AggregatingState[Int, Double] = _

    override def open(parameters: Configuration): Unit = {
      // the accumulator keeps (running sum, element count); the result is their quotient
      val vsd = new AggregatingStateDescriptor[Int, (Double, Int), Double]("avgCount", new AggregateFunction[Int, (Double, Int), Double] {
        override def createAccumulator(): (Double, Int) = {
          (0.0, 0)
        }

        override def add(value: Int, accumulator: (Double, Int)): (Double, Int) = {
          (accumulator._1 + value, accumulator._2 + 1)
        }

        override def merge(a: (Double, Int), b: (Double, Int)): (Double, Int) = {
          (a._1 + b._1, a._2 + b._2)
        }

        override def getResult(accumulator: (Double, Int)): Double = {
          accumulator._1 / accumulator._2
        }
      }, createTypeInformation[(Double, Int)])
      vs = getRuntimeContext.getAggregatingState(vsd)
    }

    override def map(value: (String, Int)): (String, Double) = {
      vs.add(value._2)
      val avgCount = vs.get()
      (value._1, avgCount)
    }
  }).print()
env.execute("avgState")
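As a worked example: typing a 1 and then a 3 yields (a,1.0) followed by (a,2.0), because the accumulator for key a evolves (0.0,0) -> (1.0,1) -> (4.0,2), and getResult returns 1.0/1 = 1.0 and then 4.0/2 = 2.0.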