State(状态管理)可以用来物化流计算中的数据,用于聚合和容错。目前状态可以物化到 JobManager 内存或文件系统中,RocksDB 后端正在开发中。共有四种类型的 state(ValueState、ListState、ReducingState、FoldingState),通过 StreamingRuntimeContext 上对应的 get 方法(例如 getState)获取相应的 state。
package com.alibaba.flink.train.streaming;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.FoldingState;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.State;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
/**
 * Per-key running sum kept in a single {@link ValueState}.
 *
 * <p>For each incoming {@code (key, amount)} pair the amount is added to the
 * total stored for the current key. Once the total reaches
 * {@link #EMIT_THRESHOLD}, a {@code (key, total)} pair is emitted and the
 * state for that key is cleared, so the next element starts from the default
 * value again.
 *
 * <p>The state is obtained through {@code getRuntimeContext().getState(...)}
 * in {@link #open(Configuration)}.
 *
 * @author fuling.lgz
 */
public class SumSingleValueStateFunction extends
        RichFlatMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {

    private static final long serialVersionUID = 1L;

    /** Running total at (or above) which the sum is emitted and the state reset. */
    private static final long EMIT_THRESHOLD = 10L;

    /** Name under which the value state is registered. */
    private static final String STATE_NAME = "saleValue";

    // The fields below only illustrate the other kinds of state that exist;
    // they are never assigned or read by this class.
    State state;
    FoldingState<String, String> foldingState;
    ListState<String> listState;
    ReducingState<String> reducingState;

    /**
     * State handle for the current key. Only {@code f0} (the running total)
     * is used by this class; {@code f1} is never read or written here.
     * Transient because it is re-created in {@link #open(Configuration)}.
     */
    private transient ValueState<Tuple2<Long, Long>> valueState;

    /**
     * Adds {@code value.f1} to the running total of the current key and emits
     * the total once it reaches {@link #EMIT_THRESHOLD}.
     *
     * @param value the incoming {@code (key, amount)} pair
     * @param out   collector receiving {@code (key, total)} when the threshold is reached
     * @throws Exception if the state cannot be read or updated
     */
    @Override
    public void flatMap(Tuple2<String, Integer> value,
            Collector<Tuple2<String, Integer>> out) throws Exception {
        Tuple2<Long, Long> current = valueState.value();
        if (current == null) {
            // Defensive: the descriptor registers a (0, 0) default, but do not
            // rely on the backend always materializing it.
            current = Tuple2.of(0L, 0L);
        }

        current.f0 = current.f0 + value.f1;
        valueState.update(current);

        if (current.f0 >= EMIT_THRESHOLD) {
            // The output type is Integer, so the long total is narrowed here.
            out.collect(Tuple2.of(value.f0, current.f0.intValue()));
            valueState.clear();
        }
    }

    /**
     * Registers the value state with a default of {@code (0, 0)}.
     *
     * @param parameters configuration passed in by the runtime (unused here)
     * @throws Exception if the state cannot be obtained from the runtime context
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // The anonymous TypeHint subclass carries the full generic type of the
        // tuple into TypeInformation.of(...).
        TypeInformation<Tuple2<Long, Long>> stateType = TypeInformation
                .of(new TypeHint<Tuple2<Long, Long>>() {
                });
        ValueStateDescriptor<Tuple2<Long, Long>> descriptor =
                new ValueStateDescriptor<Tuple2<Long, Long>>(
                        STATE_NAME, stateType, Tuple2.of(0L, 0L));
        valueState = getRuntimeContext().getState(descriptor);
    }
}
package com.alibaba.flink.train.streaming;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demo streaming job: splits each input line into words, keys the stream by
 * word, and sums the per-word counts with {@link SumSingleValueStateFunction}.
 *
 * <p>Pipeline stages:
 * <ol>
 *   <li>source: lines of text from {@code MemSource}, e.g. "flink storm";</li>
 *   <li>flatMap: each word becomes a {@code (word, 1)} tuple;</li>
 *   <li>keyBy(0): partition by tuple position 0 (the word);</li>
 *   <li>flatMap: per-key stateful sum;</li>
 *   <li>sink: results go to {@code MemSink}.</li>
 * </ol>
 */
public class HelloWorld {

    /**
     * Builds and runs the job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment
                .getExecutionEnvironment();
        env.setParallelism(4); // parallelism of the job

        // The original also created a readTextFile("D:/flinkdata/helloworld")
        // stream that was overwritten immediately and never consumed; only
        // the in-memory source is wired into the pipeline.
        DataStream<String> lines = env.addSource(new MemSource());

        lines
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String input,
                            Collector<Tuple2<String, Integer>> collector)
                            throws Exception {
                        for (String word : input.split(" ")) {
                            // Position 0 holds the word, position 1 its count (1);
                            // the keyBy(0) below depends on this layout.
                            collector.collect(Tuple2.of(word, 1));
                        }
                    }
                })
                .keyBy(0) // partition by the value at position 0 (the word)
                .flatMap(new SumSingleValueStateFunction()) // per-key sum
                .addSink(new MemSink());

        // Starts the job. The busy-wait `while (true) {}` that used to follow
        // this call was removed: it only spun a CPU core and kept the JVM from
        // exiting after the job finished.
        env.execute();
    }
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.state;
import org.apache.flink.annotation.PublicEvolving;
/**
* Interface that different types of partitioned state must implement.
*
* <p>The state is only accessible by functions applied on a KeyedDataStream. The key is
* automatically supplied by the system, so the function always sees the value mapped to the
* key of the current element. That way, the system can handle stream and state partitioning
* consistently together.
*/
@PublicEvolving
public interface State {
/**
* Removes the value mapped under the current key.
*
* <p>The key is the one supplied by the system for the element currently
* being processed; callers do not pass it explicitly.
*/
void clear();
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.state;
import org.apache.flink.annotation.PublicEvolving;
import java.io.IOException;
/**
* {@link State} interface for partitioned single-value state. The value can be retrieved or
* updated.
*
* <p>The state is accessed and modified by user functions, and checkpointed consistently
* by the system as part of the distributed snapshots.
*
* <p>The state is only accessible by functions applied on a KeyedDataStream. The key is
* automatically supplied by the system, so the function always sees the value mapped to the
* key of the current element. That way, the system can handle stream and state partitioning
* consistently together.
*
* @param <T> Type of the value in the state.
*/
@PublicEvolving
public interface ValueState<T> extends State, OperatorState<T> {
/**
* Returns the current value for the state.
*
* <p>When the state is not partitioned, the returned value is the same for
* all inputs in a given operator instance. If state partitioning is applied,
* the value returned depends on the current operator input, as the operator
* maintains an independent state for each partition.
*
* @return The operator state value corresponding to the current input.
*
* @throws IOException Thrown if the system cannot access the state.
*/
T value() throws IOException;
/**
* Updates the operator state accessible by {@link #value()} to the given
* value.
*
* <p>The next time {@link #value()} is called (for the same state
* partition) the returned state will represent the updated value. When a
* partitioned state is updated with {@code null}, the state for the current
* key will be removed and the default value is returned on the next access.
*
* @param value The new value for the state; {@code null} removes the state
*              for the current key.
*
* @throws IOException Thrown if the system cannot access the state.
*/
void update(T value) throws IOException;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENS

最低0.47元/天 解锁文章
3297

被折叠的 条评论
为什么被折叠?



