state(状态管理)可以用来物化流计算中的数据,用于聚合和容错。目前状态可以物化到 JobManager 内存或文件系统中,RocksDB 后端正在开发中。state 共有四种类型,通过 StreamingRuntimeContext 上对应的 get 方法获取相应的 state。
package com.alibaba.flink.train.streaming;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.state.FoldingState;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.State;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
/**
 * Flat-map function that keeps a running sum of the integer field of the incoming tuples in a
 * {@link ValueState} and emits {@code (word, sum)} once that sum reaches 10, after which the
 * state is cleared again.
 *
 * <p>Only field {@code f0} of the stored {@code Tuple2<Long, Long>} is read or written here.
 *
 * @author fuling.lgz
 */
public class SumSingleValueStateFunction
        extends RichFlatMapFunction<Tuple2<String, Integer>, Tuple2<String, Integer>> {

    // Base state type; the four fields below are the similar, more specific kinds of state.
    State state;
    private transient ValueState<Tuple2<Long, Long>> valueState;
    FoldingState<String, String> foldingState;
    ListState<String> listState;
    ReducingState<String> reducingState;

    /**
     * Registers the value state named {@code "saleValue"} with a default value of {@code (0, 0)}.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        TypeInformation<Tuple2<Long, Long>> tupleType =
                TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() {
                });
        ValueStateDescriptor<Tuple2<Long, Long>> descriptor =
                new ValueStateDescriptor<Tuple2<Long, Long>>("saleValue", tupleType, Tuple2.of(0L, 0L));
        valueState = getRuntimeContext().getState(descriptor);
    }

    /**
     * Adds {@code value.f1} to the stored sum and emits the sum once it is at least 10.
     *
     * @param value the incoming {@code (word, count)} tuple
     * @param out collector receiving {@code (word, sum)} when the threshold is reached
     */
    @Override
    public void flatMap(Tuple2<String, Integer> value,
            Collector<Tuple2<String, Integer>> out) throws Exception {
        Tuple2<Long, Long> running = valueState.value();
        running.f0 = running.f0 + value.f1;
        valueState.update(running);

        if (running.f0 < 10) {
            return;
        }
        // Threshold reached: emit the sum and start over for this state entry.
        Tuple2<String, Integer> result =
                new Tuple2<String, Integer>(value.f0, running.f0.intValue());
        out.collect(result);
        valueState.clear();
    }
}
package com.alibaba.flink.train.streaming;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Word-count style demo job: splits every input line into words, keys the stream by word and
 * aggregates the per-word counts with {@link SumSingleValueStateFunction}.
 */
public class HelloWorld {
    /**
     * Builds the streaming topology and runs it.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4); // degree of parallelism

        // 1: text lines such as (flink storm) (hadoop hive)
        DataStream<String> dataStream = env.readTextFile("D:/flinkdata/helloworld");
        // NOTE(review): the file source above is immediately replaced by the in-memory source;
        // presumably it is kept only as an alternative input for local experiments.
        dataStream = env.addSource(new MemSource());

        dataStream
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String input, Collector<Tuple2<String, Integer>> collector)
                            throws Exception {
                        String[] objs = input.split(" ");
                        for (String obj : objs) {
                            // Key point: position 0 holds the word, position 1 its count of 1.
                            collector.collect(new Tuple2<String, Integer>(obj, 1));
                        }
                    }
                }) // 2: (flink 1)(storm 1)
                .keyBy(0) // 3: partition by the value at position 0
                // Alternative: .sum(1) would sum position 1 directly, e.g. (flink:8)(storm:5).
                .flatMap(new SumSingleValueStateFunction()) // per-key sum
                .addSink(new MemSink());
        // Alternative sink for debugging: .printToErr()

        env.execute(); // start the job

        // The former empty `while (true)` loop was removed: it pegged a CPU core and kept the JVM
        // from ever exiting. execute() is expected to return only after the job has finished
        // (confirm against the Flink version in use).
    }
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.state;
import org.apache.flink.annotation.PublicEvolving;
/**
 * Base interface that all types of partitioned state must implement.
 *
 * <p>The state is only accessible by functions applied on a KeyedDataStream. The key is
 * automatically supplied by the system, so the function always sees the value mapped to the
 * key of the current element. That way, the system can handle stream and state partitioning
 * consistently together.
 *
 * <p>The only operation declared at this level is {@link #clear()}.
 */
@PublicEvolving
public interface State {
/**
 * Removes the value mapped under the current key.
 */
void clear();
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.state;
import org.apache.flink.annotation.PublicEvolving;
import java.io.IOException;
/**
 * {@link State} interface for partitioned single-value state. The value can be retrieved or
 * updated.
 *
 * <p>The state is accessed and modified by user functions, and checkpointed consistently
 * by the system as part of the distributed snapshots.
 *
 * <p>The state is only accessible by functions applied on a KeyedDataStream. The key is
 * automatically supplied by the system, so the function always sees the value mapped to the
 * key of the current element. That way, the system can handle stream and state partitioning
 * consistently together.
 *
 * @param <T> Type of the value in the state.
 */
@PublicEvolving
public interface ValueState<T> extends State, OperatorState<T> {
/**
 * Returns the current value for the state. When the state is not
 * partitioned the returned value is the same for all inputs in a given
 * operator instance. If state partitioning is applied, the value returned
 * depends on the current operator input, as the operator maintains an
 * independent state for each partition.
 *
 * @return The operator state value corresponding to the current input.
 *
 * @throws IOException Thrown if the system cannot access the state.
 */
T value() throws IOException;
/**
 * Updates the operator state accessible by {@link #value()} to the given
 * value. The next time {@link #value()} is called (for the same state
 * partition) the returned state will represent the updated value.
 *
 * <p>When a partitioned state is updated with {@code null}, the state for the
 * current key will be removed and the default value is returned on the next access.
 *
 * @param value
 *            The new value for the state, or {@code null} to remove the state
 *            for the current key.
 *
 * @throws IOException Thrown if the system cannot access the state.
 */
void update(T value) throws IOException;
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.operators;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
import org.apache.flink.api.common.functions.util.AbstractRuntimeUDFContext;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.OperatorState;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.graph.StreamConfig;
import org.apache.flink.streaming.runtime.operators.Triggerable;
import java.util.List;
import java.util.Map;
import static java.util.Objects.requireNonNull;
/**
* Implementation of the {@link org.apache.flink.api.common.functions.RuntimeContext},
* for streaming operators.
*/
@PublicEvolving
public class StreamingRuntimeContext extends AbstractRuntimeUDFContext {
/** The operator to which this function belongs */
private final AbstractStreamOperator<?> operator;
/** The task environment running the operator */
private final Environment taskEnvironment;
private final StreamConfig streamConfig;
/**
 * Creates the runtime context for a function that runs inside the given operator.
 *
 * @param operator The operator to which the function belongs.
 * @param env The task environment running the operator.
 * @param accumulators The accumulators handed to the parent context.
 */
public StreamingRuntimeContext(
        AbstractStreamOperator<?> operator,
        Environment env,
        Map<String, Accumulator<?, ?>> accumulators) {
    super(
            env.getTaskInfo(),
            env.getUserClassLoader(),
            operator.getExecutionConfig(),
            accumulators,
            env.getDistributedCacheEntries());

    // The stream config is a view over the task configuration of the environment.
    this.streamConfig = new StreamConfig(env.getTaskConfiguration());
    this.taskEnvironment = env;
    this.operator = operator;
}
// ------------------------------------------------------------------------
/**
 * Exposes the input split provider of the task environment this context was created with.
 *
 * @return The input split provider obtained from the task environment.
 */
public InputSplitProvider getInputSplitProvider() {
    final InputSplitProvider provider = taskEnvironment.getInputSplitProvider();
    return provider;
}
/**
 * Registers a timer callback by forwarding to the owning operator. At the specified time the
 * {@link Triggerable} will be invoked. This call is guaranteed to not happen concurrently with
 * method calls on the operator.
 *
 * @param time The absolute time in milliseconds.
 * @param target The target to be triggered.
 */
public void registerTimer(long time, Triggerable target) {
    final AbstractStreamOperator<?> owner = operator;
    owner.registerTimer(time, target);
}
// ------------------------------------------------------------------------
// broadcast variables
// ------------------------------------------------------------------------
/**
 * Not supported by this context.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public <RT> List<RT> getBroadcastVariable(String name) {
    final String message = "Broadcast variables can only be used in DataSet programs";
    throw new UnsupportedOperationException(message);
}
/**
 * Not supported by this context.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public <T, C> C getBroadcastVariableWithInitializer(String name, BroadcastVariableInitializer<T, C> initializer) {
    final String message = "Broadcast variables can only be used in DataSet programs";
    throw new UnsupportedOperationException(message);
}
// ------------------------------------------------------------------------
// key/value state
// ------------------------------------------------------------------------
/**
 * Obtains the partitioned {@link ValueState} described by the given descriptor from the operator.
 * The descriptor's serializer is initialized from the execution config unless already set.
 *
 * @param stateProperties The descriptor of the state; must not be null.
 * @return The value state handed out by the operator.
 * @throws RuntimeException wrapping any exception raised while obtaining the state
 */
@Override
public <T> ValueState<T> getState(ValueStateDescriptor<T> stateProperties) {
    requireNonNull(stateProperties, "The state properties must not be null");

    final ValueState<T> partitioned;
    try {
        stateProperties.initializeSerializerUnlessSet(getExecutionConfig());
        partitioned = operator.getPartitionedState(stateProperties);
    } catch (Exception e) {
        throw new RuntimeException("Error while getting state", e);
    }
    return partitioned;
}
/**
 * Obtains the partitioned {@link ListState} described by the given descriptor from the operator.
 * The descriptor's serializer is initialized from the execution config unless already set.
 *
 * @param stateProperties The descriptor of the state; must not be null.
 * @return The list state handed out by the operator.
 * @throws RuntimeException wrapping any exception raised while obtaining the state
 */
@Override
public <T> ListState<T> getListState(ListStateDescriptor<T> stateProperties) {
    requireNonNull(stateProperties, "The state properties must not be null");

    final ListState<T> partitioned;
    try {
        stateProperties.initializeSerializerUnlessSet(getExecutionConfig());
        partitioned = operator.getPartitionedState(stateProperties);
    } catch (Exception e) {
        throw new RuntimeException("Error while getting state", e);
    }
    return partitioned;
}
/**
 * Obtains the partitioned {@link ReducingState} described by the given descriptor from the
 * operator. The descriptor's serializer is initialized from the execution config unless
 * already set.
 *
 * @param stateProperties The descriptor of the state; must not be null.
 * @return The reducing state handed out by the operator.
 * @throws RuntimeException wrapping any exception raised while obtaining the state
 */
@Override
public <T> ReducingState<T> getReducingState(ReducingStateDescriptor<T> stateProperties) {
    requireNonNull(stateProperties, "The state properties must not be null");

    final ReducingState<T> partitioned;
    try {
        stateProperties.initializeSerializerUnlessSet(getExecutionConfig());
        partitioned = operator.getPartitionedState(stateProperties);
    } catch (Exception e) {
        throw new RuntimeException("Error while getting state", e);
    }
    return partitioned;
}
/**
 * Deprecated variant that derives the type information from a class and then delegates to
 * {@link #getKeyValueState(String, TypeInformation, Object)}.
 *
 * @param name The name of the state.
 * @param stateType The class of the state value; must not be null.
 * @param defaultState The default value of the state.
 * @return The state created by the delegate.
 * @throws RuntimeException if no type information can be extracted from the class alone
 */
@Override
@Deprecated
public <S> OperatorState<S> getKeyValueState(String name, Class<S> stateType, S defaultState) {
    requireNonNull(stateType, "The state type class must not be null");

    final TypeInformation<S> resolvedType;
    try {
        resolvedType = TypeExtractor.getForClass(stateType);
    } catch (Exception e) {
        final String message = "Cannot analyze type '" + stateType.getName()
                + "' from the class alone, due to generic type parameters. "
                + "Please specify the TypeInformation directly.";
        throw new RuntimeException(message, e);
    }

    // Delegation stays outside the try so that its failures are not re-wrapped.
    return getKeyValueState(name, resolvedType, defaultState);
}
/**
 * Deprecated variant that wraps name, type information and default value into a
 * {@link ValueStateDescriptor} and delegates to {@link #getState(ValueStateDescriptor)}.
 *
 * @param name The name of the state; must not be null.
 * @param stateType The type information of the state value; must not be null.
 * @param defaultState The default value of the state.
 * @return The value state obtained for the descriptor.
 */
@Override
@Deprecated
public <S> OperatorState<S> getKeyValueState(String name, TypeInformation<S> stateType, S defaultState) {
    requireNonNull(name, "The name of the state must not be null");
    requireNonNull(stateType, "The state type information must not be null");

    return getState(new ValueStateDescriptor<S>(name, stateType, defaultState));
}
// ------------------ expose (read only) relevant information from the stream config -------- //
/**
 * Reports whether checkpointing is enabled, as recorded in the stream config.
 *
 * @return true if checkpointing is enabled.
 */
public boolean isCheckpointingEnabled() {
    final boolean enabled = streamConfig.isCheckpointingEnabled();
    return enabled;
}
/**
 * Reads the checkpointing mode from the stream config.
 *
 * @return checkpointing mode
 */
public CheckpointingMode getCheckpointMode() {
    final CheckpointingMode mode = streamConfig.getCheckpointMode();
    return mode;
}
/**
 * Reads the buffer timeout from the stream config.
 *
 * @return buffer timeout (in milliseconds)
 */
public long getBufferTimeout() {
    final long timeout = streamConfig.getBufferTimeout();
    return timeout;
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.state.FoldingState;
import org.apache.flink.api.common.state.FoldingStateDescriptor;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.state.State;
import org.apache.flink.api.common.state.StateBackend;
import org.apache.flink.api.common.state.StateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.runtime.execution.Environment;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
/**
* A state backend defines how state is stored and snapshotted during checkpoints.
*/
public abstract class AbstractStateBackend implements java.io.Serializable {
private static final long serialVersionUID = 4620413814639220247L;
protected transient TypeSerializer<?> keySerializer;
protected transient ClassLoader userCodeClassLoader;
protected transient Object currentKey;
/** For efficient access in setCurrentKey() */
private transient KvState<?, ?, ?, ?, ?>[] keyValueStates;
/** So that we can give out state when the user uses the same key. */
private transient HashMap<String, KvState<?, ?, ?, ?, ?>> keyValueStatesByName;
/** For caching the last accessed partitioned state */
private transient String lastName;
@SuppressWarnings("rawtypes")
private transient KvState lastState;
// ------------------------------------------------------------------------
// initialization and cleanup
// ------------------------------------------------------------------------
/**
 * Called by the task upon deployment to initialize the state backend for a specific job.
 * This base implementation remembers the user code class loader of the environment and the
 * key serializer.
 *
 * @param env The {@link Environment} of the task that instantiated the state backend
 * @param operatorIdentifier Unique identifier for naming states created by this backend
 *                           (not used by this base implementation)
 * @param keySerializer The serializer for the keys of partitioned state
 * @throws Exception Overwritten versions of this method may throw exceptions, in which
 *                   case the job that uses the state backend is considered failed during
 *                   deployment.
 */
public void initializeForJob(
        Environment env,
        String operatorIdentifier,
        TypeSerializer<?> keySerializer) throws Exception {
    final ClassLoader userLoader = env.getUserClassLoader();
    this.userCodeClassLoader = userLoader;
    this.keySerializer = keySerializer;
}
/**
 * Disposes all state associated with the current job.
 *
 * <p>Abstract: each concrete backend defines what disposing its state means.
 *
 * @throws Exception Exceptions may occur during disposal of the state and should be forwarded.
 */
public abstract void disposeAllStateForCurrentJob() throws Exception;
/**
 * Closes the state backend, releasing all internal resources, but does not delete any
 * persistent checkpoint data.
 *
 * @throws Exception Exceptions can be forwarded and will be logged by the system
 */
public abstract void close() throws Exception;
/**
 * Disposes every key/value state currently registered with this backend.
 * Does nothing when no state has been registered yet.
 */
public void dispose() {
    final KvState<?, ?, ?, ?, ?>[] registered = keyValueStates;
    if (registered == null) {
        return;
    }
    for (int i = 0; i < registered.length; i++) {
        registered[i].dispose();
    }
}
// ------------------------------------------------------------------------
// key/value state
// ------------------------------------------------------------------------
/**
 * Creates and returns a new {@link ValueState}.
 *
 * @param namespaceSerializer TypeSerializer for the state namespace.
 * @param stateDesc The {@code StateDescriptor} that contains the name of the state.
 *
 * @param <N> The type of the namespace.
 * @param <T> The type of the value that the {@code ValueState} can store.
 *
 * @return The newly created value state.
 * @throws Exception Implementations may fail while creating the state.
 */
protected abstract <N, T> ValueState<T> createValueState(TypeSerializer<N> namespaceSerializer, ValueStateDescriptor<T> stateDesc) throws Exception;
/**
 * Creates and returns a new {@link ListState}.
 *
 * @param namespaceSerializer TypeSerializer for the state namespace.
 * @param stateDesc The {@code StateDescriptor} that contains the name of the state.
 *
 * @param <N> The type of the namespace.
 * @param <T> The type of the values that the {@code ListState} can store.
 *
 * @return The newly created list state.
 * @throws Exception Implementations may fail while creating the state.
 */
protected abstract <N, T> ListState<T> createListState(TypeSerializer<N> namespaceSerializer, ListStateDescriptor<T> stateDesc) throws Exception;
/**
 * Creates and returns a new {@link ReducingState}.
 *
 * @param namespaceSerializer TypeSerializer for the state namespace.
 * @param stateDesc The {@code StateDescriptor} that contains the name of the state.
 *
 * @param <N> The type of the namespace.
 * @param <T> The type of the values that the {@code ReducingState} can store.
 *
 * @return The newly created reducing state.
 * @throws Exception Implementations may fail while creating the state.
 */
protected abstract <N, T> ReducingState<T> createReducingState(TypeSerializer<N> namespaceSerializer, ReducingStateDescriptor<T> stateDesc) throws Exception;
/**
 * Creates and returns a new {@link FoldingState}.
 *
 * @param namespaceSerializer TypeSerializer for the state namespace.
 * @param stateDesc The {@code StateDescriptor} that contains the name of the state.
 *
 * @param <N> The type of the namespace.
 * @param <T> Type of the values folded into the state
 * @param <ACC> Type of the value in the state
 *
 * @return The newly created folding state.
 * @throws Exception Implementations may fail while creating the state.
 */
protected abstract <N, T, ACC> FoldingState<T, ACC> createFoldingState(TypeSerializer<N> namespaceSerializer, FoldingStateDescriptor<T, ACC> stateDesc) throws Exception;
/**
 * Sets the current key that is used for partitioned state and propagates it to every
 * key/value state registered so far.
 *
 * @param currentKey The current key.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public void setCurrentKey(Object currentKey) {
    this.currentKey = currentKey;

    final KvState[] registered = keyValueStates;
    if (registered == null) {
        return;
    }
    for (int i = 0; i < registered.length; i++) {
        registered[i].setCurrentKey(currentKey);
    }
}
/**
 * Returns the key most recently set via {@link #setCurrentKey(Object)}.
 *
 * @return The current key.
 */
public Object getCurrentKey() {
    return this.currentKey;
}
/**
 * Creates or retrieves a partitioned state backed by this state backend.
 *
 * <p>States are registered by descriptor name. If a state with the descriptor's name already
 * exists, that instance is returned with its namespace switched to {@code namespace}; otherwise
 * a new state is created, registered, and given the current key and namespace. The most
 * recently requested state is cached so that repeated requests for the same name skip the map
 * lookup.
 *
 * @param namespace The namespace to set as current on the returned state.
 * @param namespaceSerializer The serializer for the namespace, passed on when a new state
 *                            has to be created.
 * @param stateDescriptor The state identifier for the state. This contains name
 *                        and can create a default state value.
 * @param <N> The type of the namespace.
 * @param <S> The type of the state.
 *
 * @return The key/value state registered under the descriptor's name, newly created if absent.
 *
 * @throws Exception Thrown if no key serializer has been configured; exceptions may also
 *                   occur during initialization of the state and should be forwarded.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public <N, S extends State> S getPartitionedState(final N namespace, final TypeSerializer<N> namespaceSerializer, final StateDescriptor<S, ?> stateDescriptor) throws Exception {
// Partitioned state needs a key serializer.
if (keySerializer == null) {
throw new Exception("State key serializer has not been configured in the config. " +
"This operation cannot use partitioned state.");
}
// Fall back to a fresh ExecutionConfig when the caller has not initialized the serializer.
if (!stateDescriptor.isSerializerInitialized()) {
stateDescriptor.initializeSerializerUnlessSet(new ExecutionConfig());
}
if (keyValueStatesByName == null) {
keyValueStatesByName = new HashMap<>();
}
// Fast path: same state name as the previous request, so only the namespace changes.
if (lastName != null && lastName.equals(stateDescriptor.getName())) {
lastState.setCurrentNamespace(namespace);
return (S) lastState;
}
// Already registered under this name: reuse it and make it the cached state.
KvState<?, ?, ?, ?, ?> previous = keyValueStatesByName.get(stateDescriptor.getName());
if (previous != null) {
lastState = previous;
lastState.setCurrentNamespace(namespace);
lastName = stateDescriptor.getName();
return (S) previous;
}
// create a new blank key/value state
// The anonymous StateBackend forwards each create call to this backend's factory method,
// adding the namespace serializer.
S kvstate = stateDescriptor.bind(new StateBackend() {
@Override
public <T> ValueState<T> createValueState(ValueStateDescriptor<T> stateDesc) throws Exception {
return AbstractStateBackend.this.createValueState(namespaceSerializer, stateDesc);
}
@Override
public <T> ListState<T> createListState(ListStateDescriptor<T> stateDesc) throws Exception {
return AbstractStateBackend.this.createListState(namespaceSerializer, stateDesc);
}
@Override
public <T> ReducingState<T> createReducingState(ReducingStateDescriptor<T> stateDesc) throws Exception {
return AbstractStateBackend.this.createReducingState(namespaceSerializer, stateDesc);
}
@Override
public <T, ACC> FoldingState<T, ACC> createFoldingState(FoldingStateDescriptor<T, ACC> stateDesc) throws Exception {
return AbstractStateBackend.this.createFoldingState(namespaceSerializer, stateDesc);
}
});
// Register the new state and rebuild the array used by setCurrentKey().
keyValueStatesByName.put(stateDescriptor.getName(), (KvState) kvstate);
keyValueStates = keyValueStatesByName.values().toArray(new KvState[keyValueStatesByName.size()]);
lastName = stateDescriptor.getName();
lastState = (KvState<?, ?, ?, ?, ?>) kvstate;
// Bring the new state in line with the backend's current key and the requested namespace.
((KvState) kvstate).setCurrentKey(currentKey);
((KvState) kvstate).setCurrentNamespace(namespace);
return kvstate;
}
/**
 * Snapshots every registered key/value state.
 *
 * @param checkpointId The ID of the checkpoint, passed to each state's snapshot call.
 * @param timestamp The timestamp of the checkpoint, passed to each state's snapshot call.
 * @return A map from state name to its snapshot, or {@code null} if no state is registered.
 * @throws Exception Exceptions raised while snapshotting a state are forwarded.
 */
public HashMap<String, KvStateSnapshot<?, ?, ?, ?, ?>> snapshotPartitionedState(long checkpointId, long timestamp) throws Exception {
    if (keyValueStates == null) {
        return null;
    }

    final HashMap<String, KvStateSnapshot<?, ?, ?, ?, ?>> byName =
            new HashMap<>(keyValueStatesByName.size());
    for (Map.Entry<String, KvState<?, ?, ?, ?, ?>> named : keyValueStatesByName.entrySet()) {
        KvStateSnapshot<?, ?, ?, ?, ?> taken = named.getValue().snapshot(checkpointId, timestamp);
        byName.put(named.getKey(), taken);
    }
    return byName;
}
/**
 * Forwards the checkpoint-complete notification to every registered key/value state that
 * implements {@code CheckpointListener}; other states are skipped.
 *
 * @param checkpointId The ID of the completed checkpoint.
 * @throws Exception Exceptions raised by a listener are forwarded.
 */
public void notifyOfCompletedCheckpoint(long checkpointId) throws Exception {
    final KvState<?, ?, ?, ?, ?>[] registered = keyValueStates;
    if (registered == null) {
        return;
    }
    for (KvState<?, ?, ?, ?, ?> candidate : registered) {
        if (!(candidate instanceof CheckpointListener)) {
            continue;
        }
        ((CheckpointListener) candidate).notifyCheckpointComplete(checkpointId);
    }
}
/**
 * Injects K/V state snapshots for lazy restore.
 *
 * <p>Each snapshot is restored against this backend and registered under its state name,
 * replacing any state already registered under that name. A {@code null} map is a no-op.
 *
 * @param keyValueStateSnapshots The map of snapshots keyed by state name; may be {@code null}.
 * @param recoveryTimestamp The timestamp passed on to each snapshot's {@code restoreState} call.
 * @throws Exception Exceptions raised while restoring a snapshot are forwarded.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public final void injectKeyValueStateSnapshots(HashMap<String, KvStateSnapshot> keyValueStateSnapshots, long recoveryTimestamp) throws Exception {
    // The annotation above lists the two warning names as separate array elements; a single
    // comma-joined string is not a recognized name and would suppress nothing.
    if (keyValueStateSnapshots == null) {
        return;
    }
    if (keyValueStatesByName == null) {
        keyValueStatesByName = new HashMap<>();
    }
    for (Map.Entry<String, KvStateSnapshot> state : keyValueStateSnapshots.entrySet()) {
        KvState kvState = state.getValue().restoreState(this,
                keySerializer,
                userCodeClassLoader,
                recoveryTimestamp);
        keyValueStatesByName.put(state.getKey(), kvState);
    }
    // Rebuild the array used for fast iteration in setCurrentKey().
    keyValueStates = keyValueStatesByName.values().toArray(new KvState[keyValueStatesByName.size()]);
}
// ------------------------------------------------------------------------
// storing state for a checkpoint
// ------------------------------------------------------------------------
/**
 * Creates an output stream that writes into the state of the given checkpoint. When the stream
 * is closed, it returns a state handle that can retrieve the state back.
 *
 * @param checkpointID The ID of the checkpoint.
 * @param timestamp The timestamp of the checkpoint.
 * @return An output stream that writes state for the given checkpoint.
 *
 * @throws Exception Exceptions may occur while creating the stream and should be forwarded.
 */
public abstract CheckpointStateOutputStream createCheckpointStateOutputStream(
long checkpointID, long timestamp) throws Exception;
/**
 * Creates a {@link DataOutputView} stream that writes into the state of the given checkpoint,
 * by wrapping the stream from {@link #createCheckpointStateOutputStream(long, long)}.
 * When the stream is closed, it returns a state handle that can retrieve the state back.
 *
 * @param checkpointID The ID of the checkpoint.
 * @param timestamp The timestamp of the checkpoint.
 * @return A DataOutputView stream that writes state for the given checkpoint.
 *
 * @throws Exception Exceptions may occur while creating the stream and should be forwarded.
 */
public CheckpointStateOutputView createCheckpointStateOutputView(
        long checkpointID, long timestamp) throws Exception {
    final CheckpointStateOutputStream rawStream =
            createCheckpointStateOutputStream(checkpointID, timestamp);
    return new CheckpointStateOutputView(rawStream);
}
/**
 * Writes the given state into the checkpoint, and returns a handle that can retrieve the state back.
 *
 * @param state The state to be checkpointed.
 * @param checkpointID The ID of the checkpoint.
 * @param timestamp The timestamp of the checkpoint.
 * @param <S> The type of the state.
 *
 * @return A state handle that can retrieve the checkpointed state.
 *
 * @throws Exception Exceptions may occur during serialization / storing the state and should be forwarded.
 */
public abstract <S extends Serializable> StateHandle<S> checkpointStateSerializable(
S state, long checkpointID, long timestamp) throws Exception;
// ------------------------------------------------------------------------
// Checkpoint state output stream
// ------------------------------------------------------------------------
/**
 * A dedicated output stream that produces a {@link StreamStateHandle} when closed via
 * {@link #closeAndGetHandle()}.
 */
public static abstract class CheckpointStateOutputStream extends OutputStream {
/**
 * Closes the stream and gets a state handle that can create an input stream
 * producing the data written to this stream.
 *
 * @return A state handle that can create an input stream producing the data written to this stream.
 * @throws IOException Thrown, if the stream cannot be closed.
 */
public abstract StreamStateHandle closeAndGetHandle() throws IOException;
}
/**
 * A dedicated DataOutputView stream that produces a {@code StateHandle<DataInputView>} when
 * closed via {@link #closeAndGetHandle()}.
 */
public static final class CheckpointStateOutputView extends DataOutputViewStreamWrapper {

    /** The checkpoint stream that this view writes into. */
    private final CheckpointStateOutputStream delegate;

    /**
     * Wraps the given checkpoint stream.
     *
     * @param out The checkpoint stream to write into.
     */
    public CheckpointStateOutputView(CheckpointStateOutputStream out) {
        super(out);
        this.delegate = out;
    }

    /**
     * Closes the underlying stream and gets a state handle that can create a DataInputView
     * producing the data written to this stream.
     *
     * @return A state handle that can create an input view over the data written to this stream.
     * @throws IOException Thrown, if the stream cannot be closed.
     */
    public StateHandle<DataInputView> closeAndGetHandle() throws IOException {
        final StreamStateHandle streamHandle = delegate.closeAndGetHandle();
        return new DataInputViewHandle(streamHandle);
    }

    /** Closes the wrapped checkpoint stream. */
    @Override
    public void close() throws IOException {
        delegate.close();
    }
}
/**
 * Simple state handle that resolves a {@link DataInputView} from a StreamStateHandle.
 * Discarding and size queries are forwarded to the wrapped handle.
 */
private static final class DataInputViewHandle implements StateHandle<DataInputView> {

    private static final long serialVersionUID = 2891559813513532079L;

    /** The wrapped stream handle; the field name is kept as it is part of the serialized form. */
    private final StreamStateHandle stream;

    private DataInputViewHandle(StreamStateHandle stream) {
        this.stream = stream;
    }

    /** Wraps the stream obtained from the underlying handle into a {@link DataInputView}. */
    @Override
    public DataInputView getState(ClassLoader userCodeClassLoader) throws Exception {
        final DataInputViewStreamWrapper view =
                new DataInputViewStreamWrapper(stream.getState(userCodeClassLoader));
        return view;
    }

    @Override
    public void discardState() throws Exception {
        this.stream.discardState();
    }

    @Override
    public long getStateSize() throws Exception {
        final long size = this.stream.getStateSize();
        return size;
    }
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.filesystem;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.state.KvState;
import org.apache.flink.runtime.state.KvStateSnapshot;
import java.util.HashMap;
import java.util.Map;
/**
* Heap-backed partitioned {@link org.apache.flink.api.common.state.ValueState} that is snapshotted
* into files.
*
* @param <K> The type of the key.
* @param <N> The type of the namespace.
* @param <V> The type of the value.
*/
public class FsValueState<K, N, V>
extends AbstractFsState<K, N, V, ValueState<V>, ValueStateDescriptor<V>>
implements ValueState<V> {
/**
 * Creates a new and empty key/value state.
 *
 * @param backend The file system state backend backing snapshots of this state
 * @param keySerializer The serializer for the key.
 * @param namespaceSerializer The serializer for the namespace.
 * @param stateDesc The state identifier for the state. This contains name
 *                  and can create a default state value; its serializer is passed to the
 *                  parent class as the state serializer.
 */
public FsValueState(
        FsStateBackend backend,
        TypeSerializer<K> keySerializer,
        TypeSerializer<N> namespaceSerializer,
        ValueStateDescriptor<V> stateDesc) {
    super(
            backend,
            keySerializer,
            namespaceSerializer,
            stateDesc.getSerializer(),
            stateDesc);
}
/**
 * Creates a new key/value state with the given state contents.
 * This constructor is used to re-create key/value state with existing data, for example from
 * a snapshot.
 *
 * @param backend The file system state backend backing snapshots of this state
 * @param keySerializer The serializer for the key.
 * @param namespaceSerializer The serializer for the namespace.
 * @param stateDesc The state identifier for the state. This contains name
 *                  and can create a default state value; its serializer is passed to the
 *                  parent class as the state serializer.
 * @param state The map of key/value pairs (per namespace) to initialize the state with.
 */
public FsValueState(
        FsStateBackend backend,
        TypeSerializer<K> keySerializer,
        TypeSerializer<N> namespaceSerializer,
        ValueStateDescriptor<V> stateDesc,
        HashMap<N, Map<K, V>> state) {
    super(
            backend,
            keySerializer,
            namespaceSerializer,
            stateDesc.getSerializer(),
            stateDesc,
            state);
}
@Override
public V value() {
if (currentNSState == null) {
currentNSState = state.get(currentNamespace);
}
if (currentNSState != null) {
V value = currentNSState.get(currentKey);
return value != null ? value : stateDesc.getDefaultValue();
}
return stateDesc.getDefaultValue();
}
@Override
public void update(V value) {
if (currentKey == null) {
throw new RuntimeException("No key available.");
}
if (value == null) {
clear();
return;
}
if (currentNSState == null) {
currentNSState = new HashMap<>();
state.put(currentNamespace, currentNSState);
}
currentNSState.put(currentKey, value);
}
@Override
public KvStateSnapshot<K, N, ValueState<V>, ValueStateDescriptor<V>, FsStateBackend> createHeapSnapshot(Path filePath) {
return new Snapshot<>(getKeySerializer(), getNamespaceSerializer(), stateSerializer, stateDesc, filePath);
}
public static class Snapshot<K, N, V> extends AbstractFsStateSnapshot<K, N, V, ValueState<V>, ValueStateDescriptor<V>> {
private static final long serialVersionUID = 1L;
public Snapshot(TypeSerializer<K> keySerializer,
TypeSerializer<N> namespaceSerializer,
TypeSerializer<V> stateSerializer,
ValueStateDescriptor<V> stateDescs,
Path filePath) {
super(keySerializer, namespaceSerializer, stateSerializer, stateDescs, filePath);
}
@Override
public KvState<K, N, ValueState<V>, ValueStateDescriptor<V>, FsStateBackend> createFsState(FsStateBackend backend, HashMap<N, Map<K, V>> stateMap) {
return new FsValueState<>(backend, keySerializer, namespaceSerializer, stateDesc, stateMap);
}
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.filesystem;
import org.apache.flink.api.common.state.State;
import org.apache.flink.api.common.state.StateDescriptor;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import org.apache.flink.runtime.state.KvState;
import org.apache.flink.runtime.state.KvStateSnapshot;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * A snapshot of a heap key/value state stored in a file.
 *
 * <p>Subclasses supply {@link #createFsState(FsStateBackend, HashMap)} to turn the
 * deserialized contents back into a concrete state object.
 *
 * @param <K> The type of the key in the snapshot state.
 * @param <N> The type of the namespace in the snapshot state.
 * @param <SV> The type of the state value.
 * @param <S> The type of the state that is restored from this snapshot.
 * @param <SD> The type of the descriptor of the restored state.
 */
public abstract class AbstractFsStateSnapshot<K, N, SV, S extends State, SD extends StateDescriptor<S, ?>> extends AbstractFileStateHandle implements KvStateSnapshot<K, N, S, SD, FsStateBackend> {

    private static final long serialVersionUID = 1L;

    /** Serializer used for the keys. */
    protected final TypeSerializer<K> keySerializer;

    /** Serializer used for the namespaces. */
    protected final TypeSerializer<N> namespaceSerializer;

    /** Serializer used for the state values. */
    protected final TypeSerializer<SV> stateSerializer;

    /** StateDescriptor, for sanity checks. */
    protected final SD stateDesc;

    /**
     * Creates a new state snapshot with data in the file system.
     *
     * @param keySerializer The serializer for the keys.
     * @param namespaceSerializer The serializer for the namespace.
     * @param stateSerializer The serializer for the elements in the state HashMap
     * @param stateDesc The state identifier
     * @param filePath The path where the snapshot data is stored.
     */
    public AbstractFsStateSnapshot(TypeSerializer<K> keySerializer,
            TypeSerializer<N> namespaceSerializer,
            TypeSerializer<SV> stateSerializer,
            SD stateDesc,
            Path filePath) {
        super(filePath);
        this.keySerializer = keySerializer;
        this.namespaceSerializer = namespaceSerializer;
        this.stateSerializer = stateSerializer;
        this.stateDesc = stateDesc;
    }

    /**
     * Builds the concrete state object from restored contents.
     *
     * @param backend The backend the restored state belongs to.
     * @param stateMap The restored contents, organized as namespace to (key to value).
     * @return The re-created state.
     */
    public abstract KvState<K, N, S, SD, FsStateBackend> createFsState(FsStateBackend backend, HashMap<N, Map<K, SV>> stateMap);

    /**
     * Reads the snapshot file and re-creates the state from it.
     *
     * <p>The file is read as: an int namespace count, then per namespace the serialized
     * namespace, an int entry count, and that many serialized key/value pairs.
     *
     * @param stateBackend The backend whose file system is used to open the snapshot file.
     * @param keySerializer The key serializer to restore with; must equal the one in this snapshot.
     * @param classLoader Not used by this implementation.
     * @param recoveryTimestamp Not used by this implementation.
     * @return The state re-created through {@link #createFsState(FsStateBackend, HashMap)}.
     * @throws IllegalArgumentException If the given key serializer differs from the snapshot's.
     * @throws Exception If reading or deserializing the snapshot file fails.
     */
    @Override
    public KvState<K, N, S, SD, FsStateBackend> restoreState(
            FsStateBackend stateBackend,
            final TypeSerializer<K> keySerializer,
            ClassLoader classLoader,
            long recoveryTimestamp) throws Exception {

        // Refuse to restore with a key serializer other than the one used for writing.
        if (!this.keySerializer.equals(keySerializer)) {
            final String message =
                    "Cannot restore the state from the snapshot with the given serializers. " +
                    "State (K/V) was serialized with " +
                    "(" + this.keySerializer + ") " +
                    "now is (" + keySerializer + ")";
            throw new IllegalArgumentException(message);
        }

        try (FSDataInputStream rawIn = stateBackend.getFileSystem().open(getFilePath())) {
            DataInputStream dataIn = new DataInputStream(rawIn);
            DataInputViewStreamWrapper source = new DataInputViewStreamWrapper(dataIn);

            final int namespaceCount = source.readInt();
            HashMap<N, Map<K, SV>> restored = new HashMap<>(namespaceCount);

            for (int nsLeft = namespaceCount; nsLeft > 0; nsLeft--) {
                N ns = namespaceSerializer.deserialize(source);
                final int entryCount = source.readInt();

                Map<K, SV> entries = new HashMap<>(entryCount);
                restored.put(ns, entries);

                for (int entriesLeft = entryCount; entriesLeft > 0; entriesLeft--) {
                    K entryKey = keySerializer.deserialize(source);
                    SV entryValue = stateSerializer.deserialize(source);
                    entries.put(entryKey, entryValue);
                }
            }

            return createFsState(stateBackend, restored);
        }
        catch (Exception cause) {
            throw new Exception("Failed to restore state from file system", cause);
        }
    }

    /**
     * Returns the file size in bytes.
     *
     * @return The file size in bytes.
     * @throws IOException Thrown if the file system cannot be accessed.
     */
    @Override
    public long getStateSize() throws IOException {
        return getFileSize();
    }
}