Flink's local job execution flow

A simple WordCount demo:

import java.util.Arrays;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class BatchWordCount {
    public static void main(String[] args) throws Exception {
        String inputPath = "D:\\outputDir\\file.txt";
        String outPath = "D:\\outputDir\\result.txt";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        DataSource<String> source = env.readTextFile(inputPath);
        // Tokenize, group by the word (field 0), and sum the counts (field 1)
        AggregateOperator<Tuple2<String, Integer>> operator = source.flatMap(new Tokenizer()).groupBy(0).sum(1);
        operator.writeAsCsv(outPath).setParallelism(1);
        env.execute();
    }

    public static class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Emit (word, 1) for every whitespace-separated token
            Arrays.asList(value.split(" ")).forEach(i -> out.collect(new Tuple2<>(i, 1)));
        }
    }
}
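
With an input file containing, for example, "hello world hello", the job writes the rows hello,2 and world,1 to result.txt, since writeAsCsv renders each Tuple2 as one comma-separated line.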

Starting from env.execute():

If we do not specify a job name, a default one is generated. The execute() method triggers program execution; in the local execution environment it runs every pipeline that ends in a sink operation.

The key to execute() is two steps: create the program's execution plan, then use an executor to run that plan.
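
The two steps are easiest to see in LocalEnvironment.execute itself. The following is an abridged sketch (session and job-ID bookkeeping omitted; the exact body differs between Flink 1.x versions):

// Abridged sketch of LocalEnvironment.execute (Flink 1.x DataSet API);
// not the verbatim source.
public JobExecutionResult execute(String jobName) throws Exception {
    // Step 1: turn the DataSet program (sources, operators, sinks) into a Plan
    Plan p = createProgramPlan(jobName);

    // Step 2: hand the Plan to a PlanExecutor (a local one here) and run it
    PlanExecutor executor = PlanExecutor.createLocalExecutor(configuration);
    this.lastJobExecutionResult = executor.executePlan(p);
    return this.lastJobExecutionResult;
}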

Creating the program's execution plan:

createProgramPlan creates the program's Plan. The Plan describes all data sources, all data sinks, and the intermediate operations between them, along with how they interact, as one self-contained unit that can be executed by an org.apache.flink.api.common.PlanExecutor. Obtaining a plan and starting it with an executor is an alternative way to run a program that is otherwise executed as a composition of distributed operations.
jobName – the name of the job
clearSinks – whether or not to start a new stage of execution

public Plan createProgramPlan(String jobName, boolean clearSinks) {
//   First check that the program has at least one sink; if not, throw an exception
   if (this.sinks.isEmpty()) {
      if (wasExecuted) {
         throw new RuntimeException("No new data sinks have been defined since the " +
               "last execution. The last execution refers to the latest call to " +
               "'execute()', 'count()', 'collect()', or 'print()'.");
      } else {
         throw new RuntimeException("No data sinks have been created yet. " +
               "A program needs at least one sink that consumes data. " +
               "Examples are writing the data set or printing it.");
      }
   }

// If no job name was specified, generate a default one
   if (jobName == null) {
      jobName = getDefaultName();
   }

   OperatorTranslation translator = new OperatorTranslation();
// This is where the execution plan is actually produced
   Plan plan = translator.translateToPlan(this.sinks, jobName);

// If a parallelism was set manually, apply it to the plan as its default
   if (getParallelism() > 0) {
      plan.setDefaultParallelism(getParallelism());
   }
// Attach the execution configuration to the plan
   plan.setExecutionConfig(getConfig());

   // Check plan for GenericTypeInfo's and register the types at the serializers.
   if (!config.isAutoTypeRegistrationDisabled()) {
      plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() {

         private final Set<Class<?>> registeredTypes = new HashSet<>();
         private final Set<org.apache.flink.api.common.operators.Operator<?>> visitedOperators = new HashSet<>();

         @Override
         public boolean preVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {
            if (!visitedOperators.add(visitable)) {
               return false;
            }
            OperatorInformation<?> opInfo = visitable.getOperatorInfo();
            Serializers.recursivelyRegisterType(opInfo.getOutputType(), config, registeredTypes);
            return true;
         }

         @Override
         public void postVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {}
      });
   }

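   // Files registered via env.registerCachedFile(...) (the distributed cache) are attached to the plan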
   try {
      registerCachedFilesWithPlan(plan);
   } catch (Exception e) {
      throw new RuntimeException("Error while registering cached files: " + e.getMessage(), e);
   }

   // clear all the sinks such that the next execution does not redo everything
   if (clearSinks) {
      this.sinks.clear();
      wasExecuted = true;
   }

   // All types are registered now. Print information.
   int registeredTypes = config.getRegisteredKryoTypes().size() +
         config.getRegisteredPojoTypes().size() +
         config.getRegisteredTypesWithKryoSerializerClasses().size() +
         config.getRegisteredTypesWithKryoSerializers().size();
   int defaultKryoSerializers = config.getDefaultKryoSerializers().size() +
         config.getDefaultKryoSerializerClasses().size();
   LOG.info("The job has {} registered types and {} default Kryo serializers", registeredTypes, defaultKryoSerializers);

   if (config.isForceKryoEnabled() && config.isForceAvroEnabled()) {
      LOG.warn("In the ExecutionConfig, both Avro and Kryo are enforced. Using Kryo serializer");
   }
   if (config.isForceKryoEnabled()) {
      LOG.info("Using KryoSerializer for serializing POJOs");
   }
   if (config.isForceAvroEnabled()) {
      LOG.info("Using AvroSerializer for serializing POJOs");
   }

   if (LOG.isDebugEnabled()) {
      LOG.debug("Registered Kryo types: {}", config.getRegisteredKryoTypes().toString());
      LOG.debug("Registered Kryo with Serializers types: {}", config.getRegisteredTypesWithKryoSerializers().entrySet().toString());
      LOG.debug("Registered Kryo with Serializer Classes types: {}", config.getRegisteredTypesWithKryoSerializerClasses().entrySet().toString());
      LOG.debug("Registered Kryo default Serializers: {}", config.getDefaultKryoSerializers().entrySet().toString());
      LOG.debug("Registered Kryo default Serializers Classes {}", config.getDefaultKryoSerializerClasses().entrySet().toString());
      LOG.debug("Registered POJO types: {}", config.getRegisteredPojoTypes().toString());

      // print information about static code analysis
      LOG.debug("Static code analysis mode: {}", config.getCodeAnalysisMode());
   }

   return plan;
}
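
The plan generation itself happens in OperatorTranslation.translateToPlan: starting from each sink, it recursively translates the upstream DataSet operators into the common-API operator tree and wraps the translated sinks in a Plan. A simplified sketch (abridged from the Flink 1.x source; details vary by version):

public Plan translateToPlan(List<DataSink<?>> sinks, String jobName) {
    List<GenericDataSinkBase<?>> planSinks = new ArrayList<>();
    for (DataSink<?> sink : sinks) {
        // translate(sink) walks backwards from the sink through all upstream operators
        planSinks.add(translate(sink));
    }
    Plan p = new Plan(planSinks);
    p.setJobName(jobName);
    return p;
}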

Executing the plan

In local mode the executor is a LocalExecutor; its executePlan method compiles the Plan and runs it on an in-process cluster:

public JobExecutionResult executePlan(Plan plan) throws Exception {
   if (plan == null) {
      throw new IllegalArgumentException("The plan may not be null.");
   }

   synchronized (this.lock) {

      // check if we start a session dedicated for this execution
      final boolean shutDownAtEnd;

      if (jobExecutorService == null) {
         shutDownAtEnd = true;

         // configure the number of local slots equal to the parallelism of the local plan
         if (this.taskManagerNumSlots == DEFAULT_TASK_MANAGER_NUM_SLOTS) {
            int maxParallelism = plan.getMaximumParallelism();
            if (maxParallelism > 0) {
               this.taskManagerNumSlots = maxParallelism;
            }
         }

         // start the cluster for us
         start();
      }
      else {
         // we use the existing session
         shutDownAtEnd = false;
      }

      try {
         // TODO: Set job's default parallelism to max number of slots
         final int slotsPerTaskManager = jobExecutorServiceConfiguration.getInteger(TaskManagerOptions.NUM_TASK_SLOTS, taskManagerNumSlots);
         final int numTaskManagers = jobExecutorServiceConfiguration.getInteger(ConfigConstants.LOCAL_NUMBER_TASK_MANAGER, 1);
         plan.setDefaultParallelism(slotsPerTaskManager * numTaskManagers);

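         // The Optimizer compiles the Plan into an OptimizedPlan, picking concrete
         // execution strategies (shipping strategies, local strategies) for each operator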
         Optimizer pc = new Optimizer(new DataStatistics(), jobExecutorServiceConfiguration);
         OptimizedPlan op = pc.compile(plan);

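         // The JobGraphGenerator translates the OptimizedPlan into a JobGraph, the
         // deployment-level DAG that is actually submitted to the cluster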
         JobGraphGenerator jgg = new JobGraphGenerator(jobExecutorServiceConfiguration);
         JobGraph jobGraph = jgg.compileJobGraph(op, plan.getJobId());

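         // Submit the JobGraph to the in-process cluster and block until the job finishes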
         return jobExecutorService.executeJobBlocking(jobGraph);
      }
      finally {
         if (shutDownAtEnd) {
            stop();
         }
      }
   }
}
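
To sum up the local flow: env.execute() builds a Plan from the registered sinks; the LocalExecutor starts an in-process cluster if no session exists; the Optimizer compiles the Plan into an OptimizedPlan; the JobGraphGenerator turns that into a JobGraph; and executeJobBlocking submits the JobGraph and blocks until the JobExecutionResult comes back.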
