kettle执行job流程分析（2）

最新推荐文章于 2024-02-04 11:41:08 发布

夜里慢慢行456

最新推荐文章于 2024-02-04 11:41:08 发布

阅读量797

点赞数

文章标签： java 开发语言

本文链接：https://blog.csdn.net/u013257767/article/details/129622107

版权

文章详细分析了Kettle中任务(trans)和作业(job)的执行差异，特别是job的执行过程，包括JobListener的使用、心跳管理、资源释放以及连接池的管理。文中还深入探讨了job的并行执行、错误处理和扩展点的调用，以及核心的job执行逻辑。

摘要由CSDN通过智能技术生成

真是时光荏苒啊好长时间没再写kettle的代码分析了，正好最近在排查kettle的连接池泄露的问题，趁此机会再来好好聊一聊。

如果查看 trans 和 job 的执行，就会发现这两者的执行是有较大差别的。

trans 每个执行组件（step）是个线程
job 是个线程

在这里插入图片描述
job里有个比较灵活的用法，就是前后监听器 JobListener。遍查 engine的代码，以结束监听较多。

 @Override public void run() {

    ExecutorService heartbeat = null; // this job's heartbeat scheduled executor

    try {
      setStopped( false );
      setFinished( false );
      setInitialized( true );

      // Create a new variable name space as we want jobs to have their own set of variables.
      // initialize from parentJob or null
      //
      variables.initializeVariablesFrom( parentJob );
      setInternalKettleVariables( variables );
      copyParametersFrom( jobMeta );
      activateParameters();

      // Run the job
      //
      fireJobStartListeners(); // 

      heartbeat = startHeartbeat( getHeartbeatIntervalInSeconds() );

      result = execute();
    } catch ( Throwable je ) {
      log.logError( BaseMessages.getString( PKG, "Job.Log.ErrorExecJob", je.getMessage() ), je );
      // log.logError(Const.getStackTracker(je));
      //
      // we don't have result object because execute() threw a curve-ball.
      // So we create a new error object.
      //
      result = new Result();
      result.setNrErrors( 1L );
      result.setResult( false );
      addErrors( 1 ); // This can be before actual execution

      emergencyWriteJobTracker( result );

      setActive( false );
      setFinished( true );
      setStopped( false );
    } finally {
      try {
        shutdownHeartbeat( heartbeat );

        ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobFinish.id, this );
        jobMeta.disposeEmbeddedMetastoreProvider();
//     资源释放，有较多的情况下是进行 连接池释放连接。
        fireJobFinishListeners();

        // release unused vfs connections
        KettleVFS.freeUnusedResources();

      } catch ( KettleException e ) {
        result.setNrErrors( 1 );
        result.setResult( false );
        log.logError( BaseMessages.getString( PKG, "Job.Log.ErrorExecJob", e.getMessage() ), e );

        emergencyWriteJobTracker( result );
      }
    }
  }

另一个方法核心：
任务启动的启动点

/**
   * Execute a job without previous results. This is a job entry point (not recursive)<br>
   * <br>
   *
   * @return the result of the execution
   *
   * @throws KettleException
   */
  private Result execute() throws KettleException {
    try {
      log.snap( Metrics.METRIC_JOB_START );
//   环境变量
      setFinished( false );
      setStopped( false );
      KettleEnvironment.setExecutionInformation( this, rep );

      log.logMinimal( BaseMessages.getString( PKG, "Job.Comment.JobStarted" ) );

      ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobStart.id, this );

      // Start the tracking...
      JobEntryResult jerStart =
          new JobEntryResult( null, null, BaseMessages.getString( PKG, "Job.Comment.JobStarted" ), BaseMessages
              .getString( PKG, "Job.Reason.Started" ), null, 0, null );
      jobTracker.addJobTracker( new JobTracker( jobMeta, jerStart ) );

      setActive( true );
      // Where do we start?
      JobEntryCopy startpoint;

      // synchronize this to a parent job if needed.
      //  任务有父子级，即可以包含子任务。
      Object syncObject = this;
      if ( parentJob != null ) {
        syncObject = parentJob; // parallel execution in a job
      }
      synchronized ( syncObject ) {
        beginProcessing();
      }
      Result res = null;
//		可以选择任务的启动节点
      if ( startJobEntryCopy == null ) {
        startpoint = jobMeta.findJobEntry( JobMeta.STRING_SPECIAL_START, 0, false );
      } else {
        startpoint = startJobEntryCopy;
        res = startJobEntryResult;
      }
      if ( startpoint == null ) {
        throw new KettleJobException( BaseMessages.getString( PKG, "Job.Log.CounldNotFindStartingPoint" ) );
      }
      JobEntryResult jerEnd = null;
      if ( startpoint.isStart() ) {
        // Perform optional looping in the special Start job entry...
        //
        // long iteration = 0;
        boolean isFirst = true;
        JobEntrySpecial jes = (JobEntrySpecial) startpoint.getEntry();
        while ( ( jes.isRepeat() || isFirst ) && !isStopped() ) {
          isFirst = false;
          res = execute( 0, null, startpoint, null, BaseMessages.getString( PKG, "Job.Reason.Started" ) );

          //
          // if (iteration > 0 && (iteration % 500) == 0) {
          // System.out.println("other 500 iterations: " + iteration);
          // }

          // iteration++;
          //
        }
        jerEnd =
            new JobEntryResult( res, jes.getLogChannelId(), BaseMessages.getString( PKG, "Job.Comment.JobFinished" ),
                BaseMessages.getString( PKG, "Job.Reason.Finished" ), null, 0, null );
      } else {
      //  启动核心，非常重要
        res = execute( 0, res, startpoint, null, BaseMessages.getString( PKG, "Job.Reason.Started" ) );
        jerEnd =
            new JobEntryResult( res, startpoint.getEntry().getLogChannel().getLogChannelId(), BaseMessages.getString(
                PKG, "Job.Comment.JobFinished" ), BaseMessages.getString( PKG, "Job.Reason.Finished" ), null, 0, null );
      }
      // Save this result...
      jobTracker.addJobTracker( new JobTracker( jobMeta, jerEnd ) );
      log.logMinimal( BaseMessages.getString( PKG, "Job.Comment.JobFinished" ) );

      setActive( false );
      setFinished( true );
      return res;
    } finally {
      log.snap( Metrics.METRIC_JOB_STOP );
    }
  }

下面就是最核心也是又臭又长的方法：

/**
   * Execute a job entry recursively and move to the next job entry automatically.<br>
   * Uses a back-tracking algorithm.<br>
   *
   * @param nr
   * @param prev_result
   * @param jobEntryCopy
   * @param previous
   * @param reason
   * @return
   * @throws KettleException
   */
  private Result execute( final int nr, Result prev_result, final JobEntryCopy jobEntryCopy, JobEntryCopy previous,
      String reason ) throws KettleException {
    Result res = null;
// 前期准备
    if ( isStopped() ) {
      res = new Result( nr );
      res.stopped = true;
      return res;
    }

    // if we didn't have a previous result, create one, otherwise, copy the content...
    //
    final Result newResult;
    Result prevResult = null;
    if ( prev_result != null ) {
      prevResult = prev_result.clone();
    } else {
      prevResult = new Result();
    }
   ...
      // Which entry is next?
      JobEntryInterface jobEntryInterface = jobEntryCopy.getEntry();
      jobEntryInterface.getLogChannel().setLogLevel( logLevel );

      // Track the fact that we are going to launch the next job entry...
      JobEntryResult jerBefore =
          new JobEntryResult( null, null, BaseMessages.getString( PKG, "Job.Comment.JobStarted" ), reason, jobEntryCopy
              .getName(), jobEntryCopy.getNr(), environmentSubstitute( jobEntryCopy.getEntry().getFilename() ) );
      jobTracker.addJobTracker( new JobTracker( jobMeta, jerBefore ) );

      ClassLoader cl = Thread.currentThread().getContextClassLoader();
      Thread.currentThread().setContextClassLoader( jobEntryInterface.getClass().getClassLoader() );
      // Execute this entry...
      JobEntryInterface cloneJei = (JobEntryInterface) jobEntryInterface.clone();
      ( (VariableSpace) cloneJei ).copyVariablesFrom( this );
      cloneJei.setRepository( rep );
      if ( rep != null ) {
        cloneJei.setMetaStore( rep.getMetaStore() );
      }
      cloneJei.setParentJob( this );
      cloneJei.setParentJobMeta( this.getJobMeta() );
      final long start = System.currentTimeMillis();

      cloneJei.getLogChannel().logDetailed( "Starting job entry" );
      for ( JobEntryListener jobEntryListener : jobEntryListeners ) {
        jobEntryListener.beforeExecution( this, jobEntryCopy, cloneJei );
      }
      if ( interactive ) {
        if ( jobEntryCopy.isTransformation() ) {
          getActiveJobEntryTransformations().put( jobEntryCopy, (JobEntryTrans) cloneJei );
        }
        if ( jobEntryCopy.isJob() ) {
          getActiveJobEntryJobs().put( jobEntryCopy, (JobEntryJob) cloneJei );
        }
      }
      log.snap( Metrics.METRIC_JOBENTRY_START, cloneJei.toString() );
      newResult = cloneJei.execute( prevResult, nr );
      log.snap( Metrics.METRIC_JOBENTRY_STOP, cloneJei.toString() );

      final long end = System.currentTimeMillis();
      if ( interactive ) {
        if ( jobEntryCopy.isTransformation() ) {
          getActiveJobEntryTransformations().remove( jobEntryCopy );
        }
        if ( jobEntryCopy.isJob() ) {
          getActiveJobEntryJobs().remove( jobEntryCopy );
        }
      }

      if ( cloneJei instanceof JobEntryTrans ) {
        String throughput = newResult.getReadWriteThroughput( (int) ( ( end - start ) / 1000 ) );
        if ( throughput != null ) {
          log.logMinimal( throughput );
        }
      }
      for ( JobEntryListener jobEntryListener : jobEntryListeners ) {
        jobEntryListener.afterExecution( this, jobEntryCopy, cloneJei, newResult );
      }

      Thread.currentThread().setContextClassLoader( cl );
      addErrors( (int) newResult.getNrErrors() );

      // Also capture the logging text after the execution...
      //
      LoggingBuffer loggingBuffer = KettleLogStore.getAppender();
      StringBuffer logTextBuffer = loggingBuffer.getBuffer( cloneJei.getLogChannel().getLogChannelId(), false );
      newResult.setLogText( logTextBuffer.toString() + newResult.getLogText() );

      // Save this result as well...
      //
      JobEntryResult jerAfter =
          new JobEntryResult( newResult, cloneJei.getLogChannel().getLogChannelId(), BaseMessages.getString( PKG,
              "Job.Comment.JobFinished" ), null, jobEntryCopy.getName(), jobEntryCopy.getNr(), environmentSubstitute(
                  jobEntryCopy.getEntry().getFilename() ) );
      jobTracker.addJobTracker( new JobTracker( jobMeta, jerAfter ) );
      synchronized ( jobEntryResults ) {
        jobEntryResults.add( jerAfter );

        // Only keep the last X job entry results in memory
        //
        if ( maxJobEntriesLogged > 0 ) {
          while ( jobEntryResults.size() > maxJobEntriesLogged ) {
            // Remove the oldest.
            jobEntryResults.removeFirst();
          }
        }
      }
    }

    extension = new JobExecutionExtension( this, prevResult, jobEntryCopy, extension.executeEntry );
    ExtensionPointHandler.callExtensionPoint( log, KettleExtensionPoint.JobAfterJobEntryExecution.id, extension );

    // Try all next job entries.
    //
    // Keep track of all the threads we fired in case of parallel execution...
    // Keep track of the results of these executions too.
    //
    final List<Thread> threads = new ArrayList<Thread>();
    // next 2 lists is being modified concurrently so must be synchronized for this case.
    final Queue<Result> threadResults = new ConcurrentLinkedQueue<Result>();
    final Queue<KettleException> threadExceptions = new ConcurrentLinkedQueue<KettleException>();
    final List<JobEntryCopy> threadEntries = new ArrayList<JobEntryCopy>();

    // Launch only those where the hop indicates true or false
    //
    int nrNext = jobMeta.findNrNextJobEntries( jobEntryCopy );
    for ( int i = 0; i < nrNext && !isStopped(); i++ ) {
      // The next entry is...
      final JobEntryCopy nextEntry = jobMeta.findNextJobEntry( jobEntryCopy, i );

      // See if we need to execute this...
      final JobHopMeta hi = jobMeta.findJobHop( jobEntryCopy, nextEntry );

      // The next comment...
      final String nextComment;
      if ( hi.isUnconditional() ) {
        nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedUnconditional" );
      } else {
        if ( newResult.getResult() ) {
          nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedSuccess" );
        } else {
          nextComment = BaseMessages.getString( PKG, "Job.Comment.FollowedFailure" );
        }
      }

      //
      // If the link is unconditional, execute the next job entry (entries).
      // If the start point was an evaluation and the link color is correct:
      // green or red, execute the next job entry...
      //
      if ( hi.isUnconditional() || ( jobEntryCopy.evaluates() && ( !( hi.getEvaluation() ^ newResult
          .getResult() ) ) ) ) {
        // Start this next step!
        if ( log.isBasic() ) {
          log.logBasic( BaseMessages.getString( PKG, "Job.Log.StartingEntry", nextEntry.getName() ) );
        }

        // Pass along the previous result, perhaps the next job can use it...
        // However, set the number of errors back to 0 (if it should be reset)
        // When an evaluation is executed the errors e.g. should not be reset.
        if ( nextEntry.resetErrorsBeforeExecution() ) {
          newResult.setNrErrors( 0 );
        }

        // Now execute!
        //
        // if (we launch in parallel, fire the execution off in a new thread...
        //
        if ( jobEntryCopy.isLaunchingInParallel() ) {
          threadEntries.add( nextEntry );

          Runnable runnable = new Runnable() {
            @Override public void run() {
              try {
                Result threadResult = execute( nr + 1, newResult, nextEntry, jobEntryCopy, nextComment );
                threadResults.add( threadResult );
              } catch ( Throwable e ) {
                log.logError( Const.getStackTracker( e ) );
                threadExceptions.add( new KettleException( BaseMessages.getString( PKG, "Job.Log.UnexpectedError",
                    nextEntry.toString() ), e ) );
                Result threadResult = new Result();
                threadResult.setResult( false );
                threadResult.setNrErrors( 1L );
                threadResults.add( threadResult );
              }
            }
          };
          Thread thread = new Thread( runnable );
          threads.add( thread );
          thread.start();
          if ( log.isBasic() ) {
            log.logBasic( BaseMessages.getString( PKG, "Job.Log.LaunchedJobEntryInParallel", nextEntry.getName() ) );
          }
        } else {
          try {
            // Same as before: blocks until it's done
            //
            res = execute( nr + 1, newResult, nextEntry, jobEntryCopy, nextComment );
          } catch ( Throwable e ) {
            log.logError( Const.getStackTracker( e ) );
            throw new KettleException( BaseMessages.getString( PKG, "Job.Log.UnexpectedError", nextEntry.toString() ),
                e );
          }
          if ( log.isBasic() ) {
            log.logBasic( BaseMessages.getString( PKG, "Job.Log.FinishedJobEntry", nextEntry.getName(), res.getResult()
                + "" ) );
          }
        }
      }
    }

    // OK, if we run in parallel, we need to wait for all the job entries to
    // finish...
    //
    if ( jobEntryCopy.isLaunchingInParallel() ) {
      for ( int i = 0; i < threads.size(); i++ ) {
        Thread thread = threads.get( i );
        JobEntryCopy nextEntry = threadEntries.get( i );

        try {
          thread.join();
        } catch ( InterruptedException e ) {
          log.logError( jobMeta.toString(), BaseMessages.getString( PKG,
              "Job.Log.UnexpectedErrorWhileWaitingForJobEntry", nextEntry.getName() ) );
          threadExceptions.add( new KettleException( BaseMessages.getString( PKG,
              "Job.Log.UnexpectedErrorWhileWaitingForJobEntry", nextEntry.getName() ), e ) );
        }
      }
      // if(log.isBasic()) log.logBasic(BaseMessages.getString(PKG,
      // "Job.Log.FinishedJobEntry",startpoint.getName(),res.getResult()+""));
    }

    // Perhaps we don't have next steps??
    // In this case, return the previous result.
    if ( res == null ) {
      res = prevResult;
    }

    // See if there where any errors in the parallel execution
    //
    if ( threadExceptions.size() > 0 ) {
      res.setResult( false );
      res.setNrErrors( threadExceptions.size() );

      for ( KettleException e : threadExceptions ) {
        log.logError( jobMeta.toString(), e.getMessage(), e );
      }

      // Now throw the first Exception for good measure...
      //
      throw threadExceptions.poll();
    }

    // In parallel execution, we aggregate all the results, simply add them to
    // the previous result...
    //
    for ( Result threadResult : threadResults ) {
      res.add( threadResult );
    }

    // If there have been errors, logically, we need to set the result to
    // "false"...
    //
    if ( res.getNrErrors() > 0 ) {
      res.setResult( false );
    }

    return res;
  }