Flume 1.9.0 源码解析 : TailDirSource 全解

最新推荐文章于 2023-04-16 20:55:46 发布

张伯毅

最新推荐文章于 2023-04-16 20:55:46 发布

阅读量5.1k

点赞数 11

分类专栏： Flume 源码

本文链接：https://blog.csdn.net/zhanglong_4444/article/details/89879367

版权

Flume 同时被 2 个专栏收录

5 篇文章 1 订阅

订阅专栏

源码

3 篇文章 0 订阅

订阅专栏

TailDirSource继承了AbstractSource类，而AbstractSource类中channelProcessor属性负责将Source中的Event提交给Channel组件

TailDirSource类通过配置参数匹配日志文件，获取日志文件更新内容并且将已经读取的偏移量记录到特定的文件当中(position file)

类图:

TailDirSource 启动的时候, 首先需要进行初始化,然后调用 start 方法.

为了单独测试,我直接用测试用例进行测试. 配置参数与代码如下:

@Test
  public void testRun() throws InterruptedException {
    // Channel side: one memory channel with default settings behind a replicating selector.
    channel = new MemoryChannel();
    Configurables.configure(channel, new Context());

    List<Channel> channelList = new ArrayList<>();
    channelList.add(channel);

    ChannelSelector selector = new ReplicatingChannelSelector();
    selector.setChannels(channelList);

    // Source side: a single file group "f1" tailing one log file, with a custom header
    // and an explicit position file.
    Map<String, String> params = new HashMap<>();
    params.put("type", "TAILDIR");
    params.put("channels", "c1");
    params.put("filegroups", "f1");
    params.put("filegroups.f1", "/todo/flume/taildir/input/data.log");
    params.put("headers.f1.headerKey1", "markHeaderKey");
    params.put("fileHeader", "true");
    params.put("positionFile", "/todo/flume/taildir/taildir_position.json");

    source = new TaildirSource();
    source.setChannelProcessor(new ChannelProcessor(selector));
    source.configure(new Context(params));
    source.start();

    // Keeps the test thread alive for 1,000,000 ms after start(); presumably so the
    // running source can be observed by hand.
    Thread.sleep(1000000);
  }

点击运行, 直接进行初始化,根据配置文件进行初始化操作.

org.apache.flume.source.taildir.TaildirSource#configure

/**
   * Reads the TAILDIR source settings from {@code context} into this source's fields
   * and makes sure the directory that will hold the position file exists.
   *
   * @param context configuration holding the file groups, position file path, header
   *                table and the various batching / timing options
   * @throws IllegalStateException if the file-group list is missing or no file group
   *                               maps to a path
   * @throws FlumeException if the position file's parent directories cannot be created
   */
  @Override
  public synchronized void configure(Context context) {
    // Whitespace-separated list of file group names; each group names a set of files to tail.
    String fileGroups = context.getString(FILE_GROUPS);
    Preconditions.checkState(fileGroups != null, "Missing param: " + FILE_GROUPS);

    // Keep only the sub-properties whose key is one of the declared groups (group -> path).
    filePaths = selectByKeys(context.getSubProperties(FILE_GROUPS_PREFIX),
                             fileGroups.split("\\s+"));
    Preconditions.checkState(!filePaths.isEmpty(),
        "Mapping for tailing files is empty or invalid: '" + FILE_GROUPS_PREFIX + "'");

    // User home directory, normalised to forward slashes.
    String homePath = System.getProperty("user.home").replace('\\', '/');

    // Position file path; when not configured it defaults to homePath + DEFAULT_POSITION_FILE.
    positionFilePath = context.getString(POSITION_FILE, homePath + DEFAULT_POSITION_FILE);
    Path positionFile = Paths.get(positionFilePath);

    // A bare file name (e.g. "taildir_position.json") has no parent; in that case there
    // is nothing to create, and passing null to createDirectories would throw an NPE.
    Path positionDir = positionFile.getParent();
    if (positionDir != null) {
      try {
        // Creates the directory and any missing ancestors.
        Files.createDirectories(positionDir);
      } catch (IOException e) {
        throw new FlumeException("Error creating positionFile parent directories", e);
      }
    }

    // Table of extra headers to attach to events (looked up per file group elsewhere).
    headerTable = getTable(context, HEADERS_PREFIX);

    // Batch size used when reading events.
    batchSize = context.getInteger(BATCH_SIZE, DEFAULT_BATCH_SIZE);

    // Whether files without a recorded position start at their end instead of at 0.
    skipToEnd = context.getBoolean(SKIP_TO_END, DEFAULT_SKIP_TO_END);

    // Whether to attach the line's byte offset as an event header.
    byteOffsetHeader = context.getBoolean(BYTE_OFFSET_HEADER, DEFAULT_BYTE_OFFSET_HEADER);

    // Initial delay (ms) of the idle-file checker scheduled in start().
    // NOTE(review): original annotation gives the default as 120000 — confirm.
    idleTimeout = context.getInteger(IDLE_TIMEOUT, DEFAULT_IDLE_TIMEOUT);

    // Delay (ms) between runs of the position writer scheduled in start().
    // NOTE(review): original annotation gives the default as 3000 — confirm.
    writePosInterval = context.getInteger(WRITE_POS_INTERVAL, DEFAULT_WRITE_POS_INTERVAL);

    // Whether the file matcher caches its matching results.
    cachePatternMatching = context.getBoolean(CACHE_PATTERN_MATCHING,
        DEFAULT_CACHE_PATTERN_MATCHING);

    // Back-off increment after a poll that found no new data.
    // NOTE(review): original annotation gives the default as 1000 — confirm.
    backoffSleepIncrement = context.getLong(PollableSourceConstants.BACKOFF_SLEEP_INCREMENT,
        PollableSourceConstants.DEFAULT_BACKOFF_SLEEP_INCREMENT);

    // Upper bound on the delay between polls while no new data is found.
    // NOTE(review): original annotation gives the default as 5000 — confirm.
    maxBackOffSleepInterval = context.getLong(PollableSourceConstants.MAX_BACKOFF_SLEEP,
        PollableSourceConstants.DEFAULT_MAX_BACKOFF_SLEEP);

    // Whether to add a header carrying the tailed file's path.
    fileHeader = context.getBoolean(FILENAME_HEADER, DEFAULT_FILE_HEADER);

    // Header key used for the file path when fileHeader is enabled.
    fileHeaderKey = context.getString(FILENAME_HEADER_KEY,  DEFAULT_FILENAME_HEADER_KEY);

    // Maximum batch count; non-positive values are rejected and replaced by the default.
    maxBatchCount = context.getLong(MAX_BATCH_COUNT, DEFAULT_MAX_BATCH_COUNT);
    if (maxBatchCount <= 0) {
      maxBatchCount = DEFAULT_MAX_BATCH_COUNT;
      logger.warn("Invalid maxBatchCount specified, initializing source "
          + "default maxBatchCount of {}", maxBatchCount);
    }

    // Create the counter once so repeated configure() calls reuse it.
    if (sourceCounter == null) {
      sourceCounter = new SourceCounter(getName());
    }
  }

初始化完成之后,调用 start 方法.

// Builds the ReliableTaildirEventReader from the values set in configure(), then
  // starts two single-threaded scheduled executors: "idleFileChecker" and
  // "positionWriter" (the latter records read offsets, per the original annotation).
  @Override
  public synchronized void start() {
    logger.info("{} TaildirSource source starting with directory: {}", getName(), filePaths);
    try {


      reader = new ReliableTaildirEventReader.Builder()
          .filePaths(filePaths)
          .headerTable(headerTable)
          .positionFilePath(positionFilePath)
          .skipToEnd(skipToEnd)
          .addByteOffset(byteOffsetHeader)
          .cachePatternMatching(cachePatternMatching)
          .annotateFileName(fileHeader)
          .fileNameHeader(fileHeaderKey)
          .build();


    } catch (IOException e) {
      throw new FlumeException("Error instantiating ReliableTaildirEventReader", e);
    }


    // Executor for the idle-file check.
    idleFileChecker = Executors.newSingleThreadScheduledExecutor(
        new ThreadFactoryBuilder().setNameFormat("idleFileChecker").build());

    // First run after idleTimeout ms, then checkIdleInterval ms between runs.
    // NOTE(review): original annotations give defaults of 120000 and 5000 — confirm.
    idleFileChecker.scheduleWithFixedDelay(new idleFileCheckerRunnable(),
        idleTimeout, checkIdleInterval, TimeUnit.MILLISECONDS);



    // Executor for the position writer: first run after writePosInitDelay ms, then
    // writePosInterval ms between runs.
    // NOTE(review): original annotation gives 5000 for both, but configure() annotates
    // writePosInterval's default as 3000 — confirm against DEFAULT_WRITE_POS_INTERVAL.
    // Per the original annotation, the writer records each file's read offset as JSON
    // entries with "inode", "pos" and "file"; the inode lookup is Unix-specific, so
    // ReliableTaildirEventReader.getInode() would need changes for other systems
    // (e.g. Windows).
    positionWriter = Executors.newSingleThreadScheduledExecutor(
        new ThreadFactoryBuilder().setNameFormat("positionWriter").build());

    positionWriter.scheduleWithFixedDelay(new PositionWriterRunnable(),
        writePosInitDelay, writePosInterval, TimeUnit.MILLISECONDS);

    super.start();

    logger.debug("TaildirSource started");
    sourceCounter.start();
  }

在这里面构建对象: ReliableTaildirEventReader

在 ReliableTaildirEventReader 里面创建有三个地方需要重点关注一下

先说第一个地方, 创建TaildirMatcher , 个人理解是: 为了标识缓存查找文件用的工具类.

org.apache.flume.source.taildir.TaildirMatcher#TaildirMatcher

构造方法如下:

第二个地方,如果创建了新文件或者检测到附加到现有文件，则更新tailFiles映射 TailFiles。

org.apache.flume.source.taildir.ReliableTaildirEventReader#updateTailFiles

/**
   * Update tailFiles mapping if a new file is created or appends are detected
   * to the existing file.
   *
   * @param skipToEnd when true, a newly opened file starts at its current length
   *                  instead of position 0
   * @return the inodes of all matched files seen in this pass
   * @throws IOException if an inode lookup fails for a reason other than the file
   *                     having been deleted
   */
  public List<Long> updateTailFiles(boolean skipToEnd) throws IOException {
    updateTime = System.currentTimeMillis();
    List<Long> updatedInodes = Lists.newArrayList();
    // Walk every cached matcher. With the article's test configuration a matcher prints as
    // {filegroup='f1', filePattern='/todo/flume/taildir/input/data.log', cached=true}.
    for (TaildirMatcher taildir : taildirCache) {
      // Headers configured for this file group; with the test configuration headerTable
      // is {f1={headerKey1=markHeaderKey}}.
      Map<String, String> headers = headerTable.row(taildir.getFileGroup());
      // Files matching this group (reportedly ordered by last-modified time —
      // confirm in TaildirMatcher).
      for (File f : taildir.getMatchingFiles()) {
        long inode;
        try {

          // Inode lookup; getInode hard-codes the "unix:ino" attribute.
          inode = getInode(f);

        } catch (NoSuchFileException e) {
          logger.info("File has been deleted in the meantime: " + e.getMessage());
          continue;
        }

        TailFile tf = tailFiles.get(inode);


        if (tf == null || !tf.getPath().equals(f.getAbsolutePath())) {

          // Not tracked yet, or tracked under a different path: treat as a new file.

          long startPos = skipToEnd ? f.length() : 0;

          // Open the file and build its TailFile handle.
          tf = openFile(f, headers, inode, startPos);


        } else {
          // Already tracked: it counts as updated if it was modified after our last
          // update or its length differs from the recorded position.
          boolean updated = tf.getLastUpdated() < f.lastModified() || tf.getPos() != f.length();


          if (updated) {

            // No open RandomAccessFile on the handle: reopen at the recorded position.
            if (tf.getRaf() == null) {


              tf = openFile(f, headers, inode, tf.getPos());


            }
            // File is shorter than the recorded position: restart from 0.
            if (f.length() < tf.getPos()) {
              logger.info("Pos " + tf.getPos() + " is larger than file size! "
                  + "Restarting from pos 0, file: " + tf.getPath() + ", inode: " + inode);


              tf.updatePos(tf.getPath(), inode, 0);


            }

          }


          tf.setNeedTail(updated);


        }

        // Store (or replace) the handle for this inode.
        tailFiles.put(inode, tf);


        updatedInodes.add(inode);


      }
    }
    return updatedInodes;
  }

这里有几个地方需要说一下.

1. headerTable 的数据结构是 HashBasedTable<R, C, V> 其实就是 Map<R, Map<C, V>>

其中 R: rowKey , C : columnKey , V : value

2. 获取匹配到的文件,以最后修改时间进行排序.

3. 根据文件,获取文件对应的 inode , 这个代码里面写死了, 所以只支持 unix:ino

/**
   * Looks up the inode number of {@code file}.
   *
   * The attribute name "unix:ino" is hard-coded, so this only works where the
   * file system exposes the "unix" attribute view.
   *
   * @param file the file to inspect
   * @return the value of the file's "unix:ino" attribute
   * @throws IOException if the attribute cannot be read
   */
private long getInode(File file) throws IOException {
    return (long) Files.getAttribute(file.toPath(), "unix:ino");
  }

4. 并根据获取到的 inode 生成对应的实体对象

tf = openFile(f, headers, inode, startPos);

  /**
   * Builds a TailFile for the given log file, headers, inode and start offset.
   *
   * @throws FlumeException wrapping the IOException if the TailFile cannot be created
   */
  private TailFile openFile(File file, Map<String, String> headers, long inode, long pos) {
    logger.info("Opening file: " + file + ", inode: " + inode + ", pos: " + pos);
    TailFile opened;
    try {
      opened = new TailFile(file, headers, inode, pos);
    } catch (IOException e) {
      throw new FlumeException("Failed opening file: " + file, e);
    }
    return opened;
  }

在创建 TailFile 的时候, 有几个点要说明一下.

读取文件采用的是 RandomAccessFile 类 , 这个类可以用 seek 方法进行定位, 从而读取指定的数据. 定位是根据字节来的,而不是根据行.

这样就获取到了需要更新的文件了.

我们再跳出来. 回落到

org.apache.flume.source.taildir.ReliableTaildirEventReader#ReliableTaildirEventReader 构造方法里面

加载位置文件.

org.apache.flume.source.taildir.ReliableTaildirEventReader#loadPositionFile

这个方法是用来更新 TailFile 对象中的文件指针: pos

当第一次加载的时候, 这个文件是空的, 所以会跳过.

我们看一下里面的源码. 其实就是读取文件里面的 json 数据.

对文件进行定位. 涉及的参数: inode / pos / file

文件里面的内容:

[{"inode":12895973088,"pos":27,"file":"/todo/flume/taildir/input/data.log"}]

读取源码:

/**
   * Load a position file which has the last read position of each file.
   * If the position file exists, update tailFiles mapping.
   *
   * The file is expected to be a JSON array of objects carrying "inode", "pos" and
   * "file". Any other key in an object is skipped. A missing file or an I/O failure
   * is logged and otherwise ignored.
   *
   * @param filePath path of the position file to read
   * @throws NullPointerException if an entry lacks any of "inode", "pos" or "file"
   */
  public void loadPositionFile(String filePath) {
    // try-with-resources closes the JsonReader and then the FileReader, even when
    // parsing fails part-way or one of the close() calls throws.
    try (FileReader fr = new FileReader(filePath);
         JsonReader jr = new JsonReader(fr)) {
      jr.beginArray();
      while (jr.hasNext()) {
        Long inode = null;
        Long pos = null;
        String path = null;

        jr.beginObject();
        while (jr.hasNext()) {
          switch (jr.nextName()) {
            case "inode":
              inode = jr.nextLong();
              break;
            case "pos":
              pos = jr.nextLong();
              break;
            case "file":
              path = jr.nextString();
              break;
            default:
              // Unknown key: its value must still be consumed, otherwise the next
              // nextName() call fails with an IllegalStateException.
              jr.skipValue();
              break;
          }
        }
        jr.endObject();

        for (Object v : Arrays.asList(inode, pos, path)) {
          Preconditions.checkNotNull(v, "Detected missing value in position file. "
              + "inode: " + inode + ", pos: " + pos + ", path: " + path);
        }

        // Only files already present in tailFiles can have their position restored.
        TailFile tf = tailFiles.get(inode);
        if (tf != null && tf.updatePos(path, inode, pos)) {
          tailFiles.put(inode, tf);
        } else {
          logger.info("Missing file: " + path + ", inode: " + inode + ", pos: " + pos);
        }
      }
      jr.endArray();
    } catch (FileNotFoundException e) {
      logger.info("File not found: " + filePath + ", not updating position");
    } catch (IOException e) {
      logger.error("Failed loading positionFile: " + filePath, e);
    }
  }

ReliableTaildirEventReader 类的初始化&创建就说完了, 在 start 方法里面还有两个线程


    // Single-threaded scheduled executor, thread name "idleFileChecker".
    idleFileChecker = Executors.newSingleThreadScheduledExecutor(
        new ThreadFactoryBuilder().setNameFormat("idleFileChecker").build());

    // First run after idleTimeout ms, then checkIdleInterval ms between runs.
    // NOTE(review): original annotations give defaults of 120000 and 5000 — confirm.
    idleFileChecker.scheduleWithFixedDelay(new idleFileCheckerRunnable(),
        idleTimeout, checkIdleInterval, TimeUnit.MILLISECONDS);

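唠叨两句, 记录文件读取位置的位置文件(position file)是定时更新的: 首次写入延迟 writePosInitDelay (5 秒), 之后每隔 writePosInterval 写入一次 (configure 中标注的默认值为 3000 毫秒, 即 3 秒一次).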

// Single-threaded scheduled executor, thread name "positionWriter".
    // First run after writePosInitDelay ms, then writePosInterval ms between runs.
    // NOTE(review): original annotation gives 5000 for both, but configure() annotates
    // writePosInterval's default as 3000 — confirm against DEFAULT_WRITE_POS_INTERVAL.
    // Per the original annotation, the writer records each file's read offset as JSON
    // entries with "inode", "pos" and "file"; the inode lookup is Unix-specific, so
    // ReliableTaildirEventReader.getInode() would need changes for other systems
    // (e.g. Windows).
    positionWriter = Executors.newSingleThreadScheduledExecutor(
        new ThreadFactoryBuilder().setNameFormat("positionWriter").build());

    positionWriter.scheduleWithFixedDelay(new PositionWriterRunnable(),
        writePosInitDelay, writePosInterval, TimeUnit.MILLISECONDS);

好了,到这里, TailDirSource 的启动工作就完成了.

接下来我们说代码执行.

TailDirSource 实现了 PollableSource 接口 , 在这个接口中定义了三个方法,直接看类图吧:

最重要的就三个方法:

getBackOffSleepIncrement: 当上一次尝试没有找到任何新数据时, 下一次轮询前退避休眠时间的递增量。默认值: 1000

getMaxBackOffSleepInterval: 当最后一次尝试没有找到任何新数据时,每次重新尝试轮询新数据之间的最大时间延迟 . 默认值: 5000

process : 运行的代码.

TailDirSource 是封装为 PollingRunner 进行运行的. 不断循环调用 process 方法, 每次执行完 process 方法之后,然后休眠一定时间

话不多说,直接看 process 方法:

org.apache.flume.source.taildir.TaildirSource#process

 /**
   * Runs one polling pass: refreshes the set of tailed files, reads from every file
   * flagged as needing a tail, then closes tail files via closeTailFiles().
   *
   * @return READY if at least one file reported more lines to read; BACKOFF otherwise,
   *         including when any error occurred during the pass
   */
  @Override
  public Status process() {
    Status outcome = Status.BACKOFF;
    try {
      // Rebuild the inode list from the reader's current view of the matched files.
      existingInodes.clear();
      existingInodes.addAll(reader.updateTailFiles());

      for (long inode : existingInodes) {
        TailFile tailFile = reader.getTailFiles().get(inode);
        if (!tailFile.needTail()) {
          continue;
        }
        // Read and deliver this file's new data; true means it still has lines left.
        if (tailFileProcess(tailFile, true)) {
          outcome = Status.READY;
        }
      }
      closeTailFiles();
    } catch (Throwable t) {
      logger.error("Unable to tail files", t);
      sourceCounter.incrementEventReadFail();
      outcome = Status.BACKOFF;
    }
    return outcome;
  }

在这里面,核心的方法是:

boolean hasMoreLines = tailFileProcess(tf, true);

batchSize 代表一个事务有多少个 Event .

org.apache.flume.source.taildir.ReliableTaildirEventReader#readEvents

/**
   * Reads up to {@code numEvents} events from the current file and decorates them with
   * the file's configured headers and, when enabled, the file-name header.
   *
   * If the previous read was never committed, the file position is first reset to the
   * value returned by {@code currentFile.getPos()} so that read is replayed.
   *
   * @param numEvents        maximum number of events to read
   * @param backoffWithoutNL passed through to the file reader; controls backing off on
   *                         a line without a trailing newline
   * @return the events read; empty when nothing was available
   * @throws IOException           if reading from the file fails
   * @throws IllegalStateException if a rollback is required but there is no current file
   */
public List<Event> readEvents(int numEvents, boolean backoffWithoutNL)
      throws IOException {

    // An uncommitted previous read means it failed downstream: roll the position back.
    if (!committed) {
      if (currentFile == null) {
        // The message must not dereference currentFile here; doing so threw an NPE
        // instead of the intended IllegalStateException.
        throw new IllegalStateException("current file does not exist.");
      }
      logger.info("Last read was never committed - resetting position");
      long lastPos = currentFile.getPos();
      currentFile.updateFilePos(lastPos);
    }

    List<Event> events = currentFile.readEvents(numEvents, backoffWithoutNL, addByteOffset);
    if (events.isEmpty()) {
      return events;
    }

    Map<String, String> headers = currentFile.getHeaders();
    boolean hasHeaders = headers != null && !headers.isEmpty();

    // Decorate events only when there is something to add.
    if (annotateFileName || hasHeaders) {
      for (Event event : events) {
        if (hasHeaders) {
          event.getHeaders().putAll(headers);
        }
        if (annotateFileName) {
          event.getHeaders().put(fileNameHeader, currentFile.getPath());
        }
      }
    }

    // Mark the batch as pending until the caller commits it.
    committed = false;
    return events;
  }

org.apache.flume.source.taildir.TailFile#readEvents

/**
   * Collects up to {@code numEvents} events by calling readEvent repeatedly, stopping
   * early as soon as readEvent returns null.
   *
   * @param numEvents        maximum number of events to collect
   * @param backoffWithoutNL forwarded to readEvent
   * @param addByteOffset    forwarded to readEvent
   * @return the collected events, possibly empty
   * @throws IOException if readEvent fails
   */
public List<Event> readEvents(int numEvents, boolean backoffWithoutNL,
      boolean addByteOffset) throws IOException {
    List<Event> batch = Lists.newLinkedList();
    for (int remaining = numEvents; remaining > 0; remaining--) {
      Event next = readEvent(backoffWithoutNL, addByteOffset);
      if (next == null) {
        // Nothing more to read right now.
        return batch;
      }
      batch.add(next);
    }
    return batch;
  }

org.apache.flume.source.taildir.TailFile#readEvent

/**
   * Reads one line and wraps its bytes in an Event.
   *
   * @param backoffWithoutNL when true, a line that does not include its line separator
   *                         is not emitted; the file position is rewound to where the
   *                         line started and null is returned
   * @param addByteOffset    when true, the line's starting position is stored in the
   *                         BYTE_OFFSET_HEADER_KEY header
   * @return the event, or null when no line was read or the read was backed off
   * @throws IOException if reading or repositioning the file fails
   */
private Event readEvent(boolean backoffWithoutNL, boolean addByteOffset) throws IOException {
    // Remember where this line starts so the read can be undone.
    Long lineStart = getLineReadPos();

    LineResult result = readLine();
    if (result == null) {
      return null;
    }

    // Line has no separator yet: rewind and retry later.
    if (backoffWithoutNL && !result.lineSepInclude) {
      logger.info("Backing off in file without newline: "
          + path + ", inode: " + inode + ", pos: " + raf.getFilePointer());
      updateFilePos(lineStart);
      return null;
    }

    Event built = EventBuilder.withBody(result.line);
    if (addByteOffset) {
      built.getHeaders().put(BYTE_OFFSET_HEADER_KEY, lineStart.toString());
    }
    return built;
  }

在这里说一下 event 数据结构吧.

Event 的实例是 SimpleEvent 包含 headers 和 body 两部分.

headers, 头信息, 是一个 HashMap 数据结构.

body , 数据内容, 是一个 byte 数组 (byte[]). 里面为一行数据.

到这里,我们已经拿到了文件数据, 是一个 event 的集合. 需要放到 channel 里面.

getChannelProcessor().processEventBatch(events);

调用的是

org.apache.flume.channel.ChannelProcessor.processEventBatch

嗯嗯,这个方法有点长, 其实就是根据 event 的不同, 将数据分发到不同的 channel 中. 然后等待 sink 进行消费.

 /**
   * Runs the given events through the interceptor chain, groups them per channel as
   * chosen by the selector, and writes each group to its channel inside one
   * transaction per channel.
   * <p>
   * A failure on a required channel rolls that transaction back and is rethrown
   * (as the original {@link Error} or {@link ChannelException}, otherwise wrapped in
   * a {@link ChannelException}). A failure on an optional channel is rolled back and
   * logged; only an {@link Error} is rethrown.
   * <p>
   * Channels are processed one after another, so with several channels some
   * transactions may already be committed when a later one fails.
   *
   * @param events A list of events to put into the configured channels.
   * @throws ChannelException when a write to a required channel fails.
   */
  public void processEventBatch(List<Event> events) {
    Preconditions.checkNotNull(events, "Event list must not be null");

    events = interceptorChain.intercept(events);

    // Insertion-ordered so channels are written in the order they were first selected.
    Map<Channel, List<Event>> requiredBatches = new LinkedHashMap<Channel, List<Event>>();
    Map<Channel, List<Event>> optionalBatches = new LinkedHashMap<Channel, List<Event>>();

    // Group every event under each channel the selector assigns it to.
    for (Event event : events) {
      for (Channel required : selector.getRequiredChannels(event)) {
        List<Event> pending = requiredBatches.get(required);
        if (pending == null) {
          pending = new ArrayList<Event>();
          requiredBatches.put(required, pending);
        }
        pending.add(event);
      }

      for (Channel optional : selector.getOptionalChannels(event)) {
        List<Event> pending = optionalBatches.get(optional);
        if (pending == null) {
          pending = new ArrayList<Event>();
          optionalBatches.put(optional, pending);
        }
        pending.add(event);
      }
    }

    // Required channels: any failure propagates to the caller.
    for (Map.Entry<Channel, List<Event>> entry : requiredBatches.entrySet()) {
      Channel reqChannel = entry.getKey();
      Transaction tx = reqChannel.getTransaction();
      Preconditions.checkNotNull(tx, "Transaction object must not be null");
      try {
        tx.begin();
        for (Event event : entry.getValue()) {
          reqChannel.put(event);
        }
        tx.commit();
      } catch (Throwable t) {
        tx.rollback();
        if (t instanceof Error) {
          LOG.error("Error while writing to required channel: " + reqChannel, t);
          throw (Error) t;
        }
        if (t instanceof ChannelException) {
          throw (ChannelException) t;
        }
        throw new ChannelException("Unable to put batch on required " +
            "channel: " + reqChannel, t);
      } finally {
        // tx was verified non-null before the try block.
        tx.close();
      }
    }

    // Optional channels: failures are logged; only Errors are rethrown.
    for (Map.Entry<Channel, List<Event>> entry : optionalBatches.entrySet()) {
      Channel optChannel = entry.getKey();
      Transaction tx = optChannel.getTransaction();
      Preconditions.checkNotNull(tx, "Transaction object must not be null");
      try {
        tx.begin();
        for (Event event : entry.getValue()) {
          optChannel.put(event);
        }
        tx.commit();
      } catch (Throwable t) {
        tx.rollback();
        LOG.error("Unable to put batch on optional channel: " + optChannel, t);
        if (t instanceof Error) {
          throw (Error) t;
        }
      } finally {
        // tx was verified non-null before the try block.
        tx.close();
      }
    }
  }