kettle实现递归处理数据

业务流程为 Ldap拉取数据,kettle处理好后插入es中

下图为整体的流程
在这里插入图片描述
处理到空操作步骤的数据如下
在这里插入图片描述
接下来需要根据resource_id 和relation_resource_id 的关系将数据处理为

在这里插入图片描述
在处理数据时需要递归进行处理

先使用java代码统计数据的总条数
在这里插入图片描述
具体代码如下

	// Number of rows read so far; its value is written to the "rows" field of every outgoing row.
	long rows = 0L;

	/**
	 * Tags each incoming row with a running, 1-based count in the output field "rows" and
	 * forwards it. The last row therefore carries the total number of rows.
	 *
	 * Only the "rows" field is written here; the sample lines from the step template that read
	 * "a_fieldname" and wrote "output_fieldname" are not part of this step and were dropped.
	 *
	 * @return false once the input is exhausted, true while more rows may follow
	 * @throws KettleException if reading or forwarding a row fails
	 */
	public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
	  if (first) {
	    first = false;
	  }

	  Object[] r = getRow();
	  if (r == null) {
	    setOutputDone();
	    return false;
	  }

	  // Resize the row so there is room for the "rows" field added by this step.
	  r = createOutputRow(r, data.outputRowMeta.size());
	  get(Fields.Out, "rows").setValue(r, ++rows);

	  // Send the row on to the next step.
	  putRow(data.outputRowMeta, r);
	  return true;
	}

这一步之后,每行数据会多出一列属性 rows,其最大值即为数据的总条数。再通过 rows 进行降序排序,将最大值所在的行放在第一行

在这里插入图片描述
这样前期的处理就完成了,接下来通过 Java 代码递归处理数据

在这里插入图片描述
具体代码

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Not referenced by any of the code shown in this step.
int rows = 0;
// Largest value of the "rows" input field seen so far; processRow compares it with
// relationList.size() to decide when to start processing.
int totalRows = 0;
// Owner id and owner type most recently assigned in processRelationDate, just before they are
// written to the "ownerId" / "ownerType" output fields.
String ownerId = null;
String ownerType = null;
// One Map per input row, appended by processRow: the row's field values under camelCase keys
// plus the row array itself under "rowObject".
// NOTE(review): raw types are presumably intentional (the step's embedded compiler may not
// accept generics) -- confirm before adding type parameters.
List relationList = new ArrayList();
/**
 * Buffers every incoming row in relationList and, once the whole input has been buffered,
 * starts the owner resolution in processRelationDate.
 *
 * Every row must carry a "rows" field. The largest "rows" value seen so far is treated as the
 * expected total, and processing starts as soon as that many rows are buffered. The row holding
 * the maximum therefore has to arrive first (sort descending on "rows" upstream); otherwise
 * processing would be triggered before all rows are buffered.
 *
 * Rows are not forwarded from here; output rows are emitted by processRelationDate.
 *
 * @return false once the input is exhausted, true while more rows may follow
 * @throws KettleException if the "rows" field is empty or not an integer, or if emitting the
 *         processed rows fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
  if (first) {
    first = false;
  }

  Object[] r = getRow();
  if (r == null) {
    setOutputDone();
    return false;
  }

  // Resize the row so there is room for the output fields this step adds.
  r = createOutputRow(r, data.outputRowMeta.size());

  // Snapshot the fields needed for relation matching, together with the row itself.
  Map map = new HashMap();
  map.put("resourceId", get(Fields.In, "resource_id").getString(r));
  map.put("resourceName", get(Fields.In, "resource_name").getString(r));
  map.put("relationResourceId", get(Fields.In, "relation_resource_id").getString(r));
  map.put("relationResourceName", get(Fields.In, "relation_resource_name").getString(r));
  map.put("resourceType", get(Fields.In, "resource_type").getString(r));
  map.put("relation", get(Fields.In, "relation").getString(r));
  map.put("relationResourceType", get(Fields.In, "relation_resource_type").getString(r));
  map.put("type", get(Fields.In, "type").getString(r));
  map.put("rowObject", r);
  relationList.add(map);

  // Read and parse "rows" once. The text is trimmed because the string form of a numeric field
  // may carry padding depending on how the field is formatted upstream.
  String rowsText = get(Fields.In, "rows").getString(r);
  int rowNumber;
  try {
    rowNumber = Integer.parseInt(rowsText == null ? "" : rowsText.trim());
  } catch (NumberFormatException e) {
    throw new KettleException("Field 'rows' is not a valid integer: " + rowsText, e);
  }
  if (totalRows < rowNumber) {
    totalRows = rowNumber;
  }

  // All expected rows are buffered: resolve owners and emit the result rows.
  if (totalRows == relationList.size()) {
    logBasic("开始递归处理数据");
    processRelationDate((Map) null);
  }

  return true;
}

/**
 * Emits output rows that assign an owner to the buffered rows by following relation links.
 *
 * Called with null: every buffered row is emitted as its own owner (ownerId = its resourceId,
 * ownerType = its type) and then used as the starting point of a traversal.
 *
 * Called with a row map: every buffered row whose resourceId equals the given row's
 * relationResourceId is emitted with the given row's ownerId / ownerType. When the given row's
 * type is "1", the traversal continues from each such match.
 *
 * @param rowMap null to start from every buffered row, or a buffered row's map that already
 *               carries "ownerId" and "ownerType"
 * @throws KettleException if a row cannot be copied or emitted
 */
public void processRelationDate(Map rowMap) throws KettleException {
  if (rowMap == null) {
    for (int i = 0; i < relationList.size(); i++) {
      Map map = (Map) relationList.get(i);
      ownerId = (String) map.get("resourceId");
      ownerType = (String) map.get("type");
      putRowWithOwner((Object[]) map.get("rowObject"), ownerId, ownerType);
      descendFrom(map, ownerId, ownerType);
    }
    return;
  }

  String relationResourceId = (String) rowMap.get("relationResourceId");
  if (relationResourceId == null) {
    // A row without a related resource has nothing to follow.
    return;
  }

  // Compare by value: "==" on strings tests object identity, which made this check fail for
  // values read from the row and stopped the traversal after the first level.
  boolean descend = "1".equals(rowMap.get("type"));

  for (int i = 0; i < relationList.size(); i++) {
    Map map = (Map) relationList.get(i);
    if (!relationResourceId.equals(map.get("resourceId"))) {
      continue;
    }
    ownerId = (String) rowMap.get("ownerId");
    ownerType = (String) rowMap.get("ownerType");
    putRowWithOwner((Object[]) map.get("rowObject"), ownerId, ownerType);

    // Rows already on the current traversal path are not entered again, so cyclic relations
    // cannot recurse without bound.
    if (descend && map.get("visiting") == null) {
      descendFrom(map, ownerId, ownerType);
    }
  }
}

// Continues the traversal from the given row, marking it as "on the current path" meanwhile.
private void descendFrom(Map map, String id, String type) throws KettleException {
  map.put("ownerId", id);
  map.put("ownerType", type);
  map.put("visiting", Boolean.TRUE);
  processRelationDate(map);
  map.remove("visiting");
}

// Emits a copy of the buffered row with the given owner written to "ownerId" / "ownerType".
// A copy is used because the same buffered array is emitted several times with different
// owners; writing into the shared array would also change rows handed out earlier.
// NOTE(review): assumes putRow passes the array on by reference -- confirm in the Kettle docs.
private void putRowWithOwner(Object[] bufferedRow, String id, String type) throws KettleException {
  Object[] out = data.outputRowMeta.cloneRow(bufferedRow);
  get(Fields.Out, "ownerId").setValue(out, id);
  get(Fields.Out, "ownerType").setValue(out, type);
  putRow(data.outputRowMeta, out);
}

这样数据就处理完成了,再选择需要的字段,将其插入到es中即可

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值