业务流程为:从 LDAP 拉取数据,经 Kettle 处理好后插入 ES 中。
下图为整体的流程
处理到空操作步骤的数据如下
接下来需要根据 resource_id 和 relation_resource_id 的关联关系,为每条数据补充 ownerId 和 ownerType 两个字段(处理逻辑见下文代码)
在处理数据时需要递归进行处理
先使用java代码统计数据的总条数
具体代码如下
// Running count of rows handled by this step: processRow increments it once per
// input row and writes the new value into the "rows" output field.
long rows = 0L;
/**
 * Handles a single input row: stamps it with a running row number in the
 * "rows" output field and forwards it to the next step.
 *
 * @param smi step metadata supplied by Kettle (not used here)
 * @param sdi step data supplied by Kettle (not used here)
 * @return true while more rows may follow, false once the input is exhausted
 * @throws KettleException if reading, writing or forwarding the row fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    // Nothing to initialise on the first call; just clear the flag.
    first = false;

    Object[] inputRow = getRow();
    if (inputRow == null) {
        // No more input: tell downstream steps that this step is finished.
        setOutputDone();
        return false;
    }

    // Resize the row so it has room for the fields added by this step.
    Object[] outputRow = createOutputRow(inputRow, data.outputRowMeta.size());

    // The counter starts at 0, so the first row is numbered 1 and the last row
    // carries the total number of rows seen.
    rows = rows + 1;
    get(Fields.Out, "rows").setValue(outputRow, rows);

    // NOTE(review): "a_fieldname" / "output_fieldname" look like leftovers from
    // the step's sample template - confirm these fields exist or drop the lines.
    String suffixed = get(Fields.In, "a_fieldname").getString(outputRow) + "bar";
    get(Fields.Out, "output_fieldname").setValue(outputRow, suffixed);

    putRow(data.outputRowMeta, outputRow);
    return true;
}
这一步之后,每条数据会新增一个字段 rows
其最大值即为数据的总条数。再按 rows 降序排序,将最大值所在的行放在第一行,这样下一步读到第一行时就能知道总条数
这样前期的处理就完成了,通过java代码递归处理数据
具体代码
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
// Not referenced by the visible code of this step.
int rows = 0;
// Largest value of the incoming "rows" field seen so far; processRow compares it
// with relationList.size() to decide when to start processing.
int totalRows = 0;
// Owner id and owner type most recently written to an output row by
// processRelationDate.
String ownerId = null;
String ownerType = null;
// Buffer of every input row received so far: one Map per row, filled in by
// processRow (field values plus the row array under "rowObject").
// NOTE(review): raw types are kept on purpose - the compiler behind Kettle's
// User Defined Java Class step may not accept generics; confirm before adding
// type parameters.
List relationList = new ArrayList();
/**
 * Buffers each incoming row in relationList and, once the number of buffered
 * rows equals the largest "rows" value seen so far, calls
 * processRelationDate(null) to emit the output. Rows are not forwarded from
 * this method directly.
 *
 * @param smi step metadata supplied by Kettle (not used here)
 * @param sdi step data supplied by Kettle (not used here)
 * @return true while more rows may follow, false once the input is exhausted
 * @throws KettleException if reading a field or emitting rows fails
 */
public boolean processRow(StepMetaInterface smi, StepDataInterface sdi) throws KettleException {
    // No info streams are read, so there is nothing to set up on the first call.
    first = false;

    Object[] incoming = getRow();
    if (incoming == null) {
        setOutputDone();
        return false;
    }

    // Resize the row so it can hold the output fields added later.
    Object[] row = createOutputRow(incoming, data.outputRowMeta.size());

    // Snapshot the fields the relation walk needs, plus the row itself.
    // Raw Map/HashMap are used exactly as in the rest of this step.
    Map entry = new HashMap();
    entry.put("resourceId", get(Fields.In, "resource_id").getString(row));
    entry.put("resourceName", get(Fields.In, "resource_name").getString(row));
    entry.put("relationResourceId", get(Fields.In, "relation_resource_id").getString(row));
    entry.put("relationResourceName", get(Fields.In, "relation_resource_name").getString(row));
    entry.put("resourceType", get(Fields.In, "resource_type").getString(row));
    entry.put("relation", get(Fields.In, "relation").getString(row));
    entry.put("relationResourceType", get(Fields.In, "relation_resource_type").getString(row));
    entry.put("type", get(Fields.In, "type").getString(row));
    entry.put("rowObject", row);
    relationList.add(entry);

    // Track the highest row number announced by the input.
    int rowNumber = Integer.parseInt(get(Fields.In, "rows").getString(row));
    totalRows = Math.max(totalRows, rowNumber);

    // Every announced row is buffered: process the whole set in one go.
    if (relationList.size() == totalRows) {
        logBasic("开始递归处理数据");
        processRelationDate(null);
    }

    return true;
}
/**
 * Emits the buffered relation rows, each tagged with the owner it belongs to.
 *
 * Called with null, it treats every buffered row as a root: the row is emitted
 * with itself as owner (ownerId = its resourceId, ownerType = its type) and the
 * relations reachable from it are then followed. Called with a row map, it
 * emits every buffered row whose resourceId equals that row's
 * relationResourceId, tagged with the owner stored in the map, and keeps
 * following the chain while the current row's type is "1".
 *
 * Fixes over the previous version:
 * - the type check uses equals() instead of ==, so it no longer depends on
 *   string identity;
 * - a row without a relationResourceId ends the chain instead of throwing a
 *   NullPointerException;
 * - each emitted row is a copy, so rows already sent downstream are not
 *   overwritten when the same buffered row is emitted again for another owner;
 * - a row that is already being expanded is not expanded again, so cyclic
 *   relations cannot recurse without end.
 *
 * @param rowMap the row whose relations should be followed, or null to start
 *               from every buffered row
 * @throws KettleException if a row cannot be copied or written to the output
 */
public void processRelationDate(Map rowMap) throws KettleException {
    if (rowMap == null) {
        for (int i = 0; i < relationList.size(); i++) {
            Map root = (Map) relationList.get(i);
            ownerId = (String) root.get("resourceId");
            ownerType = (String) root.get("type");
            emitWithOwner(root, ownerId, ownerType);
            root.put("ownerId", ownerId);
            root.put("ownerType", ownerType);
            // Mark the row as in progress so a cycle leading back to it stops.
            root.put("visiting", Boolean.TRUE);
            processRelationDate(root);
            root.remove("visiting");
        }
        return;
    }

    String relationResourceId = (String) rowMap.get("relationResourceId");
    if (relationResourceId == null) {
        // Nothing to follow from this row.
        return;
    }

    // Read once: nested calls overwrite the ownerId/ownerType fields.
    String inheritedOwnerId = (String) rowMap.get("ownerId");
    String inheritedOwnerType = (String) rowMap.get("ownerType");
    // Only rows of type "1" have their relations followed further.
    boolean descend = "1".equals(rowMap.get("type"));

    for (int i = 0; i < relationList.size(); i++) {
        Map candidate = (Map) relationList.get(i);
        if (!relationResourceId.equals(candidate.get("resourceId"))) {
            continue;
        }
        ownerId = inheritedOwnerId;
        ownerType = inheritedOwnerType;
        emitWithOwner(candidate, ownerId, ownerType);
        if (descend && candidate.get("visiting") == null) {
            candidate.put("ownerId", inheritedOwnerId);
            candidate.put("ownerType", inheritedOwnerType);
            candidate.put("visiting", Boolean.TRUE);
            processRelationDate(candidate);
            candidate.remove("visiting");
        }
    }
}

// Sends a copy of the entry's buffered row downstream with the given owner set.
private void emitWithOwner(Map entry, String id, String type) throws KettleException {
    // Copy before stamping: the same buffered row can be emitted for several
    // owners, and a row handed to putRow must not be modified afterwards.
    Object[] outRow = data.outputRowMeta.cloneRow((Object[]) entry.get("rowObject"));
    get(Fields.Out, "ownerId").setValue(outRow, id);
    get(Fields.Out, "ownerType").setValue(outRow, type);
    putRow(data.outputRowMeta, outRow);
}
这样数据就处理完成了,再选择需要的字段,将其插入到es中即可