java级联添加_java – 在 Cascading 中构建自定义连接逻辑,并确保仅使用 MAP_SIDE(map 侧)连接

解决这个问题的最好方法(至少是我能想到的)是修改较小的数据集.您可以向较小的数据集添加一个新字段(F1DecidingFactor). F1Result 的取值可以按如下方式确定:

伪代码

if F1DecidingFactor == "Yes" then

F1Result = ACTUAL_VALUE

else

F1Result = "N/A"

结果表

|F1#Join|F1#Result|F1#DecidingFactor|

| Yes| 0| True|

| Yes| 1| False|

| No| 0| N/A|

| No| 1| N/A|

您也可以通过 Cascading 完成上述操作.

之后,您就可以进行 map 侧(map-side)连接.

如果修改较小的数据集是不可能的,那么我有2个选项来解决问题.

选项1

向较小的管道(small pipe)添加一个新字段,其取值等于您的判定因子(即 F1DecidingFactor_RHS = "Yes"),然后将该字段纳入连接条件.连接完成后,只有满足该条件的行才会有值,其余行的值为 null/空白.示例代码:

主类

import cascading.operation.Insert;

import cascading.pipe.Each;

import cascading.pipe.HashJoin;

import cascading.pipe.Pipe;

import cascading.pipe.assembly.Discard;

import cascading.pipe.joiner.LeftJoin;

import cascading.tuple.Fields;

/**
 * Option 1: map-side (HashJoin) left joins whose join key includes the deciding factor.
 *
 * <p>Each small pipe gets an extra constant field ({@code F1DecidingFactor_RHS} /
 * {@code F2DecidingFactor_RHS}) holding {@code "Yes"}. That field is paired with the large
 * pipe's deciding-factor field in the join key, so a small-pipe row can only match a
 * large-pipe row whose deciding factor equals {@code "Yes"}. The helper fields are discarded
 * once both joins are done.
 */
public class StackHashJoinTestOption2 {

    /** Builds the pipe assembly; the resulting {@code result} pipe is the tail of the assembly. */
    public StackHashJoinTestOption2() {
        Fields f1Input = new Fields("F1Input");
        Fields f2Input = new Fields("F2Input");
        Fields f1Join = new Fields("F1Join");
        Fields f2Join = new Fields("F2Join");

        Fields f1DecidingFactor = new Fields("F1DecidingFactor");
        Fields f2DecidingFactor = new Fields("F2DecidingFactor");
        Fields f1DecidingFactorRhs = new Fields("F1DecidingFactor_RHS");
        Fields f2DecidingFactorRhs = new Fields("F2DecidingFactor_RHS");

        // Composite join keys: (deciding factor, join value) on each side.
        Fields lhsJoinerOne = f1DecidingFactor.append(f1Input);
        Fields lhsJoinerTwo = f2DecidingFactor.append(f2Input);
        Fields rhsJoinerOne = f1DecidingFactorRhs.append(f1Join);
        Fields rhsJoinerTwo = f2DecidingFactorRhs.append(f2Join);

        // NOTE(review): the pipe comments below name the small-pipe value fields
        // F1Result / F2Result, while this selector (and TestFunction) use
        // F1Output / F2Output - confirm the real field names before running.
        Fields functionFields =
                new Fields("F1DecidingFactor", "F1Output", "F2DecidingFactor", "F2Output");

        // Large Pipe fields :
        // F1DecidingFactor F1Input F2DecidingFactor F2Input
        Pipe largePipe = new Pipe("large-pipe");

        // Small Pipe 1 Fields :
        // F1Join F1Result
        Pipe rhsOne = new Pipe("small-pipe-1");

        // New field to small pipe. Expected Fields:
        // F1Join F1Result F1DecidingFactor_RHS
        rhsOne = new Each(rhsOne, new Insert(f1DecidingFactorRhs, "Yes"), Fields.ALL);

        // Small Pipe 2 Fields :
        // F2Join F2Result
        Pipe rhsTwo = new Pipe("small-pipe-2");

        // New field to small pipe. Expected Fields:
        // F2Join F2Result F2DecidingFactor_RHS
        // The Insert is required: the second join key and the later Discard both
        // reference F2DecidingFactor_RHS.
        rhsTwo = new Each(rhsTwo, new Insert(f2DecidingFactorRhs, "Yes"), Fields.ALL);

        // Joining first small pipe. Expected fields after join:
        // F1DecidingFactor F1Input F2DecidingFactor F2Input F1Join F1Result F1DecidingFactor_RHS
        Pipe resultsOne =
                new HashJoin(largePipe, lhsJoinerOne, rhsOne, rhsJoinerOne, new LeftJoin());

        // Joining second small pipe. Expected fields after join:
        // F1DecidingFactor F1Input F2DecidingFactor F2Input F1Join F1Result F1DecidingFactor_RHS
        // F2Join F2Result F2DecidingFactor_RHS
        Pipe resultsTwo =
                new HashJoin(resultsOne, lhsJoinerTwo, rhsTwo, rhsJoinerTwo, new LeftJoin());

        // Replace the selected fields with the values produced by TestFunction.
        Pipe result = new Each(resultsTwo, functionFields, new TestFunction(), Fields.REPLACE);

        // Drop the helper fields that were only needed for the join keys.
        result = new Discard(result, f1DecidingFactorRhs);
        result = new Discard(result, f2DecidingFactorRhs);

        // result Pipe should have expected result
    }
}

选项2

如果希望使用默认值而不是 null/空白,建议先使用默认的 Joiner 进行 HashJoin,然后使用一个函数(Function)把元组更新为适当的值.例如:

主类

import cascading.pipe.Each;

import cascading.pipe.HashJoin;

import cascading.pipe.Pipe;

import cascading.pipe.joiner.LeftJoin;

import cascading.tuple.Fields;

/**
 * Option 2: plain map-side (HashJoin) left joins followed by a function that substitutes a
 * default value.
 *
 * <p>The large pipe is left-joined against each small pipe on the join value alone, then
 * {@link TestFunction} is applied to the deciding-factor and output fields and its result
 * replaces those fields in the stream.
 */
public class StackHashJoinTest {

    /** Builds the pipe assembly; the resulting {@code result} pipe is the tail of the assembly. */
    public StackHashJoinTest() {
        Fields f1Input = new Fields("F1Input");
        Fields f2Input = new Fields("F2Input");
        Fields f1Join = new Fields("F1Join");
        Fields f2Join = new Fields("F2Join");

        // All four fields that TestFunction reads or overwrites.
        // NOTE(review): the pipe comments below name the small-pipe value fields
        // F1Result / F2Result, while TestFunction uses F1Output / F2Output - confirm
        // the real field names before running.
        Fields functionFields =
                new Fields("F1DecidingFactor", "F1Output", "F2DecidingFactor", "F2Output");

        // Large Pipe fields :
        // F1DecidingFactor F1Input F2DecidingFactor F2Input
        Pipe largePipe = new Pipe("large-pipe");

        // Small Pipe 1 Fields :
        // F1Join F1Result
        Pipe rhsOne = new Pipe("small-pipe-1");

        // Small Pipe 2 Fields :
        // F2Join F2Result
        Pipe rhsTwo = new Pipe("small-pipe-2");

        // Joining first small pipe.
        // Expected fields after join:
        // F1DecidingFactor F1Input F2DecidingFactor F2Input F1Join F1Result
        Pipe resultsOne = new HashJoin(largePipe, f1Input, rhsOne, f1Join, new LeftJoin());

        // Joining second small pipe.
        // Expected fields after join:
        // F1DecidingFactor F1Input F2DecidingFactor F2Input F1Join F1Result F2Join F2Result
        Pipe resultsTwo = new HashJoin(resultsOne, f2Input, rhsTwo, f2Join, new LeftJoin());

        // Apply the default-value function; its output replaces the selected fields.
        Pipe result = new Each(resultsTwo, functionFields, new TestFunction(), Fields.REPLACE);

        // result Pipe should have expected result
    }
}

更新函数

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntry;

/**
 * Cascading function that overwrites an output field with a default value whenever the
 * matching deciding-factor field equals {@code "No"} (case-insensitive).
 *
 * <p>Expected argument fields: {@code "F1DecidingFactor"}, {@code "F1Output"},
 * {@code "F2DecidingFactor"}, {@code "F2Output"}. The declared output fields are the
 * argument fields ({@link Fields#ARGS}).
 */
public class TestFunction extends BaseOperation implements Function {

    private static final long serialVersionUID = 1L;

    /** Deciding-factor value that triggers substitution of the default. */
    private static final String DECIDING_FACTOR = "No";

    /** Value written to the output field when the deciding factor matches. */
    private static final String DEFAULT_VALUE = "N/A";

    // Expected Fields: "F1DecidingFactor","F1Output","F2DecidingFactor","F2Output"
    public TestFunction() {
        super(Fields.ARGS);
    }

    /**
     * Copies the incoming arguments, replaces {@code F1Output} / {@code F2Output} with
     * {@code "N/A"} where the corresponding deciding factor is {@code "No"}, and emits the
     * copy as the single output tuple.
     *
     * @param process the current flow process (unused)
     * @param call    the function call providing the arguments and the output collector
     */
    @Override
    public void operate(@SuppressWarnings("rawtypes") FlowProcess process, FunctionCall call) {
        TupleEntry arguments = call.getArguments();

        // Work on a copy so the values can be overwritten before emitting.
        TupleEntry result = new TupleEntry(arguments);

        // Constant-first comparison: a null deciding factor simply does not match
        // instead of throwing a NullPointerException.
        if (DECIDING_FACTOR.equalsIgnoreCase(result.getString("F1DecidingFactor"))) {
            result.setString("F1Output", DEFAULT_VALUE);
        }

        if (DECIDING_FACTOR.equalsIgnoreCase(result.getString("F2DecidingFactor"))) {
            result.setString("F2Output", DEFAULT_VALUE);
        }

        call.getOutputCollector().add(result);
    }
}

参考

这应该可以解决你的问题.让我知道这是否有帮助.

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值