Java JavaRDD.repartition 方法代码示例

import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类

/**
 * Reads the RDF input at {@code path} through a Hadoop input format and
 * returns the parsed quads as an RDD.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Builds a Hadoop {@code Configuration}, setting
 *       {@code NLineInputFormat.LINES_PER_MAP} to the configured batch size and
 *       {@code RdfIOConstants.INPUT_IGNORE_BAD_TUPLES} to {@code "false"} when the
 *       configured error handling is {@code ParseErrorHandling.Throw}, otherwise
 *       {@code "true"}.</li>
 *   <li>Decides whether the input is line based: uses the configured value, or
 *       {@code guessIsLineBasedFormat(path)} when none is configured.</li>
 *   <li>Loads the file with {@code NQuadsInputFormat} (line based) or
 *       {@code TriplesOrQuadsInputFormat} (otherwise) and unwraps each
 *       {@code QuadWritable}.</li>
 *   <li>Repartitions the RDD when a partition count is configured.</li>
 *   <li>When the list of accepted languages is non-empty, drops quads whose
 *       object is a literal carrying a non-empty language tag that is not in
 *       that list.</li>
 * </ol>
 *
 * @param path location of the RDF input passed to {@code sc.newAPIHadoopFile}
 * @return the parsed (and possibly repartitioned and filtered) quads
 */
@Override
public JavaRDD parseQuads(String path) {
    // Hadoop-side settings consumed by the input formats below.
    Configuration hadoopConf = new Configuration();
    Integer linesPerBatch = config.getBatchSize();
    hadoopConf.set(NLineInputFormat.LINES_PER_MAP, linesPerBatch.toString());

    // Bad tuples are ignored unless the error handling mode is Throw.
    String ignoreBadTuples =
            config.getErrorHandling() == ParseErrorHandling.Throw ? "false" : "true";
    hadoopConf.set(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, ignoreBadTuples);

    // An explicit configuration value wins; otherwise guess from the path.
    Boolean lineBased = config.getLineBasedFormat();
    if (lineBased == null) {
        lineBased = guessIsLineBasedFormat(path);
    }

    Integer targetPartitions = config.getRepartition();
    JavaRDD parsed;

    if (!lineBased) {
        // let Jena guess the format, load whole files
        log.info("Input format is not line based, parsing RDF by Master node only.");
        parsed = sc.newAPIHadoopFile(
                        path,
                        TriplesOrQuadsInputFormat.class,
                        LongWritable.class, // position
                        QuadWritable.class, // value
                        hadoopConf)
                .values()
                .map(QuadWritable::get);

        if (targetPartitions == null) {
            log.warn("Reading non-line based formats by master node only, consider setting --parsing.repartition to redistribute work to other nodes.");
        }
    } else {
        log.info("Parsing RDF in parallel with batch size: {}", linesPerBatch);
        parsed = sc.newAPIHadoopFile(
                        path,
                        NQuadsInputFormat.class,
                        LongWritable.class, // position
                        QuadWritable.class, // value
                        hadoopConf)
                .values()
                .map(QuadWritable::get);
    }

    if (targetPartitions != null) {
        log.info("Distributing workload, repartitioning into {} partitions", targetPartitions);
        parsed = parsed.repartition(targetPartitions);
    }

    final List languages = config.getAcceptedLanguages();

    // An empty list means every language is accepted, so no filtering is applied.
    if (languages.isEmpty()) {
        return parsed;
    }

    // Keep non-literals, untagged literals, and literals in an accepted language.
    return parsed.filter(quad -> {
        if (!quad.getObject().isLiteral()) {
            return true;
        }
        if (quad.getObject().getLiteralLanguage() == null) {
            return true;
        }
        if (quad.getObject().getLiteralLanguage().isEmpty()) {
            return true;
        }
        return languages.contains(quad.getObject().getLiteralLanguage());
    });
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值