java pair map,Java JavaPairRDD.flatMapToPair方法代码示例

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类

private static JavaPairRDD interleaveReads(String fastq, String fastq2, int splitlen, JavaSparkContext sc) throws IOException {

FileSystem fs = FileSystem.get(new Configuration());

FileStatus fst = fs.getFileStatus(new Path(fastq));

FileStatus fst2 = fs.getFileStatus(new Path(fastq2));

List nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);

List nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

JavaRDD splitRDD = sc.parallelize(nlif);

JavaRDD splitRDD2 = sc.parallelize(nlif2);

JavaPairRDD zips = splitRDD.zip(splitRDD2);

return zips.flatMapToPair( splits -> {

FastqInputFormat.FastqRecordReader fqreader = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._1);

FastqInputFormat.FastqRecordReader fqreader2 = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._2);

ArrayList> reads = new ArrayList>();

while (fqreader.nextKeyValue()) {

String key = fqreader.getCurrentKey().toString();

String[] keysplit = key.split(" ");

key = keysplit[0];

SequencedFragment sf = new SequencedFragment();

sf.setQuality(new Text(fqreader.getCurrentValue().getQuality().toString()));

sf.setSequence(new Text(fqreader.getCurrentValue().getSequence().toString()));

if (fqreader2.nextKeyValue()) {

String key2 = fqreader2.getCurrentKey().toString();

String[] keysplit2 = key2.split(" ");

key2 = keysplit2[0];

//key2 = key2.replace(" 2:N:0:1","/2");

SequencedFragment sf2 = new SequencedFragment();

sf2.setQuality(new Text(fqreader2.getCurrentValue().getQuality().toString()));

sf2.setSequence(new Text(fqreader2.getCurrentValue().getSequence().toString()));

reads.add(new Tuple2(new Text(key), sf));

reads.add(new Tuple2(new Text(key2), sf2));

}

}

return reads.iterator();

});

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值