mapreduce中combiner执行情况
- combiner在mapreduce过程中可能在溢出(spill)和归并(merge)时执行:源码见类【MapTask】
- 溢出时combiner执行情况源码:OutputCollector => spill
/**
 * Writes a run of buffered map-output records to {@code writer}, optionally
 * passing them through the combiner first.
 *
 * NOTE(review): this is an abridged excerpt. {@code spindex}, {@code mend},
 * {@code i}, {@code writer} and {@code value} are used below but declared
 * outside the lines shown here; see the full MapTask source for their setup.
 *
 * @throws IOException declared by the method; raised by the I/O calls below
 * @throws ClassNotFoundException declared by the method
 * @throws InterruptedException declared by the method
 */
private void sortAndSpill() throws IOException, ClassNotFoundException, InterruptedException {
// No combiner configured: copy each record straight to the writer.
if (this.combinerRunner == null) {
// Walk records while the metadata slot at (offset + 2) still equals i.
// Presumably slot +2 holds the partition number and i is the current
// partition -- confirm against the full MapTask source.
for(DataInputBuffer key = new DataInputBuffer(); spindex < mend && this.kvmeta.get(this.offsetFor(spindex % this.maxRec) + 2) == i; ++spindex) {
// Metadata offset of the current record.
int kvoff = this.offsetFor(spindex % this.maxRec);
// Slot +1 is read as the key start, slot +0 as the value start.
int keystart = this.kvmeta.get(kvoff + 1);
int valstart = this.kvmeta.get(kvoff + 0);
// The key occupies kvbuffer[keystart, valstart).
key.reset(this.kvbuffer, keystart, valstart - keystart);
// Point `value` at this record's value bytes.
this.getVBytesForOffset(kvoff, value);
writer.append(key, value);
}
} else {
// Combiner configured: first find the extent of the run, then combine it.
int spstart;
// Empty-bodied loop: only advances spindex to the end of the run that
// satisfies the same (offset + 2) == i condition; spstart keeps the start.
for(spstart = spindex; spindex < mend && this.kvmeta.get(this.offsetFor(spindex % this.maxRec) + 2) == i; ++spindex) {
;
}
// Run the combiner only when the run contains at least one record.
if (spstart != spindex) {
// Route the combiner's output into the same writer.
this.combineCollector.setWriter(writer);
// Iterator over records [spstart, spindex).
RawKeyValueIterator kvIter = new MapTask.MapOutputBuffer.MRResultIterator(spstart, spindex);
this.combinerRunner.combine(kvIter, this.combineCollector);
}
}
// Closed on both paths once the run has been written.
writer.close();
}
- 归并时combiner执行情况源码:partitioners => merge
/**
 * Writes the merged records from {@code kvIter} to {@code writer}, running
 * the combiner over them only when one is configured and enough spills exist.
 *
 * NOTE(review): this is an abridged excerpt. {@code kvIter} and
 * {@code writer} are used below but declared outside the lines shown here.
 *
 * @throws IOException declared by the method; raised by the I/O calls below
 * @throws InterruptedException declared by the method
 * @throws ClassNotFoundException declared by the method
 */
private void mergeParts() throws IOException, InterruptedException, ClassNotFoundException{
// Combine during merge only if a combiner exists AND the spill count has
// reached minSpillsForCombine. Presumably that threshold comes from the
// job setting mapreduce.map.combine.minspills -- confirm in MapTask.
if (this.combinerRunner != null && this.numSpills >= this.minSpillsForCombine) {
// Route the combiner's output into the merge writer.
this.combineCollector.setWriter(writer);
this.combinerRunner.combine(kvIter, this.combineCollector);
} else {
// Otherwise write the merged records out without combining.
Merger.writeFile(kvIter, writer, this.reporter, this.job);
}
// Closed on both paths.
writer.close();
}