For a while we had been getting occasional Nagios alerts from the Hadoop cluster: several resource-manager nodes were continuously over their load threshold. In some spare time I went through the job history and found the cause: a colleague's job had created a huge number of map tasks within a short window. These are my notes from fixing it.
First, the job-history statistics:
- Each map task ran for a very short time, but a large number of them ran at once.
- After talking with my colleague, I learned that his job, built for an operations-team request, had been running all week extracting click counts for specific buttons from the app tracking logs.
- The tracking logs record every user's full navigation paths and parameters:
1) the log agent writes the logs locally via logback;
2) every hour it rolls them into a .gz archive and uploads it to a designated HDFS directory (the directory layout is generated from the application ID plus IP).
From Hadoop's map-split mechanics we can draw the following conclusions (see the splitability sketch after this list):
- Each input file maps to one or more map splits; plain files are cut at the HDFS block size, but gzip is not a splittable codec, so a .gz file always becomes exactly one split.
- Each map split maps to one map task.
- None of the hourly .gz files exceeds the HDFS block size (128 MB), so every file produces exactly one map task.
- The cluster being analyzed has three application nodes, so a week of logs comes to at most 24 × 7 × 3 = 504 files; excluding the small hours that produce no logs, roughly 350.
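For reference, the splitability rule lives in the input format. The sketch below paraphrases the stock TextInputFormat#isSplitable logic from Hadoop 2.x (not a verbatim copy): a file whose codec does not implement SplittableCompressionCodec is never split, and gzip's codec does not.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class SplitabilityCheck extends TextInputFormat {
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        CompressionCodec codec =
                new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
        // no codec: plain text, splittable at block boundaries; otherwise
        // splittable only for codecs like bzip2, never for gzip
        return codec == null || codec instanceof SplittableCompressionCodec;
    }
}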
Fixing the problem
- Hadoop can merge many small input files through CombineFileInputFormat, which improves map performance.
- But the built-in implementations only provide CombineSequenceFileInputFormat and CombineTextInputFormat, with no support for compressed files.
- So the fix is a custom CombineFileInputFormat.
The custom CompressedCombineFileInputFormat
package ctu.components.amada.hadoop.usertrace;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import java.io.IOException;
/**
* Created by pippo on 14/12/23.
*/
public class CompressedCombineFileInputFormat extends CombineFileInputFormat<CompressedRecordKey, Text> {

    @Override
    public RecordReader<CompressedRecordKey, Text> createRecordReader(InputSplit split, TaskAttemptContext context)
            throws IOException {
        // CombineFileRecordReader instantiates one CompressedCombineFileRecordReader per chunk
        // of the combined split, via its (CombineFileSplit, TaskAttemptContext, Integer) constructor
        return new CombineFileRecordReader<>((CombineFileSplit) split,
                context,
                CompressedCombineFileRecordReader.class);
    }
}
CompressedCombineFileRecordReader
package ctu.components.amada.hadoop.usertrace;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.util.LineReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* Created by pippo on 14/12/23.
*/
public class CompressedCombineFileRecordReader extends RecordReader<CompressedRecordKey, Text> {

    private long offset;
    private long end;
    private long pos;
    private CompressedRecordKey key;
    private Text value = new Text();
    private CompressTrunk trunk;
    private LineReader reader;

    public CompressedCombineFileRecordReader(CombineFileSplit split, TaskAttemptContext context, Integer index)
            throws IOException {
        /* Several compressed files are merged into one combine file; each original
         * compressed file is one trunk (chunk) of it, addressed by its index. */
        this.trunk = new CompressTrunk(context.getConfiguration(), split.getPath(index));
        /* start position of this trunk within the combine file */
        this.offset = split.getOffset(index);
        /* end position of this trunk within the combine file; for a compressed trunk,
         * use the decompressed length rather than the (compressed) split length */
        this.end = offset + (trunk.isCompress() ? trunk.getFileLength() : split.getLength(index));
        boolean skipFirstLine = false;
        FSDataInputStream in = trunk.open();
        if (offset != 0) {
            skipFirstLine = true;
            --offset;
            in.seek(offset);
        }
        // trunk.open() caches and returns the same, already positioned stream
        reader = new LineReader(trunk.open());
        // skip first line and re-establish "offset"
        if (skipFirstLine) {
            offset += reader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - offset));
        }
        this.pos = offset;
    }
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        // all setup happens in the constructor, which CombineFileRecordReader invokes per chunk
    }

    @Override
    public void close() throws IOException {
        trunk.close();
    }

    @Override
    public float getProgress() throws IOException {
        if (offset == end) {
            return 0.0f;
        } else {
            return Math.min(1.0f, (pos - offset) / (float) (end - offset));
        }
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        initKey();
        initValue();
        // record where the current record starts
        key.offset = pos;
        int readed = 0;
        // read one line
        if (pos < end) {
            readed = reader.readLine(value);
            pos += readed;
        }
        // nothing read means this trunk has no more records
        if (readed == 0) {
            key = null;
            value = null;
            return false;
        } else {
            return true;
        }
    }

    private void initKey() {
        if (key == null) {
            key = new CompressedRecordKey();
            key.fileName = trunk.getFileName();
        }
    }

    private void initValue() {
        if (value == null) {
            value = new Text();
        }
    }

    @Override
    public CompressedRecordKey getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return value;
    }
    public static class CompressTrunk {

        private Configuration configuration;
        // source (compressed) file
        private Path compressFile;
        // decompressed file
        private Path readableFile;
        private FileSystem fs;
        private CompressionCodecFactory factory;
        private CompressionCodec codec;
        private FSDataInputStream in;

        public CompressTrunk(Configuration configuration, Path compressFile) throws IOException {
            this.configuration = configuration;
            this.compressFile = compressFile;
            this.fs = compressFile.getFileSystem(configuration);
            this.factory = new CompressionCodecFactory(configuration);
            this.codec = factory.getCodec(compressFile);
            prepareReadableFile();
        }

        /* Decompress the trunk to a temporary file (same path minus the codec suffix)
         * so open() can serve an InputStream over the decompressed bytes. */
        protected void prepareReadableFile() throws IOException {
            if (!isCompress()) {
                readableFile = compressFile;
                return;
            }
            String _readFile = CompressionCodecFactory.removeSuffix(compressFile.toString(),
                    codec.getDefaultExtension());
            readableFile = new Path(_readFile);
            InputStream in = null;
            OutputStream out = null;
            try {
                in = codec.createInputStream(fs.open(compressFile));
                out = fs.create(readableFile);
                IOUtils.copyBytes(in, out, configuration);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        public boolean isCompress() {
            return codec != null;
        }

        public String getFileName() {
            return readableFile.getName();
        }

        public long getFileLength() throws IOException {
            return fs.getFileStatus(readableFile).getLen();
        }

        public FSDataInputStream open() throws IOException {
            if (in == null) {
                in = fs.open(readableFile);
            }
            return in;
        }

        // close the stream and delete the temporary decompressed file
        public void close() throws IOException {
            if (in != null) {
                in.close();
            }
            if (isCompress()) {
                fs.delete(readableFile, false);
            }
        }
    }
}
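A note on the design: CompressTrunk decompresses each .gz in full to a sibling HDFS path (the codec suffix stripped), serves reads from the plain copy, and deletes it in close(). This doubles the HDFS I/O per file, but it lets the offset and LineReader logic above stay identical to the uncompressed case.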
CompressedRecordKey
package ctu.components.amada.hadoop.usertrace;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
* Created by pippo on 14/12/23.
*/
public class CompressedRecordKey implements WritableComparable<CompressedRecordKey> {

    // file the record came from
    public String fileName;
    // position of the record within that file
    public long offset;

    public CompressedRecordKey() {
        super();
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.offset = in.readLong();
        this.fileName = in.readUTF();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(offset);
        out.writeUTF(fileName);
    }

    @Override
    public int compareTo(CompressedRecordKey that) {
        int f = this.fileName.compareTo(that.fileName);
        if (f == 0) {
            return Long.compare(this.offset, that.offset);
        }
        return f;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj instanceof CompressedRecordKey) {
            return this.compareTo((CompressedRecordKey) obj) == 0;
        }
        return false;
    }

    @Override
    public int hashCode() {
        final int hashPrime = 47;
        int hash = 13;
        hash = hashPrime * hash + (this.fileName != null ? this.fileName.hashCode() : 0);
        // fold all 64 bits of offset into the hash (>>> 32, not >>> 16)
        hash = hashPrime * hash + (int) (this.offset ^ (this.offset >>> 32));
        return hash;
    }

    @Override
    public String toString() {
        return this.fileName + "-" + this.offset;
    }
}
Job configuration
private void buildMapper(Job job) {
    job.setInputFormatClass(CompressedCombineFileInputFormat.class);
    job.setMapperClass(LogMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(UserTrace.class);
    job.setCombinerClass(LogCombiner.class);
}
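LogMapper, UserTrace, and LogCombiner are project classes not included in this post. Purely as a hypothetical sketch of the mapper's shape (UserTrace.parse and getButtonId are invented stand-ins), the one fixed contract is that the input key/value types must match the custom input format:

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class LogMapper extends Mapper<CompressedRecordKey, Text, Text, UserTrace> {
    @Override
    protected void map(CompressedRecordKey key, Text line, Context context)
            throws IOException, InterruptedException {
        // hypothetical parsing helpers; the real extraction logic is not shown here
        UserTrace trace = UserTrace.parse(line.toString());
        if (trace != null) {
            context.write(new Text(trace.getButtonId()), trace);
        }
    }
}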
Verifying the result
- As the job-history counters show, all input was combined into just three map tasks.
- In total they processed 3.7 GB of compressed data ("File: Number of bytes read"), 37 GB after decompression ("HDFS: Number of bytes read").
A new problem
- CombineFileInputFormat produced no further splits, leaving only three map tasks.
- Each map task's output file was therefore huge, and the shuffle took over an hour.
Locating the problem
Reading the Hadoop source shows that when CombineFileInputFormat is used without explicitly setting CombineFileInputFormat.SPLIT_MAXSIZE, all the blocks gathered on a node are folded into a single split, i.e. there is only one split per node.
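A toy, self-contained simulation of that grouping rule makes the effect visible (this is not the actual Hadoop code, and the file sizes are invented to resemble the hourly .gz files):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class CombineSplitSimulation {

    // blocks accumulate into the current split until maxSplitSize is reached;
    // with maxSplitSize == 0 that never happens, so every block on the node
    // collapses into one split
    static List<Long> combine(long[] blockSizes, long maxSplitSize) {
        List<Long> splits = new ArrayList<>();
        long current = 0;
        for (long block : blockSizes) {
            current += block;
            if (maxSplitSize != 0 && current >= maxSplitSize) {
                splits.add(current);
                current = 0;
            }
        }
        if (current > 0) {
            splits.add(current); // leftover blocks form the final split
        }
        return splits;
    }

    public static void main(String[] args) {
        long[] blocks = new long[120];  // ~120 hourly .gz files on one node
        Arrays.fill(blocks, 30L << 20); // ~30 MB each
        System.out.println(combine(blocks, 0).size());          // 1  -> one map task
        System.out.println(combine(blocks, 128L << 20).size()); // 24 -> parallel map tasks
    }
}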
Solving the problem
Set CombineFileInputFormat.SPLIT_MAXSIZE to the same value as the HDFS block size.
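In the job setup this is a single call (the helper name below is ours); FileInputFormat.setMaxInputSplitSize writes the same mapreduce.input.fileinputformat.split.maxsize property that CombineFileInputFormat consults when grouping blocks:

private void capSplitSize(Job job) {
    // cap each combined split at the HDFS block size (128 MB on this cluster)
    FileInputFormat.setMaxInputSplitSize(job, 128L * 1024 * 1024);
    // equivalent, via the constant mentioned above:
    // job.getConfiguration().setLong(CombineFileInputFormat.SPLIT_MAXSIZE, 128L * 1024 * 1024);
}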
Checking the result
- Processing 4.5 GB of logs (45 GB decompressed) now takes about 20 minutes end to end.
- The map phase handles 140 million records in 5 minutes 51 seconds.
- With the map output compressed using lz4 (configuration sketched below), the shuffle time dropped to 11 minutes.
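For completeness, lz4 map-output compression is plain MapReduce configuration; a minimal sketch using the standard Hadoop 2.x property names (helper name is ours):

private void compressMapOutput(Job job) {
    Configuration conf = job.getConfiguration();
    // compress intermediate map output to shrink shuffle traffic
    conf.setBoolean("mapreduce.map.output.compress", true);
    conf.setClass("mapreduce.map.output.compress.codec",
            Lz4Codec.class, CompressionCodec.class);
}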