本文定义的InputFormat主要用于处理不需要分割的文件(例如视频、音频等),并将key值定为文件名,value值定为文件路径。
代码如下:
/**
 * Input format for files that must be handed to a task whole (for example
 * video or audio files).
 *
 * <p>Files are never split, and each split produces exactly one record whose
 * key is the file name and whose value is the full file path. The file
 * contents are not read by this class; the consumer of the record is expected
 * to open the path itself.
 */
public class videoInputFormat extends FileInputFormat<Text,Text>{

    /**
     * Record reader that emits a single (file name, file path) pair per split.
     */
    private static class videoRecordReader extends RecordReader<Text,Text>{
        /** Offset of the first byte of the split. */
        private long start;
        /** Current logical position; jumps from {@code start} to {@code end} once the record is emitted. */
        private long pos;
        /** Offset one past the last byte of the split. */
        private long end;
        /** Length of the split in bytes. */
        private long length;
        private String videoName;
        private String videoPath;
        private Text key=null;
        private Text value=null;

        /**
         * Captures the split's byte range and the name and path of its file.
         *
         * <p>No stream is opened here: the record carries only the file name
         * and path, so opening the file would hold a handle for the lifetime
         * of the reader without ever reading from it.
         *
         * @param genericSplit the split to read; must be a {@link FileSplit}
         * @param context the task attempt context (not used)
         */
        @Override
        public void initialize(InputSplit genericSplit, TaskAttemptContext context)
                throws IOException, InterruptedException {
            FileSplit split=(FileSplit)genericSplit;
            start=split.getStart();
            length=split.getLength();
            end=start+length;
            pos=start;

            final Path path=split.getPath();
            videoName=path.getName();
            videoPath=path.toString();
        }

        /** Nothing to release: this reader holds no open resources. */
        @Override
        public void close() throws IOException {
        }

        /**
         * @return the file name of the current record, or {@code null} if
         *         {@link #nextKeyValue()} has not yet produced a record
         */
        @Override
        public Text getCurrentKey() throws IOException, InterruptedException {
            return key;
        }

        /**
         * @return the file path of the current record, or {@code null} if
         *         {@link #nextKeyValue()} has not yet produced a record
         */
        @Override
        public Text getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        /**
         * @return the fraction of the split consumed, in the range [0, 1];
         *         always 0 for a zero-length split
         */
        @Override
        public float getProgress() throws IOException, InterruptedException {
            if (start == end) {
                return 0.0f;
            }
            return Math.min(1.0f, (pos - start) / (float)(end - start));
        }

        /**
         * Produces the single record of this split on the first call.
         *
         * @return {@code true} on the first call for a non-empty split;
         *         {@code false} afterwards, and always for a zero-length split
         */
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (pos >= end) {
                return false;
            }
            if (key == null) {
                key=new Text();
            }
            key.set(videoName);
            if (value == null) {
                value=new Text();
            }
            value.set(videoPath);
            // Consume the whole split at once so the next call reports end of input.
            pos+=length;
            return true;
        }
    }

    /**
     * Creates the reader for a split; the framework calls
     * {@code initialize} on it before use.
     *
     * @param split the split to be read
     * @param context the task attempt context
     * @return a new, uninitialized record reader
     */
    @Override
    public RecordReader<Text, Text> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        return new videoRecordReader();
    }

    /**
     * Files handled by this format are never split, so each file is processed
     * as a single unit.
     *
     * @return always {@code false}
     */
    @Override
    protected boolean isSplitable(JobContext context, Path file) {
        return false;
    }
}
注:本文中设计的InputFormat比较简单,对于商用还远远不够,需要进一步改进。