sequencefile中的数据是以key,value对存储的。
通过mapreduce模式,可以读取sequencefile中的数据。
public class MapReduceReadFile {
private static SequenceFile.Reader reader = null;
private static Configuration conf = new Configuration();
public static class ReadFileMapper extends
Mapper<LongWritable, Text, LongWritable, Text> {
/* (non-Javadoc)
* @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
*/
@Override
public void map(LongWritable key, Text value, Context context) {
key = (LongWritable) ReflectionUtils.newInstance(
reader.getKeyClass(), conf);
value = (Text) ReflectionUtils.newInstance(
reader.getValueClass(), conf);
try {
while (reader.next(key, value)) {
System.out.printf("%s\t%s\n", key, value);
context.write(key, value);
}
} catch (IOException e1) {
e1.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
/**
* @param args
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
*/
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Job job = new Job(conf,"read seq file");
job.setJarByClass(MapReduceReadFile.class);
job.setMapperClass(ReadFileMapper.class);
job.setMapOutputValueClass(Text.class);
Path path = new Path("logfile2");
FileSystem fs = FileSystem.get(conf);
reader = new SequenceFile.Reader(fs, path, conf);
FileInputFormat.addInputPath(job, path);
FileOutputFormat.setOutputPath(job, new Path("result"));
System.exit(job.waitForCompletion(true)?0:1);
}
}