SequenceFile write operations
1. Create the writer with SequenceFile.createWriter(), which returns a Writer instance. Specify where to write: either an output stream such as an FSDataOutputStream, or a FileSystem object together with a Path. A Configuration object and the key and value classes must also be supplied (both classes need to be serializable).
SequenceFile.Writer writer = SequenceFile.createWriter(conf,
Writer.file(seqFile), Writer.keyClass(Text.class),
Writer.valueClass(Text.class),
Writer.compression(CompressionType.NONE));
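Step 1 also mentions that the write target can be an FSDataOutputStream. A minimal sketch of that variant, assuming a FileSystem handle fs has already been obtained (fs and the FSDataOutputStream import are not part of the snippet above):
FSDataOutputStream out = fs.create(seqFile); // open the output stream ourselves
SequenceFile.Writer streamWriter = SequenceFile.createWriter(conf,
        Writer.stream(out), Writer.keyClass(Text.class),
        Writer.valueClass(Text.class),
        Writer.compression(CompressionType.NONE));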
2. Append new records through the Writer API
// write a record to the file through the writer, e.g.:
writer.append(new Text("key"), new Text("value"));
3. Close the writer stream
IOUtils.closeStream(writer); // close the writer stream
SequenceFile read operations
1. Create a Reader object
SequenceFile.Reader reader = new SequenceFile.Reader(conf,Reader.file(seqFile));
2. Read the records
Text key = new Text();
Text value = new Text();
while (reader.next(key, value)) {
System.out.println(key);
System.out.println(value);
}
3. Close the reader stream
IOUtils.closeStream(reader);
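The read loop above assumes the key and value types are known in advance. When they are not, a common pattern (a sketch, not part of the original text; it additionally needs the Writable and ReflectionUtils imports) is to instantiate them from the reader's own metadata and, optionally, track record positions and sync points:
SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(seqFile));
Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
long position = reader.getPosition(); // byte offset of the next record
while (reader.next(key, value)) {
    String syncMark = reader.syncSeen() ? "*" : ""; // "*" marks a sync point
    System.out.printf("[%s%s]\t%s\t%s%n", position, syncMark, key, value);
    position = reader.getPosition();
}
IOUtils.closeStream(reader);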
Example 1: write a key/value record and read it back
package hdfs;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
public class TestSequenceFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path seqFile = new Path("hdfs://master:9000/input/input1.txt");
        // The Writer inner class performs the write; here both key and value are Text
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                Writer.file(seqFile), Writer.keyClass(Text.class),
                Writer.valueClass(Text.class),
                Writer.compression(CompressionType.NONE));
        // Write a record to the file through the writer
        writer.append(new Text("lala"), new Text("value"));
        IOUtils.closeStream(writer); // close the writer stream
        // Read the records back through a reader
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, Reader.file(seqFile));
        Text key = new Text();
        Text value = new Text();
        while (reader.next(key, value)) {
            System.out.println(key);
            System.out.println(value);
        }
        IOUtils.closeStream(reader); // close the reader stream
    }
}
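Run against the cluster, this program should simply print the single record back: lala on one line and value on the next (adjust hdfs://master:9000 if your NameNode address differs).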
Example 2: write multiple records and read them back (with compression and decompression)
package hdfs;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
public class SequenceFileWriter {
    private static final String[] DATA = {
        "One,two,buckle my shoe",
        "Three,four,shut the door",
        "Five,six,pick up sticks",
        "Seven,eight,lay them straight",
        "Nine,ten,a big fat hen"
    };

    public static void main(String[] args) throws Exception {
        String uri = "hdfs://master:9000/input/aaaa";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        Path path = new Path(uri);
        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
            for (int i = 0; i < 100; i++) {
                key.set(100 - i);
                value.set(DATA[i % DATA.length]);
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
                writer.append(key, value);
            }
        } finally {
            IOUtils.closeStream(writer);
        }
    }
}
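The listing above never sets a compression type explicitly, even though the example's title mentions compression. As a sketch (not part of the original code, and assuming DefaultCodec plus the corresponding imports are acceptable), block compression could be requested through the option-based createWriter:
SequenceFile.Writer compressedWriter = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class),
        SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
Reading such a file needs no extra code: the compression type and codec are recorded in the SequenceFile header, so the Reader decompresses records transparently.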
Because HDFS files follow a write-once, read-many model, appending to an existing SequenceFile appears difficult to achieve; in practice, writes are usually implemented as overwriting the file.
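A sketch of the usual work-around (reusing fs, path, and the key/value classes from Example 2): delete the existing file and rewrite it rather than trying to append.
if (fs.exists(path)) {
    fs.delete(path, false); // false: not recursive, it is a single file
}
SequenceFile.Writer freshWriter = SequenceFile.createWriter(fs, conf, path,
        IntWritable.class, Text.class);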