Hadoop定义的SequenceFile和MapFile的编程实现

最新推荐文章于 2020-09-10 16:21:20 发布

赵厚雄

最新推荐文章于 2020-09-10 16:21:20 发布

阅读量328

点赞数

分类专栏：大数据

本文链接：https://blog.csdn.net/nengyu/article/details/83782411

版权

大数据专栏收录该内容

28 篇文章 0 订阅

订阅专栏

Hadoop定义了SequenceFile和MapFile两种数据结构，以适应MapReduce编程框架的需要，Map输出的中间结果就是由它们表示的。其中MapFile是经过排序并带有索引的SequenceFile。

SequenceFile记录的是key/value对的列表，是序列化后的二进制文件，因此不能直接以文本方式查看，但可以通过以下命令查看其内容：

hadoop fs -text myseq.seq

代码实现：

package com.jr.sun.ly;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.junit.Test;


/**
 * Demonstrates writing and reading a Hadoop SequenceFile whose records are
 * (IntWritable key, Text value) pairs.
 *
 * NOTE(review): read() expects the file produced by write() to exist already;
 * nothing in this class enforces the order in which the two tests run.
 */
public class TestSequenceFile {
	/**
	 * Writes three records to /user/hadoop/myseq.seq.
	 *
	 * @throws IOException if the file system cannot be obtained or writing fails
	 */
	@Test
	public void write() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path name = new Path("/user/hadoop/myseq.seq");
		// Alternative target left by the original author: new Path("g:/elyq/myseq.seq")

		// try-with-resources closes the writer even when an append throws,
		// so the underlying output stream is never leaked.
		try (Writer w = SequenceFile.createWriter(fs, conf, name, IntWritable.class, Text.class)) {
			// A SequenceFile is a plain record list, so repeated keys are fine.
			w.append(new IntWritable(100), new Text("tom"));
			w.append(new IntWritable(100), new Text("toms"));
			w.append(new IntWritable(100), new Text("tomLee"));
		}
	}

	/**
	 * Reads /user/hadoop/myseq.seq and prints every record as "key:value".
	 *
	 * @throws IOException if the file cannot be opened or read
	 */
	@Test
	public void read() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		Path name = new Path("/user/hadoop/myseq.seq");
		// The same key/value instances are refilled for each record.
		IntWritable key = new IntWritable();
		Text value = new Text();

		// The reader holds an open input stream; close it when iteration ends
		// or fails.
		try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, name, conf)) {
			// Iterate over all key-value pairs; the loop ends when next() returns false.
			while (reader.next(key)) {
				reader.getCurrentValue(value);
				System.out.println(key.get() + ":" + value.toString());
			}
		}
	}
}

MapFile是已经排过序的SequenceFile，它的使用与SequenceFile类似。

package com.jr.sun.ly;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.junit.Test;

/**
 * Demonstrates writing a Hadoop MapFile of (IntWritable key, Text value)
 * records and looking up an entry by closest key.
 *
 * NOTE(review): getClosestByKey() expects the MapFile produced by write() to
 * exist already; nothing in this class enforces the order in which the two
 * tests run.
 */
public class TestMapFile {
	/**
	 * Writes 1000 records (key i, value "tom" + i, for i = 1..1000) to the
	 * MapFile at /user/hadoop/mymap.
	 *
	 * @throws IOException if the file system cannot be obtained or writing fails
	 */
	@Test
	public void write() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		String name = "/user/hadoop/mymap";
		// One key and one value object are reused for every record instead of
		// allocating two new Writables per iteration.
		IntWritable iw = new IntWritable();
		Text txt = new Text();

		// try-with-resources closes the writer even when an append throws.
		try (MapFile.Writer w = new MapFile.Writer(conf, fs, name, IntWritable.class, Text.class)) {
			w.setIndexInterval(256);
			// Keys are appended in ascending order.
			for (int i = 1; i <= 1000; i++) {
				iw.set(i);
				txt.set("tom" + i);
				w.append(iw, txt);
			}
		}
	}

	/**
	 * Looks up the entry closest to key 0 in /user/hadoop/mymap and prints
	 * the key that was found.
	 *
	 * @throws IOException if the MapFile cannot be opened or read
	 */
	@Test
	public void getClosestByKey() throws IOException {
		Configuration conf = new Configuration();
		FileSystem fs = FileSystem.get(conf);
		String name = "/user/hadoop/mymap";
		// Receives the value of the matched entry.
		Text txt = new Text();

		// The reader keeps the data and index files open; close it when done.
		try (MapFile.Reader reader = new MapFile.Reader(fs, name, conf)) {
			IntWritable key = (IntWritable) reader.getClosest(new IntWritable(0), txt);
			System.out.println(key);
		}
	}
}