hadoop-java——MapReduce编程框架的设计

最新推荐文章于 2024-04-22 22:11:27 发布

pat_datamine

最新推荐文章于 2024-04-22 22:11:27 发布

阅读量584

点赞数

分类专栏： MapReduce编程(java)

本文链接：https://blog.csdn.net/pat_datamine/article/details/42784685

版权

MapReduce编程(java) 专栏收录该内容

4 篇文章

订阅专栏

最近一个朋友问我：能不能不搭建hadoop就练习MapReduce编程呢？下面是我用java设计的一个MapReduce编程练习框架。它虽然不是分布式计算，但模拟了MapReduce的处理过程：你可以在这个框架上编写mapper函数和reducer函数，编写的格式与在hadoop上编写的要求相同。编写这个框架的作用在于练习与测试：不需要安装hadoop就可以练习MapReduce编程；在编写MapReduce算法时，也可以利用该框架对所设计的算法进行快速测试。

第一个类：MapperReduce


<pre name="code" class="java">import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;


/**
 * Single-process practice harness that imitates the MapReduce flow using
 * plain text files: input lines are fed to {@link #Mapper}, mapper output is
 * appended to an intermediate file, {@link #shuff()} groups that file by key,
 * and each group is fed to {@link #Reducer}, whose output is appended to the
 * output file.
 *
 * <p>Subclasses override {@link #Mapper} and {@link #Reducer}; the versions
 * here are empty hooks. Every line of the intermediate and output files has
 * the form {@code key + " " + value}.
 */
public class MapperReduce {
	/** Default locations, used by the no-argument constructor. */
	private static final String DEFAULT_INPUT_PATH="F:\\input.txt";
	private static final String DEFAULT_MAP_RESULT_PATH="F:\\map_result.txt";
	private static final String DEFAULT_OUTPUT_PATH="F:\\output.txt";

	private final String inputPath;
	private final String mapResultPath;
	private final String outputPath;

	/**
	 * Creates a harness that uses the default file locations and removes any
	 * intermediate/output file left over from a previous run.
	 */
	public MapperReduce(){
		this(DEFAULT_INPUT_PATH,DEFAULT_MAP_RESULT_PATH,DEFAULT_OUTPUT_PATH);
	}

	/**
	 * Creates a harness that uses the given file locations and removes any
	 * intermediate/output file left over from a previous run.
	 *
	 * @param inputPath     file read line by line by {@link #IterMapper()}
	 * @param mapResultPath file appended to by {@link #WriteMaper} and read by {@link #shuff()}
	 * @param outputPath    file appended to by {@link #WriteReducer}
	 */
	public MapperReduce(String inputPath,String mapResultPath,String outputPath){
		this.inputPath=inputPath;
		this.mapResultPath=mapResultPath;
		this.outputPath=outputPath;
		// The writers open these files in append mode, so stale content must
		// go first. The very same path strings are used here and in the
		// readers/writers so that the file deleted is the file written later.
		deleteIfPresent(mapResultPath);
		deleteIfPresent(outputPath);
	}

	/** Best-effort removal of a file; a failed delete is not reported. */
	private static void deleteIfPresent(String path){
		File file=new File(path);
		if(file.exists()){
			file.delete();
		}
	}

	/**
	 * Shuffle step: reads the mapper output file and groups the values of
	 * identical keys, preserving the order in which the values were written.
	 *
	 * <p>Each line is split at its first space into key and value. A line
	 * without any space contributes the value {@code " "}. Lines whose key is
	 * the empty string (blank lines, or lines starting with a space) are
	 * discarded.
	 *
	 * @return map from key to the list of values emitted for that key
	 * @throws IOException if the mapper output file cannot be read
	 */
	public Map<String,List<String>> shuff() throws IOException{
		Map<String,List<String>> map=new HashMap<String,List<String>>();
		BufferedReader br=new BufferedReader(new FileReader(mapResultPath));
		try{
			String line;
			while((line=br.readLine())!=null){
				// limit 2: only the first space separates key from value
				String[] content=line.split(" ",2);
				List<String> list=map.get(content[0]);
				if(list==null){
					list=new ArrayList<String>();
					map.put(content[0],list);
				}
				list.add(content.length==2?content[1]:" ");
			}
		}finally{
			br.close();
		}
		map.remove("");// drop records whose key is the empty string
		return map;
	}

	/**
	 * Reads the input file line by line and calls {@link #Mapper} for every
	 * line that is not blank. The key passed is the 1-based line number
	 * (blank lines are counted too); the value is the whole line.
	 *
	 * @throws IOException if the input file cannot be read or the mapper fails
	 */
	public void IterMapper() throws IOException{
		BufferedReader br=new BufferedReader(new FileReader(inputPath));
		try{
			String line;
			int count=1;
			while((line=br.readLine())!=null){
				if(!line.trim().equals("")){
					Mapper(String.valueOf(count),line);
				}
				count +=1;
			}
		}finally{
			br.close();
		}
	}

	/**
	 * Runs the shuffle step and calls {@link #Reducer} once per key with an
	 * iterator over that key's values.
	 *
	 * @throws IOException if shuffling or reducing fails
	 */
	public void IterReducer() throws IOException{
		Map<String,List<String>> map=shuff();
		for(Map.Entry<String,List<String>> entry:map.entrySet()){
			Reducer(entry.getKey(),entry.getValue().iterator());
		}
	}

	/**
	 * Appends one record to the mapper output file.
	 *
	 * @param new_key   key of the record
	 * @param new_value value of the record
	 * @throws IOException if the file cannot be written
	 */
	public void WriteMaper(String new_key,String new_value) throws IOException{
		appendRecord(mapResultPath,new_key,new_value);
	}

	/**
	 * Appends one record to the reducer output file.
	 *
	 * @param new_key   key of the record
	 * @param new_value value of the record
	 * @throws IOException if the file cannot be written
	 */
	public void WriteReducer(String new_key,String new_value) throws IOException{
		appendRecord(outputPath,new_key,new_value);
	}

	/** Appends {@code key + " " + value + "\r\n"} to the file at {@code path}. */
	private static void appendRecord(String path,String key,String value) throws IOException{
		FileWriter fw=new FileWriter(path,true);
		try{
			fw.write(key+" "+value+"\r\n");
		}finally{
			fw.close();// close even when the write fails
		}
	}

//map function begin
	/**
	 * Map hook, empty here; subclasses override it and emit records through
	 * {@link #WriteMaper}.
	 *
	 * @param key   1-based line number of the input line, as a string
	 * @param value the input line
	 * @throws IOException if emitting a record fails
	 */
	public void Mapper(String key,String value) throws IOException{

	}
//map function end

//reduce function begin
	/**
	 * Reduce hook, empty here; subclasses override it and emit records
	 * through {@link #WriteReducer}.
	 *
	 * @param key   a key produced by the mappers
	 * @param value iterator over all values emitted for {@code key}
	 * @throws IOException if emitting a record fails
	 */
	public void Reducer(String key,Iterator<String> value) throws IOException{

	}
//reduce function end
}

第二个类：MapReduce

<pre name="code" class="java">import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;


/**
 * WordCount example built on {@code MapperReduce}: the mapper emits
 * {@code (word, 1)} for every word of an input line and the reducer writes,
 * for each word, how many records were emitted for it.
 */
public class MapReduce extends MapperReduce{

	public MapReduce(){
		super();
	}

	//map function begin
	/**
	 * Emits one {@code (word, "1")} record per whitespace-separated word.
	 *
	 * <p>Words are split on any run of spaces, tabs or line-break characters
	 * (the default {@link StringTokenizer} delimiters), so leading, trailing
	 * and repeated whitespace never produces an empty word.
	 *
	 * @param key   key supplied by the framework for this line (not used)
	 * @param value the input line
	 * @throws IOException if a record cannot be written
	 */
	public void Mapper(String key,String value) throws IOException{
		StringTokenizer words=new StringTokenizer(value);
		while(words.hasMoreTokens()){
			WriteMaper(words.nextToken(),String.valueOf(1));
		}
	}
	//map function end

	//reduce function begin
	/**
	 * Writes {@code (key, n)} where {@code n} is the number of values the
	 * iterator yields; the values themselves are not inspected.
	 *
	 * @param key   the word
	 * @param value iterator over the records emitted for {@code key}
	 * @throws IOException if the result cannot be written
	 */
	public void Reducer(String key,Iterator<String> value) throws IOException{
		int count=0;
		while(value.hasNext()){
			value.next();
			count +=1;
		}
		WriteReducer(key,String.valueOf(count));
	}
	//reduce function end

	/** Runs the map phase followed by the shuffle/reduce phase. */
	public static void main(String[] args) throws IOException {
		MapReduce a=new MapReduce();
		a.IterMapper();
		a.IterReducer();
	}
}

在第二个类中编写 Mapper 和 Reducer 函数就可以了（上面这个例子我编写的是 WordCount）。其中输入文件放在 F:\input.txt，输出文件放在 F:\output.txt。

本例子中的一些知识点总结如下：

1、文件的删除：
File file=new File("F:","map_result.txt");
if(file.exists()){
file.delete();
}

2、按行读取文本方法：
BufferedReader br=new BufferedReader(new
FileReader("F:\\map_result.txt"));

String line="";
while((line=br.readLine())!=null){}

3、建立可扩展数组方法：List<String> list=new ArrayList<String>(); list.add(str);

4、对一个字符串按某个字符串划分的方法：String[] content=line.split(" ",n); 其中n表示最多得到n段，即只在从左至右的前n-1个分隔符处划分，其余部分原样保留在最后一段中

5、判断某个字符串是否不全为空白的方法：!str.trim().equals("")（去掉前面的"!"则判断字符串是否只含空白），其中trim用于去除str两端的空白字符

6、在文本中输出换行符的方法：
FileWriter file=new FileWriter("F:\\output.txt",true);
String line=str+"\r\n";
file.write(line);
file.close();