程序如下:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * A single-process, file-backed imitation of the map / shuffle / reduce flow.
 *
 * <p>{@link #IterMapper()} feeds every non-blank input line to
 * {@link #Mapper(String, String)}; mappers emit records through
 * {@link #WriteMaper(String, String)} into an intermediate file;
 * {@link #shuff()} groups those records by key; {@link #IterReducer()} hands
 * each group to {@link #Reducer(String, Iterator)}, which emits results
 * through {@link #WriteReducer(String, String)}.
 *
 * <p>Subclasses supply the job logic by overriding {@code Mapper} and
 * {@code Reducer}; the versions in this class do nothing.
 *
 * <p>All files are read and written with the JVM's default charset.
 */
public class MapperReduce {
    /** Default input file, one record per line. */
    private static final String DEFAULT_INPUT_PATH = "F:\\input.txt";
    /** Default intermediate file holding the mapper output. */
    private static final String DEFAULT_MAP_RESULT_PATH = "F:\\map_result.txt";
    /** Default file receiving the reducer output. */
    private static final String DEFAULT_OUTPUT_PATH = "F:\\output.txt";

    private final String inputPath;
    private final String mapResultPath;
    private final String outputPath;

    /**
     * Creates a job that uses the default files on drive {@code F:} and
     * removes any intermediate/output file left over from a previous run.
     */
    public MapperReduce() {
        this(DEFAULT_INPUT_PATH, DEFAULT_MAP_RESULT_PATH, DEFAULT_OUTPUT_PATH);
    }

    /**
     * Creates a job that uses the given files and removes any existing
     * intermediate/output file.
     *
     * @param inputPath     file read by {@link #IterMapper()}
     * @param mapResultPath intermediate file written by {@link #WriteMaper(String, String)}
     *                      and read by {@link #shuff()}
     * @param outputPath    file written by {@link #WriteReducer(String, String)}
     * @throws IllegalArgumentException if any path is {@code null}
     */
    public MapperReduce(String inputPath, String mapResultPath, String outputPath) {
        if (inputPath == null || mapResultPath == null || outputPath == null) {
            throw new IllegalArgumentException("file paths must not be null");
        }
        this.inputPath = inputPath;
        this.mapResultPath = mapResultPath;
        this.outputPath = outputPath;
        // Both writers open their file in append mode, so leftovers must go.
        // The very same path strings are used here and in the writers so that
        // the file being deleted is always the file being appended to.
        // Deletion is best-effort: a missing file is not an error.
        new File(mapResultPath).delete();
        new File(outputPath).delete();
    }

    /**
     * Shuffle step: reads the intermediate file and merges records that share
     * a key.
     *
     * <p>Each line is split at its first space into key and value. A line
     * without a space contributes the single-space string {@code " "} as its
     * value. Records whose key is the empty string (blank lines, or lines that
     * start with a space) are discarded.
     *
     * @return a map from key to the values recorded for it, in file order;
     *         empty when no intermediate file exists (nothing was emitted)
     * @throws IOException if the intermediate file cannot be read
     */
    public Map<String,List<String>> shuff() throws IOException {
        Map<String, List<String>> grouped = new HashMap<String, List<String>>();
        if (!new File(mapResultPath).exists()) {
            // No mapper ever wrote a record, so there is nothing to group.
            return grouped;
        }
        BufferedReader reader = new BufferedReader(new FileReader(mapResultPath));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split(" ", 2);
                String key = parts[0];
                String value = parts.length == 2 ? parts[1] : " ";
                List<String> values = grouped.get(key);
                if (values == null) {
                    values = new ArrayList<String>();
                    grouped.put(key, values);
                }
                values.add(value);
            }
        } finally {
            reader.close();
        }
        // Drop the records that had an empty key.
        grouped.remove("");
        return grouped;
    }

    /**
     * Reads the input file line by line and calls
     * {@link #Mapper(String, String)} for every line that is not blank.
     * The key passed to the mapper is the 1-based line number (blank lines
     * are counted but not mapped); the value is the full line.
     *
     * @throws IOException if the input file cannot be read or a mapper fails
     */
    public void IterMapper() throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(inputPath));
        try {
            int lineNumber = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.trim().equals("")) {
                    Mapper(String.valueOf(lineNumber), line);
                }
                lineNumber += 1;
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Runs the shuffle step and calls {@link #Reducer(String, Iterator)} once
     * per key with an iterator over that key's values.
     *
     * @throws IOException if shuffling or a reducer fails
     */
    public void IterReducer() throws IOException {
        for (Map.Entry<String, List<String>> group : shuff().entrySet()) {
            Reducer(group.getKey(), group.getValue().iterator());
        }
    }

    /**
     * Appends one mapper record, {@code new_key + " " + new_value}, as a line
     * of the intermediate file.
     *
     * @param new_key   key of the emitted record
     * @param new_value value of the emitted record
     * @throws IOException if the intermediate file cannot be written
     */
    public void WriteMaper(String new_key,String new_value) throws IOException {
        appendRecord(mapResultPath, new_key, new_value);
    }

    /**
     * Appends one reducer record, {@code new_key + " " + new_value}, as a line
     * of the output file.
     *
     * @param new_key   key of the emitted record
     * @param new_value value of the emitted record
     * @throws IOException if the output file cannot be written
     */
    public void WriteReducer(String new_key,String new_value) throws IOException {
        appendRecord(outputPath, new_key, new_value);
    }

    /** Appends "key value\r\n" to the file at {@code path}, always closing the writer. */
    private static void appendRecord(String path, String key, String value) throws IOException {
        FileWriter writer = new FileWriter(path, true);
        try {
            writer.write(key + " " + value + "\r\n");
        } finally {
            writer.close();
        }
    }

    /**
     * Map hook; does nothing here. Subclasses override it and emit records
     * with {@link #WriteMaper(String, String)}.
     *
     * @param key   1-based line number of the input line, as a string
     * @param value the input line
     * @throws IOException if emitting a record fails
     */
    public void Mapper(String key,String value) throws IOException {
    }

    /**
     * Reduce hook; does nothing here. Subclasses override it and emit results
     * with {@link #WriteReducer(String, String)}.
     *
     * @param key   the shuffled key
     * @param value iterator over every value recorded for {@code key}
     * @throws IOException if emitting a result fails
     */
    public void Reducer(String key,Iterator<String> value) throws IOException {
    }
}
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
/**
 * Single-table self-join job built on {@link MapperReduce}.
 *
 * <p>Every input line is expected to hold two whitespace-separated fields,
 * {@code item joinKey}. The mapper emits each item twice under its join key,
 * once tagged {@code 1} and once tagged {@code 2}; the reducer then writes
 * every pairing of a tag-1 item with a tag-2 item that share a join key.
 */
public class MapReduce extends MapperReduce{
    /** Tag marking the copy of a record that plays the left side of the join. */
    private static final String LEFT_TAG = "1";
    /** Tag marking the copy of a record that plays the right side of the join. */
    private static final String RIGHT_TAG = "2";

    public MapReduce(){
        super();
    }

    /**
     * Writes one reducer record for every combination of an element of
     * {@code list1} with an element of {@code list2}. Nothing is written when
     * either list is empty.
     *
     * @param list1 left-side items, written as the record key
     * @param list2 right-side items, written as the record value
     * @throws IOException if a record cannot be written
     */
    public void printf(List<String> list1,List<String> list2) throws IOException{
        for (String left : list1) {
            for (String right : list2) {
                WriteReducer(left, right);
            }
        }
    }

    /**
     * Map step (overrides the empty hook of the parent class): emits the first
     * field of the line twice under the second field as key, tagged as left
     * and right copy respectively.
     *
     * <p>Fields are separated by runs of whitespace and surrounding whitespace
     * is ignored. Lines with fewer than two fields are skipped instead of
     * failing; fields after the second are ignored.
     *
     * @param key   line number of the input line (unused)
     * @param value the input line
     * @throws IOException if a record cannot be written
     */
    public void Mapper(String key,String value) throws IOException{
        String[] fields = value.trim().split("\\s+");
        if (fields.length < 2) {
            // No join key on this line, so there is nothing to emit.
            return;
        }
        String item = fields[0];
        String joinKey = fields[1];
        WriteMaper(joinKey, item + " " + LEFT_TAG);
        WriteMaper(joinKey, item + " " + RIGHT_TAG);
    }

    /**
     * Reduce step (overrides the empty hook of the parent class): splits the
     * values of one join key into left-tagged and right-tagged items and
     * writes every left/right pairing.
     *
     * <p>Each value is expected to look like {@code item tag}. A value tagged
     * {@code 1} goes to the left side; any other tag goes to the right side.
     * Values without a tag (for example the single-space placeholder the
     * shuffle step inserts for a record that had no value) are skipped.
     *
     * @param key   the join key (unused beyond grouping)
     * @param value iterator over the tagged items recorded for {@code key}
     * @throws IOException if a record cannot be written
     */
    public void Reducer(String key,Iterator<String> value) throws IOException{
        List<String> leftItems = new ArrayList<String>();
        List<String> rightItems = new ArrayList<String>();
        while (value.hasNext()) {
            String[] parts = value.next().split(" ");
            if (parts.length < 2) {
                // Malformed record: no tag to decide which side it belongs to.
                continue;
            }
            if (LEFT_TAG.equals(parts[1])) {
                leftItems.add(parts[0]);
            } else {
                rightItems.add(parts[0]);
            }
        }
        printf(leftItems, rightItems);
    }

    /**
     * Runs the map phase followed by the shuffle/reduce phase on the default
     * files of the parent class.
     *
     * @param args unused
     * @throws IOException if any file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        MapReduce job = new MapReduce();
        job.IterMapper();
        job.IterReducer();
    }
}
本程序实例是:单表自连接,连接字段是 user(即输入每行的第二列);输出为同一 user 下所有 star 的两两配对(包括与自身的配对)。
输入:
star1 user1
star2 user1
star3 user1
star3 user2
输出:
star1 star1
star1 star2
star1 star3
star2 star1
star2 star2
star2 star3
star3 star1
star3 star2
star3 star3
star3 star3