一:安装eclipse插件
要想在eclipse上进行map-reduce编程,需要安装一个插件,hadoop-eclipse-plugin-1.0.0.jar,可以在这里下载
将插件拷贝到eclipse的plugins目录里即可。打开Eclipse,在 Window → Preferences 中选择 Hadoop Map/Reduce,填写hadoop的安装目录。
Eclipse所在的机器可以是hadoop集群中的任意一个节点
显示map-reduce视图:依次点击 Window → Show View → Other…,选择map-reduce,此时我们可以在Eclipse输出栏看到map-reduce视图
右键选择new hadoop location
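填入配置文件中的主机名和端口号(Map/Reduce Master 对应 mapred-site.xml 中的 mapred.job.tracker,DFS Master 对应 core-site.xml 中的 fs.default.name)。到此就配置完了,可以在Eclipse上看到hdfs文件了,用Eclipse可以方便地上传、删除、查看文件等等。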
如果你安装插件出现问题,那么最可能的原因就是hadoop安装目录没有填写正确
二:编写Map-Reduce程序
新建map-reduce工程,src目录里新建class文件,工程名和类名一样
实现一个倒排索引
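输入:文本文件,每行一条记录,由两个以单个空格分隔的字段组成,即代码中的 anum 和 bnum(格式为“anum bnum”);不足两个字段的行会被跳过,并计入 LINESKIP 计数器。
输出:每个 bnum 一行,格式为“bnum<制表符>anum1|anum2|…|”,即该 bnum 对应的所有 anum,每个 anum 后面跟一个“|”。
思路:map 阶段把每行的 (anum, bnum) 颠倒为键值对 (bnum, anum) 输出;框架按键分组后,reduce 阶段把同一个 bnum 下的所有 anum 用“|”拼接成一个字符串输出。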
代码:
/**
* map_reduce code
* @author wyp
*
*/
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that inverts space-separated {@code "anum bnum"} records.
 *
 * <p>Each input line is split on a single space. The mapper emits the second
 * field as the key and the first field as the value; the reducer concatenates
 * all values that share a key, appending {@code "|"} after each one.
 *
 * <p>Usage: {@code Test_Phonenum <input path> <output path>}.
 */
public class Test_Phonenum extends Configured implements Tool {

    /** Job counters updated by the mapper. */
    enum Counter {
        /** Number of input lines skipped because they had fewer than two fields. */
        LINESKIP,
    }

    /**
     * Emits {@code (second field, first field)} for every input line that has
     * at least two space-separated fields.
     */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Splits one input line and writes the inverted pair.
         *
         * @param key     input key supplied by the framework; not used here
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");

            // A line with fewer than two fields cannot be inverted: count it and
            // move on, rather than relying on an ArrayIndexOutOfBoundsException.
            if (fields.length < 2) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }

            String anum = fields[0];
            String bnum = fields[1];
            context.write(new Text(bnum), new Text(anum));
        }
    }

    /**
     * Joins all values of a key into one string, each value followed by
     * {@code "|"} (so the result ends with a trailing {@code "|"}).
     */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        /**
         * Concatenates the values of {@code key} and writes a single output pair.
         *
         * @param key     the grouping key emitted by the mapper
         * @param values  all values emitted for {@code key}
         * @param context task context used to emit output
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder avoids re-copying the accumulated string on every value.
            StringBuilder joined = new StringBuilder();
            for (Text value : values) {
                joined.append(value.toString()).append('|');
            }
            context.write(key, new Text(joined.toString()));
        }
    }

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     * @throws Exception if job setup or submission fails
     */
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: Test_Phonenum <input path> <output path>");
            ToolRunner.printGenericCommandUsage(System.err);
            return 2;
        }

        Configuration conf = getConf();
        Job job = new Job(conf, "WYP_Test_2");

        // Identify the jar containing this class so it is shipped with the job
        // and the Map/Reduce classes can be loaded by the tasks.
        job.setJarByClass(Test_Phonenum.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits with the status returned by {@link #run}.
     *
     * @param args command-line arguments, passed through {@link ToolRunner}
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new Test_Phonenum(), args);
        System.exit(res);
    }
}