一,环境准备
java环境,eclipse,hadoop-2.x(windows环境下)
此处本人所用hadoop包链接:https://pan.baidu.com/s/1230HUG2HluDsP1FT-tXa-g密码:sodt(此处文件已全部替换完毕)
首先从网上下载64位winutils.exe和hadoop.dll,将文件复制到hadoop\bin目录下,将lib文件中的本地库替换为windows版本库,并新建系统环境变量HADOOP_HOME指向hadoop根目录
此时,hadoop\bin目录中的文件
lib目录中的文件
二,本次运行的MapReduce
此处运行例子为简单的单词计数统计程序,代码如下
1.WordcountMapper:
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public WordcountMapper extends Mapper<LongWritable,Text,Text,IntWritable>
{
@Override
protected void map(LongWritable key ,Text,Context context)throws IOException, InterruptedException
{
//拿到一行数据转换为字符串
String line= value.toString ();
//将这一行切分出各个单词
String [] words = line.split(“”);
//遍历数组,输出<word,1>
for(String word:words){
context.write(new Text(word),new IntWritable(1));
}
}
}
2.WordcountReducer
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
puiblic void WordcountReducer extens Reducer<Text,IntWritable,Text,IntWritable>
{
//键单词//值:[1,1] //可迭代<IntWritable>迭代器
@Override
protected void reduce(Text key, Iterable<IntWritable> values,Context context)throws IOException,InterruptedException
{
//定义一个计数器
int count = 0;
//遍历这一组kv的所有v,累加到count中
for(IntWritable value:values){
count + = value.get();
}
context.write(key,new IntWritable(count));
}
}
3. WordcountDriver:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordcountDriver {
public static void main(String[] args) throws Exception {
Configuration conf =new Configuration();
//是否运行为本地模式,就是看这个参数值是否为local,默认就是local
conf.set("mapreduce.framework.name", "local");
conf.set("fs.defaults", "file:///");
Job job=Job.getInstance(conf);
//jar包路径
job.setJarByClass(WordcountDriver.class);
//指定本业务job
job.setMapperClass(WordcountMapper.class);
job.setReducerClass(WordcountReducer.class);