Today I tried setting up Hadoop MapReduce, so here are my study notes. Go easy on me, experts. orz
1. Set up the JobTracker
I set it up on my first virtual machine.
Edit conf/mapred-site.xml:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>192.168.121.128:9001</value>
  </property>
</configuration>
Then copy this config file over to the other machine as well (see the scp example below).
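For example, something like this should do it (assuming the second node is at 192.168.121.129 and Hadoop lives at the same path there; adjust to your setup):
scp conf/mapred-site.xml root@192.168.121.129:/opt/hadoop-1.2/conf/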
2. Configure the Hadoop plugin in Eclipse
I don't recommend IntelliJ IDEA here; there's no plugin for it, so everything becomes a hassle.
First, get permissions out of the way.
In hdfs-site.xml, disable permission checking by adding the following:
<property>
  <name>dfs.permissions</name>
  <value>false</value>
</property>
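Note that HDFS needs a restart for this to take effect. On Hadoop 1.x, from the bin directory, something like:
./stop-all.sh
./start-all.sh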
Download link for the plugin (it's free; please leave a 5-star rating QAQ):
http://download.csdn.net/detail/haoliang94/9276219
Drop it into Eclipse's plugins folder. If you're not sure how, a quick search will show you.
Then configure the plugin itself, as sketched below.
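Roughly: in the Map/Reduce perspective, define a new Hadoop location and fill in the two masters to match the cluster config. Based on the settings above it would look something like this (the DFS Master port 9000 is my assumption for the default fs.default.name in core-site.xml; check yours):
Map/Reduce Master: 192.168.121.128, port 9001
DFS Master: 192.168.121.128, port 9000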
3. Start coding
Let's warm up with a simple word-count program.
WcMapper
package com.hadoop.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // split the input line into whitespace-separated words
        StringTokenizer st = new StringTokenizer(value.toString());
        while (st.hasMoreTokens()) {
            String word = st.nextToken();
            context.write(new Text(word), new IntWritable(1)); // emit (word, 1) as map output
        }
    }
}
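To make the mapper concrete: given an input line like "hello world hello", it emits (hello, 1), (world, 1), (hello, 1). The framework then sorts and groups these pairs by key before handing them to the reducer.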
WcReducer
package com.hadoop.mr;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WcReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> iterable, Context context)
            throws IOException, InterruptedException {
        // sum up all the counts the framework grouped under this word
        int sum = 0;
        for (IntWritable i : iterable) {
            sum += i.get();
        }
        context.write(key, new IntWritable(sum)); // emit (word, total)
    }
}
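Continuing the example, the reducer sees hello -> [1, 1] and world -> [1], and writes out hello 2 and world 1.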
JobRun
package com.hadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobRun {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // point the client at the JobTracker configured above
        conf.set("mapred.job.tracker", "192.168.121.128:9001");
        try {
            Job job = new Job(conf);
            job.setJarByClass(JobRun.class);
            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // final output types written by the reducer
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            /* job.setNumReduceTasks(1); */ // number of reduce tasks; defaults to 1
            // input directory (or file) and output directory
            FileInputFormat.addInputPath(job, new Path("/opt/hadoop-1.2/mapred/input"));
            FileOutputFormat.setOutputPath(job, new Path("/opt/hadoop-1.2/mapred/out"));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
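One gotcha: the output directory must not exist yet, otherwise the job fails with an "output directory already exists" error. When rerunning, delete it first, for example:
./hadoop fs -rmr /opt/hadoop-1.2/mapred/out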
Of course you also need a test file; create one yourself and put it in the input directory on HDFS (see the example below).
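For example (wc.txt is just a placeholder name):
echo "hello world hello hadoop" > wc.txt
./hadoop fs -mkdir /opt/hadoop-1.2/mapred/input
./hadoop fs -put wc.txt /opt/hadoop-1.2/mapred/input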
Package the project as a jar and copy it over to the virtual machine.
Then run it with the command ./hadoop jar /root/wc.jar com.hadoop.mr.JobRun
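If it runs successfully, you can check the word counts with something like:
./hadoop fs -cat /opt/hadoop-1.2/mapred/out/part-r-00000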