This post sets up a fairly recent Hadoop release, hadoop-2.6.0.
1. Setting up a pseudo-distributed (single-node) cluster
Install SSH on the Linux machine and enable passphrase-less login to localhost,
then install the JDK and configure the JDK environment variables.
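A minimal sketch of those two steps; the JDK path is an assumption, adjust it to your own install:

# enable passphrase-less SSH login to localhost
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost   # should now log in without prompting for a password

# JDK environment variables, e.g. in ~/.bashrc (path is an assumption)
export JAVA_HOME=/usr/local/jdk1.7.0
export PATH=$JAVA_HOME/bin:$PATH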
Configure the Hadoop parameters.
Edit mapred-site.xml:
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:9001</value>
  </property>
</configuration>
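Note that mapred.job.tracker is the old Hadoop 1.x JobTracker property; Hadoop 2.x ignores it unless you run the legacy MRv1 daemons. To run MapReduce jobs on YARN instead, which is the usual choice on 2.6.0, the equivalent setting would be:

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>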
Edit core-site.xml:
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop-2.6.0/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <!-- file system properties -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
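fs.default.name still works on 2.6.0 but is deprecated; the current name for the same property is fs.defaultFS. A pseudo-distributed setup also typically sets the HDFS replication factor to 1 in hdfs-site.xml, since there is only one DataNode, along these lines:

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>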
With these files in place, the basic configuration is done.
Start Hadoop:
go into sbin and run the command ./start-all.sh.
Once it finishes, use the jps command to check that the expected daemons started properly.
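One detail the steps above rely on: the NameNode must be formatted once before the very first start, otherwise it will not come up. From the hadoop-2.6.0 directory:

bin/hdfs namenode -format   # only on a fresh install
sbin/start-all.sh
jps

On a healthy pseudo-distributed node, jps should list roughly NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager and Jps itself; the exact set depends on which daemons start-all.sh launches on your build.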
2. Going further with Hadoop (developing with Eclipse on Linux)
The code below implements a simple map-reduce job that computes each student's average score:
package mapreduce;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/***
 * AvgScore computes each student's average score. It implements the Tool
 * interface so that ToolRunner can initialize the Hadoop Configuration for us.
 */
public class AvgScore implements Tool {
    public static final Logger log = LoggerFactory.getLogger(AvgScore.class);
    Configuration configuration;

    // Mapper written against the new (0.20.2+) MapReduce API
    public static class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String stuInfo = value.toString(); // convert the input text to a String
            System.out.println("studentInfo:" + stuInfo);
            log.info("MapStudentInfo:" + stuInfo);
            // split the input into lines first (TextInputFormat already delivers
            // one line per call, so this outer loop normally runs exactly once)
            StringTokenizer tokenizerArticle = new StringTokenizer(stuInfo, "\n");
            // process each line
            while (tokenizerArticle.hasMoreTokens()) {
                // split the line on whitespace
                StringTokenizer tokenizer = new StringTokenizer(tokenizerArticle.nextToken());
                String name = tokenizer.nextToken();  // student name
                String score = tokenizer.nextToken(); // score
                Text stu = new Text(name);
                int intscore = Integer.parseInt(score);
                log.info("MapStu:" + stu.toString() + " " + intscore);
                context.write(stu, new IntWritable(intscore)); // emit (name, score)
            }
        }
    }

    public static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            Iterator<IntWritable> iterator = values.iterator();
            while (iterator.hasNext()) {
                sum += iterator.next().get(); // total score
                count++;                      // number of subjects
            }
            int avg = sum / count; // integer division truncates the average
            context.write(key, new IntWritable(avg)); // emit (name, average)
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf()); // new Job(conf) is deprecated in 2.x
        job.setJarByClass(AvgScore.class);
        job.setJobName("avgscore");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(MyMap.class);
        // no combiner here: averaging is not associative, so reusing MyReduce as
        // a combiner would average partial averages and produce wrong results
        job.setReducerClass(MyReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path
        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // configure the input and output paths as program arguments in Eclipse
        int ret = ToolRunner.run(new AvgScore(), args);
        System.exit(ret);
    }

    @Override
    public Configuration getConf() {
        return configuration;
    }

    @Override
    public void setConf(Configuration conf) {
        // keep the Configuration that ToolRunner passes in; creating a fresh
        // one here (as the original code did) would drop any generic options
        this.configuration = conf;
    }
}
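As a quick sanity check of the logic (the sample data here is made up), an input file such as

zhang 88
zhang 92
li 75

should produce one line per student with the truncated integer average:

li 75
zhang 90

Once packaged into a jar, the job could be run on the cluster with something along the lines of bin/hadoop jar avgscore.jar mapreduce.AvgScore <input dir> <output dir>; note that the output directory must not already exist, or the job will fail at startup.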