Note: this exercise runs on a pseudo-distributed cluster, so start the pseudo-distributed daemons first.
The code for the advanced task has been uploaded to the resources.
start-dfs.sh
I. Create a new project
Use Xftp to import the required configuration files into the src directory of the project you created:
configuration files: core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml
jar libraries: hadoop_jars (already set up in the lab's Eclipse)
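These files need to be on the program's classpath: when the driver calls new Configuration(), Hadoop reads core-site.xml and hdfs-site.xml from the classpath to locate the cluster. A minimal sketch to verify the setup (ConfCheck is a hypothetical helper class, not part of the lab):

import org.apache.hadoop.conf.Configuration;

public class ConfCheck {
    public static void main(String[] args) {
        // prints the HDFS address picked up from core-site.xml on the classpath,
        // e.g. hdfs://localhost:9000 on a typical pseudo-distributed setup
        Configuration conf = new Configuration();
        System.out.println(conf.get("fs.defaultFS"));
    }
}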
II. Write the code (package: com.jxxy.mr.test)
(1) The WordCount driver class
package com.jxxy.mr.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            // create a new job
            Job job = Job.getInstance(conf);
            job.setJarByClass(WordCount.class); // the jar that contains this class
            job.setJobName("myjob");
            // input file in HDFS
            Path inPath = new Path("/user/root/hjq20192697.txt");
            FileInputFormat.addInputPath(job, inPath);
            // if the output path already exists, delete it first;
            // otherwise the job fails with FileAlreadyExistsException
            Path outPath = new Path("/output/wordcount");
            if (outPath.getFileSystem(conf).exists(outPath))
                outPath.getFileSystem(conf).delete(outPath, true);
            FileOutputFormat.setOutputPath(job, outPath);
            // MyMapper and MyReducer are the two classes defined below
            job.setMapperClass(MyMapper.class);
            job.setReducerClass(MyReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // submit the job and wait for it to finish
            job.waitForCompletion(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
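The input and output paths above are hardcoded to match this lab. A hedged variant (not part of the lab code) reads them from the command line instead, so the same jar works for any file:

// replace the two hardcoded Path lines in main with:
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// then run, for example:
//   hadoop jar wc.jar com.jxxy.mr.test.WordCount /user/root/in.txt /output/wordcount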
(2) The MyMapper class
package com.jxxy.mr.test;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MyMapper extends Mapper<Object, Text, Text, IntWritable> {
    // reuse the same Writable objects across calls instead of
    // allocating a new object for every token
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // split the line into whitespace-separated tokens
        // and emit (token, 1) for each one
        StringTokenizer str = new StringTokenizer(value.toString());
        while (str.hasMoreTokens()) {
            word.set(str.nextToken());
            context.write(word, one);
        }
    }
}
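Note that StringTokenizer splits on whitespace only, so "Hadoop" and "hadoop," count as different words. If that matters, a hedged variant of map (an illustration, not required by the lab) lowercases each line and splits on non-word characters:

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // lowercase the line and split on runs of non-word characters
        for (String token : value.toString().toLowerCase().split("\\W+")) {
            if (!token.isEmpty()) { // split can produce an empty first element
                word.set(token);
                context.write(word, one);
            }
        }
    }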
(3) The MyReducer class
package com.jxxy.mr.test;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // sum all the counts emitted for this word
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}
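Because this reduce logic just sums counts, it is associative and commutative, so the same class can also serve as a combiner that pre-aggregates map output locally and shrinks the data shuffled across the network. This is an optional optimization, not part of the lab code; it is one extra line in the driver, next to job.setReducerClass:

job.setCombinerClass(MyReducer.class); // pre-aggregate on the map side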
III. Build the jar
1. Right-click the project ——> Export ——> Java ——> JAR file
2. Choose the path where the exported JAR will be saved
3. Use Xftp to upload the resulting wc.jar to the root/hjq directory
IV. Run the program
start-dfs.sh
start-yarn.sh
# skip the two commands above if the daemons are already running
# put the input file into HDFS first, if it is not there yet
hdfs dfs -put hjq20192697.txt /user/root/
# run the jar exported above, giving the fully qualified main class
hadoop jar wc.jar com.jxxy.mr.test.WordCount
# view the result
hdfs dfs -cat /output/wordcount/part-r-00000