Create WordCountMapper.java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final Text txt = new Text();
    private final IntWritable intWritable = new IntWritable();

    /**
     * @param key   the byte offset of the current line within the input file
     * @param value one line of input, e.g. "hello jdk hello java"
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        System.out.println("WordCountMapper key:" + key + " value:" + value);
        // Split the line on spaces and emit (word, 1) for every token.
        String[] words = value.toString().split(" ");
        for (String word : words) {
            txt.set(word);
            intWritable.set(1);
            context.write(txt, intWritable);
        }
    }
}
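One caveat in map(): split(" ") treats every single space as a delimiter, so consecutive spaces or tabs in the input produce empty tokens that get counted as "words". A hardened variant of the method (a sketch, not part of the original code) splits on whitespace runs instead:

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Sketch: split on runs of whitespace and skip empty tokens, so lines
        // with double spaces, tabs, or leading whitespace do not emit a "" word.
        String[] words = value.toString().split("\\s+");
        for (String word : words) {
            if (word.isEmpty()) {
                continue; // a leading delimiter yields one empty first token
            }
            txt.set(word);
            intWritable.set(1);
            context.write(txt, intWritable);
        }
    }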
Create WordCountReduce.java
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the 1s the mapper emitted for this word.
        int count = 0;
        for (IntWritable intWritable : values) {
            count += intWritable.get();
        }
        LongWritable longWritable = new LongWritable(count);
        System.out.println("WordCountReduce key:" + key + " value:" + longWritable.get());
        context.write(key, longWritable);
    }
}
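Because the map output value type (IntWritable) differs from the reduce output value type (LongWritable), WordCountReduce cannot double as a combiner: a combiner's output types must match the map output types, since its output becomes the reducer's input. If you want map-side pre-aggregation, a separate combiner along these lines would work (a sketch, not part of the original code):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// Sketch of an optional combiner: its input and output are both
// (Text, IntWritable), matching the map output, so it can pre-sum
// counts on the map side before the shuffle.
public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {

    private final IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable v : values) {
            count += v.get();
        }
        result.set(count);
        context.write(key, result);
    }
}

It would be enabled in the driver with job.setCombinerClass(WordCountCombiner.class).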
Create WordCountDriver.java and test it locally first
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDriver.class);

        // Configure the mapper class for this job and its output key/value types.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Configure the reducer class for this job and its output key/value types.
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input directory to read from.
        Path pathin = new Path("G:\\hadoopstu\\in\\demo1");
        FileInputFormat.setInputPaths(job, pathin);

        // Output directory; it must not exist when the job starts,
        // so delete it first if a previous run left it behind.
        Path pathout = new Path("G:\\hadoopstu\\in\\out1");
        FileSystem fileSystem = FileSystem.get(pathout.toUri(), conf);
        if (fileSystem.exists(pathout)) {
            fileSystem.delete(pathout, true);
        }
        FileOutputFormat.setOutputPath(job, pathout);

        job.waitForCompletion(true);
    }
}
Place the text files to be counted under the project's ./in/demo1 folder (the input path configured in the driver).
Run the job locally to test.
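After a successful run, the result is written under out1 as part-r-00000; the default TextOutputFormat emits one "key<TAB>value" line per word, and keys arrive at the reducer sorted. For example, if the input contained only the sample line from the mapper's comment ("hello jdk hello java"), the output file would read:

hello	2
java	1
jdk	1

Note that running Hadoop locally on Windows (as the G:\ paths suggest) typically also requires HADOOP_HOME to point at a directory containing winutils.exe.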
Next, upload the text files to be counted to HDFS:
[root@mihaoyu151 shelldemo]# ll
total 8
-rw-r--r--. 1 root root 2918 Oct 27 2021 hello2.txt
-rw-r--r--. 1 root root 57 Oct 27 2021 hello.txt
[root@mihaoyu151 shelldemo]# hdfs dfs -put ./* /input
21/10/27 15:06:12 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
[root@mihaoyu151 shelldemo]#
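You can verify the upload with hdfs dfs -ls /input before running the job.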
With the files on HDFS, switch the driver to take its paths from the args parameters:
args[0]=hdfs://mihaoyu151:9000/input
args[1]=hdfs://mihaoyu151:9000/output
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDriver.class);

        // Configure the mapper class for this job and its output key/value types.
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Configure the reducer class for this job and its output key/value types.
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input directory, now taken from the command line.
        // Path pathin = new Path("hdfs://mihaoyu151:9000/input");
        Path pathin = new Path(args[0]);
        FileInputFormat.setInputPaths(job, pathin);

        // Output directory, also taken from the command line;
        // delete it first if it already exists.
        // Path pathout = new Path("hdfs://mihaoyu151:9000/output");
        Path pathout = new Path(args[1]);
        FileSystem fileSystem = FileSystem.get(pathout.toUri(), conf);
        if (fileSystem.exists(pathout)) {
            fileSystem.delete(pathout, true);
        }
        FileOutputFormat.setOutputPath(job, pathout);

        job.waitForCompletion(true);
    }
}
When adding the imports, make sure the classes come from the org.apache.hadoop.mapreduce package (the new API), not from org.apache.hadoop.mapred (the legacy API).
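The difference shows up right at the import site; a quick contrast (the legacy line is shown only as a counter-example):

// New API (what this code uses): Mapper is a class to extend.
import org.apache.hadoop.mapreduce.Mapper;
// Legacy API (wrong here): org.apache.hadoop.mapred.Mapper is an interface
// with a different map(...) signature, so the @Override methods above
// would fail to compile against it.
// import org.apache.hadoop.mapred.Mapper;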
Run the test.
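Package the project into a jar and submit it with the input and output paths as arguments. A sketch of the command (the jar name wordcount.jar is an assumption; use your build's actual artifact name and the fully qualified driver class name):

[root@mihaoyu151 shelldemo]# hadoop jar wordcount.jar WordCountDriver hdfs://mihaoyu151:9000/input hdfs://mihaoyu151:9000/output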