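<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.8.3</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.8.3</version>
</dependency>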
2. 编写单词统计的 Java 代码。
主类 WordCountMain.java:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountMain {
public WordCountMain(String[] args) throws Exception {
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration, “word_count”);
job.setJarByClass(WordCountMain.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setMapOutputKeyClass(Text.class);
<