先配置pom文件
<?xml version="1.0" encoding="UTF-8"?> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.czxy</groupId> <artifactId>sadas</artifactId> <version>1.0-SNAPSHOT</version> <repositories> <repository> <id>cloudera</id> <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url> </repository> </repositories> <dependencies> <dependency> <groupId>org.apache.Hadoop</groupId> <artifactId>Hadoop-client</artifactId> <version>2.6.0-mr1-cdh5.14.0</version> </dependency> <dependency> <groupId>org.apache.Hadoop</groupId> <artifactId>Hadoop-common</artifactId> <version>2.6.0-cdh5.14.0</version> </dependency> <dependency> <groupId>org.apache.Hadoop</groupId> <artifactId>Hadoop-hdfs</artifactId> <version>2.6.0-cdh5.14.0</version> </dependency> <dependency> <groupId>org.apache.Hadoop</groupId> <artifactId>Hadoop-mapreduce-client-core</artifactId> <version>2.6.0-cdh5.14.0</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.11</version> <scope>test</scope> </dependency> <dependency> <groupId>org.testng</groupId> <artifactId>testng</artifactId> <version>RELEASE</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <version>3.0</version> <configuration> <source>1.8</source> <target>1.8</target> <encoding>UTF-8</encoding> </configuration> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> <version>2.4.3</version> <executions> <execution> <phase>package</phase> <goals> <goal>shade</goal> </goals> <configuration> <minimizeJar>true</minimizeJar> </configuration> </execution> </executions> </plugin> </plugins> </build> </project>
编写三个java代码
WordCountMap
package com.czxy; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import java.io.IOException; public class WordCountMap extends Mapper<LongWritable,Text,Text,LongWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { //1. 将value 从test转为string String datas = value.toString(); //2. 切分数据 String[] s = datas.split(","); //3. 遍历输出 for (String data : s) { //输出 context.write(new Text(data),new LongWritable(1)); } } }
WordCountReduce
package com.czxy; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class WordCountReduce extends Reducer<Text, LongWritable,Text,LongWritable> { @Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long sum = 0; for (LongWritable value : values) { sum += value.get(); } context.write(key,new LongWritable(sum)); } }
WordCountDriver
package com.czxy; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class WordCountDriver extends Configured implements Tool { //执行job public static void main(String[] args) throws Exception { ToolRunner.run(new WordCountDriver(), args); } @Override public int run(String[] args) throws Exception { //将已经编写好的map reduce 添加到计算框架 //1. 实例一个 job Job job = Job.getInstance(new Configuration(), "WordCount34"); //2. 使用 job 设置读取数据 (数据类型 ,数据路径) job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path("C:\\a.txt")); //3. 使用 job 设置map 类(map输入的类型) job.setMapperClass(WordCountMap.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); //4. 使用 job 设置reduce 类 (输入的类型) job.setReducerClass(WordCountReduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); //5. 使用 job 设置数据输出的路径 job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path("C:\\BBB2")); //6.返回代码执行状态编号 return job.waitForCompletion(true) ? 0 : 1; } }