3. 编写Reducer类
AccessReducer
类负责对相同手机号的流量进行汇总:
package com.example.phonetraffic; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; public class AccessReducer extends Reducer<Text, Access, Text, Access> { @Override protected void reduce(Text key, Iterable<Access> values, Context context) throws IOException, InterruptedException { long upSum = 0; long downSum = 0; for (Access access : values) { upSum += access.getUp(); downSum += access.getDown(); } Access result = new Access(); result.setPhone(key.toString()); result.setUp(upSum); result.setDown(downSum); result.setSum(upSum + downSum); context.write(key, result); } }
4. 编写Partitioner类
AccessPartitioner
类负责将不同前缀的手机号分配到不同的分区:
package com.example.phonetraffic;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Routes records to reducers by phone-number prefix:
 * <ul>
 *   <li>numbers starting with "13" &rarr; partition 0</li>
 *   <li>numbers starting with "15" &rarr; partition 1</li>
 *   <li>everything else            &rarr; partition 2</li>
 * </ul>
 *
 * <p>IMPORTANT: this scheme requires the job to be configured with at least
 * 3 reduce tasks ({@code job.setNumReduceTasks(3)}); with fewer reducers,
 * returning partition 2 would cause the job to fail at runtime.
 */
public class AccessPartitioner extends Partitioner<Text, Access> {

    @Override
    public int getPartition(Text key, Access value, int numPartitions) {
        // Convert once instead of once per branch — this runs for every
        // map output record, so avoid the repeated toString() allocation.
        String phone = key.toString();
        if (phone.startsWith("13")) {
            return 0;
        }
        if (phone.startsWith("15")) {
            return 1;
        }
        return 2;
    }
}
5. 编写Driver类
AccessDriver
类负责配置整个作业并提交:
package com.example.phonetraffic; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class AccessDriver { public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: AccessDriver <input path> <output path>"); System.exit(-1); } Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Phone Traffic"); job.setJarByClass(AccessDriver.class); job.setMapperClass(AccessMapper.class); job.setReducerClass(AccessReducer.class); job.setPartitionerClass(AccessPartitioner.class); job.setNumReduceTasks(3); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Access.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Access.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
打包与运行
1. 配置pom.xml
确保你的pom.xml
中包含所需的依赖:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.example</groupId> <artifactId>PhoneTraffic</artifactId> <version>1.0-SNAPSHOT</version> <dependencies> <!-- Hadoop dependencies --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>3.1.3</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> <version>3.1.3</version> </dependency> </dependencies> </project>
2. 打包
在项目根目录下运行以下命令打包:
mvn clean package
3. 上传数据
将access.log
上传到HDFS:
hadoop fs -mkdir -p /user/atguigu/input
hadoop fs -put /path/to/access.log /user/atguigu/input/
4. 运行作业
执行以下命令运行MapReduce作业:
hadoop jar /home/atguigu/IdeaProjects/dianhua/target/PhoneTraffic-1.0-SNAPSHOT.jar com.example.phonetraffic.AccessDriver /user/atguigu/input /user/atguigu/output
5. 下载结果
将HDFS上的结果下载到本地:
mkdir -p /home/atguigu/data/phone
hadoop fs -get /user/atguigu/output/* /home/atguigu/data/phone/