一、Java code
1、Main class
package com.baohuajie.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import java.io.IOException;
public class EventCount {

    public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text event = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Everything before the first space is taken as the event name; emit (event, 1).
            int idx = value.toString().indexOf(" ");
            if (idx > 0) {
                String e = value.toString().substring(0, idx);
                event.set(e);
                context.write(event, one);
            }
        }
    }

    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum all counts for one event name.
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: EventCount <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "event count");
        job.setJarByClass(EventCount.class);
        job.setMapperClass(MyMapper.class);
        // The reducer also serves as a combiner, since summing is associative and commutative.
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
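To make the mapper's behavior concrete, here is a minimal standalone sketch (not part of the job) of the same parsing rule: everything before the first space on a line is treated as the event name and counted once. The sample lines are made-up placeholders that only illustrate the input format the mapper assumes.

// Standalone illustration of MyMapper's parsing rule; the sample lines are hypothetical.
public class ParseDemo {
    public static void main(String[] args) {
        String[] sampleLines = {
                "login user=alice ts=1557110364",
                "logout user=bob ts=1557110399",
                "malformed_line_without_space"
        };
        for (String line : sampleLines) {
            int idx = line.indexOf(" ");    // same test as in MyMapper.map()
            if (idx > 0) {
                // MyMapper would emit (event, 1) here; we just print the pair.
                System.out.println(line.substring(0, idx) + "\t1");
            }
            // Lines without a space (or starting with one) are skipped.
        }
    }
}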
2、pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>hadoopDemo1</groupId>
    <artifactId>hadoopDemo1</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
二、Running the job
1、Create the input directory on HDFS and upload the file to be processed
[root@1312e81054b9 hadoop_demo]# hadoop fs -mkdir /application/input
[root@1312e81054b9 hadoop_demo]# hadoop fs -put ./input/test.txt /application/input
2、Submit the job (the jar is produced by mvn package; note that the output directory must not already exist, or submission will fail)
[root@1312e81054b9 hadoop_demo]# hadoop jar hadoopDemo1-1.0-SNAPSHOT.jar com.baohuajie.test.EventCount /application/input /application/output
19/05/06 03:25:51 INFO client.RMProxy: Connecting to ResourceManager at hadoop2/172.19.0.2:8032
19/05/06 03:25:52 INFO input.FileInputFormat: Total input paths to process : 1
19/05/06 03:25:53 INFO mapreduce.JobSubmitter: number of splits:1
19/05/06 03:25:53 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1557110364565_0001
19/05/06 03:25:54 INFO impl.YarnClientImpl: Submitted application application_1557110364565_0001
19/05/06 03:25:54 INFO mapreduce.Job: The url to track the job: http://hadoop2:8088/proxy/application_1557110364565_0001/
19/05/06 03:25:54 INFO mapreduce.Job: Running job: job_1557110364565_0001
19/05/06 03:26:25 INFO mapreduce.Job: Job job_1557110364565_0001 running in uber mode : false
19/05/06 03:26:25 INFO mapreduce.Job: map 0% reduce 0%
19/05/06 03:27:38 INFO mapreduce.Job: map 100% reduce 0%
19/05/06 03:27:47 INFO mapreduce.Job: map 100% reduce 100%
19/05/06 03:27:50 INFO mapreduce.Job: Job job_1557110364565_0001 completed successfully
19/05/06 03:27:52 INFO mapreduce.Job: Counters: 49
File System Counters
    FILE: Number of bytes read=6
    FILE: Number of bytes written=237131
    FILE: Number of read operations=0
    FILE: Number of large read operations=0
    FILE: Number of write operations=0
    HDFS: Number of bytes read=175
    HDFS: Number of bytes written=0
    HDFS: Number of read operations=6
    HDFS: Number of large read operations=0
    HDFS: Number of write operations=2
Job Counters
    Launched map tasks=1
    Launched reduce tasks=1
    Data-local map tasks=1
    Total time spent by all maps in occupied slots (ms)=42957
    Total time spent by all reduces in occupied slots (ms)=7073
    Total time spent by all map tasks (ms)=42957
    Total time spent by all reduce tasks (ms)=7073
    Total vcore-milliseconds taken by all map tasks=42957
    Total vcore-milliseconds taken by all reduce tasks=7073
    Total megabyte-milliseconds taken by all map tasks=43987968
    Total megabyte-milliseconds taken by all reduce tasks=7242752
Map-Reduce Framework
    Map input records=8
    Map output records=0
    Map output bytes=0
    Map output materialized bytes=6
    Input split bytes=106
    Combine input records=0
    Combine output records=0
    Reduce input groups=0
    Reduce shuffle bytes=6
    Reduce input records=0
    Reduce output records=0
    Spilled Records=0
    Shuffled Maps =1
    Failed Shuffles=0
    Merged Map outputs=1
    GC time elapsed (ms)=195
    CPU time spent (ms)=2120
    Physical memory (bytes) snapshot=364904448
    Virtual memory (bytes) snapshot=3882463232
    Total committed heap usage (bytes)=249561088
Shuffle Errors
    BAD_ID=0
    CONNECTION=0
    IO_ERROR=0
    WRONG_LENGTH=0
    WRONG_MAP=0
    WRONG_REDUCE=0
File Input Format Counters
    Bytes Read=69
File Output Format Counters
    Bytes Written=0
[root@1312e81054b9 hadoop_demo]#
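After the job completes, the aggregated counts are written to /application/output as part-r-* files, which can be listed with hadoop fs -ls /application/output and printed with hadoop fs -cat. They can also be read programmatically; the class below is a minimal sketch using the HDFS Java API, where the part file name /application/output/part-r-00000 is an assumption based on the single-reducer run above.

package com.baohuajie.test;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Minimal sketch: read the reducer output back from HDFS.
// The output path is assumed from the run shown above.
public class ReadOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path result = new Path("/application/output/part-r-00000");
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(fs.open(result)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);   // each line is "<event>\t<count>"
            }
        }
    }
}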