Writing a Hadoop MapReduce WordCount Program

The classic WordCount example: MyMapper splits each input line into words and emits (word, 1), MyReduce sums the counts per word, and main() configures and submits the job. The code targets Hadoop 1.x and the new org.apache.hadoop.mapreduce API.

[code="java"]
package rock.lee.wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyWordCount {

    /**
     * @author Rock Lee
     *
     * @Description
     * LongWritable: input key type (byte offset of the line)
     * Text:         input value type (the line contents)
     * Text:         output key type (a word)
     * IntWritable:  output value type (the count 1)
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {

            // read one line of input
            String lineValue = value.toString();
            // split the line on whitespace (StringTokenizer's default delimiters: " \t\n\r\f")
            StringTokenizer stzer = new StringTokenizer(lineValue);
            Text text = new Text();
            while (stzer.hasMoreTokens()) {
                // take each token produced by the split
                String val = stzer.nextToken();
                // use the word as the key
                text.set(val);
                // emit key --> value, i.e. (word, 1)
                context.write(text, ONE);
            }
        }
    }

    /**
     * @author Rock Lee
     *
     * @Description
     * Sums the 1s emitted for each word and writes (word, total).
     */
    static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        // load the Hadoop configuration
        Configuration configuration = new Configuration();

        // create the job and give it a name
        Job job = new Job(configuration, "WC");

        // set the class Hadoop uses to locate the containing jar
        job.setJarByClass(MyWordCount.class);

        // set the Mapper and Reducer classes
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReduce.class);

        // set the input/output paths from the command line
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // set the key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // submit the job, wait for completion, and print progress to the client
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);
    }
}
[/code]
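One optimization worth noting, not in the code above: because MyReduce just sums, it is associative and commutative and can double as a combiner, pre-aggregating (word, 1) pairs on the map side before the shuffle. The run log below shows Combine input records=0, i.e. no combiner was set. A single extra line in main() would enable it:

[code="java"]
// optional: reuse the reducer as a combiner to shrink shuffle traffic
job.setCombinerClass(MyReduce.class);
[/code]

The final counts are unchanged since addition is order-independent; on an input this small the effect is negligible, but it matters on large inputs.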



Run wc.jar
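Before running, the class has to be compiled against the Hadoop jars and packaged into wc.jar. The exact jar names and paths depend on the installation; a minimal sketch, assuming a Hadoop 1.x hadoop-core jar (the 1.2.1 version in the path is illustrative):

[code="command"]
# mkdir classes
# javac -classpath $HADOOP_HOME/hadoop-core-1.2.1.jar -d classes MyWordCount.java
# jar cvfe wc.jar rock.lee.wordcount.MyWordCount -C classes .
[/code]

Setting the entry point with e lets hadoop jar wc.jar <input> <output> run without naming the main class, matching the command below; otherwise append rock.lee.wordcount.MyWordCount after the jar name.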

[code="command"]
# hadoop jar wc.jar /opt/wc/input/ /opt/wc/output
Warning: $HADOOP_HOME is deprecated.

15/06/11 04:29:10 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
15/06/11 04:29:10 INFO input.FileInputFormat: Total input paths to process : 2
15/06/11 04:29:10 INFO util.NativeCodeLoader: Loaded the native-hadoop library
15/06/11 04:29:10 WARN snappy.LoadSnappy: Snappy native library not loaded
15/06/11 04:29:10 INFO mapred.JobClient: Running job: job_201506110402_0006
15/06/11 04:29:11 INFO mapred.JobClient: map 0% reduce 0%
15/06/11 04:29:32 INFO mapred.JobClient: map 50% reduce 0%
15/06/11 04:29:42 INFO mapred.JobClient: map 100% reduce 0%
15/06/11 04:30:05 INFO mapred.JobClient: map 100% reduce 100%
15/06/11 04:30:05 INFO mapred.JobClient: Job complete: job_201506110402_0006
15/06/11 04:30:05 INFO mapred.JobClient: Counters: 29
15/06/11 04:30:05 INFO mapred.JobClient: Job Counters
15/06/11 04:30:05 INFO mapred.JobClient: Launched reduce tasks=1
15/06/11 04:30:05 INFO mapred.JobClient: SLOTS_MILLIS_MAPS=40074
15/06/11 04:30:05 INFO mapred.JobClient: Total time spent by all reduces waiting after reserving slots (ms)=0
15/06/11 04:30:05 INFO mapred.JobClient: Total time spent by all maps waiting after reserving slots (ms)=0
15/06/11 04:30:05 INFO mapred.JobClient: Launched map tasks=2
15/06/11 04:30:05 INFO mapred.JobClient: Data-local map tasks=2
15/06/11 04:30:05 INFO mapred.JobClient: SLOTS_MILLIS_REDUCES=21707
15/06/11 04:30:05 INFO mapred.JobClient: File Output Format Counters
15/06/11 04:30:05 INFO mapred.JobClient: Bytes Written=30
15/06/11 04:30:05 INFO mapred.JobClient: FileSystemCounters
15/06/11 04:30:05 INFO mapred.JobClient: FILE_BYTES_READ=96
15/06/11 04:30:05 INFO mapred.JobClient: HDFS_BYTES_READ=260
15/06/11 04:30:05 INFO mapred.JobClient: FILE_BYTES_WRITTEN=160215
15/06/11 04:30:05 INFO mapred.JobClient: HDFS_BYTES_WRITTEN=30
15/06/11 04:30:05 INFO mapred.JobClient: File Input Format Counters
15/06/11 04:30:05 INFO mapred.JobClient: Bytes Read=44
15/06/11 04:30:05 INFO mapred.JobClient: Map-Reduce Framework
15/06/11 04:30:05 INFO mapred.JobClient: Map output materialized bytes=102
15/06/11 04:30:05 INFO mapred.JobClient: Map input records=4
15/06/11 04:30:05 INFO mapred.JobClient: Reduce shuffle bytes=102
15/06/11 04:30:05 INFO mapred.JobClient: Spilled Records=16
15/06/11 04:30:05 INFO mapred.JobClient: Map output bytes=74
15/06/11 04:30:05 INFO mapred.JobClient: CPU time spent (ms)=820
15/06/11 04:30:05 INFO mapred.JobClient: Total committed heap usage (bytes)=413466624
15/06/11 04:30:05 INFO mapred.JobClient: Combine input records=0
15/06/11 04:30:05 INFO mapred.JobClient: SPLIT_RAW_BYTES=216
15/06/11 04:30:05 INFO mapred.JobClient: Reduce input records=8
15/06/11 04:30:05 INFO mapred.JobClient: Reduce input groups=4
15/06/11 04:30:05 INFO mapred.JobClient: Combine output records=0
15/06/11 04:30:05 INFO mapred.JobClient: Physical memory (bytes) snapshot=313032704
15/06/11 04:30:05 INFO mapred.JobClient: Reduce output records=4
15/06/11 04:30:05 INFO mapred.JobClient: Virtual memory (bytes) snapshot=1127878656
15/06/11 04:30:05 INFO mapred.JobClient: Map output records=8

[/code]
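The counters line up with a small input: 4 map input records (lines read), 8 map output records (words emitted), and 4 reduce input groups / 4 reduce output records (distinct words). To inspect the result, read the reducer's output from HDFS; with the default TextOutputFormat the file is named part-r-00000 and each line is word<TAB>count:

[code="command"]
# hadoop fs -cat /opt/wc/output/part-r-00000
[/code]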

Source: [url]http://mvplee.iteye.com/blog/2218989[/url]