1.统计单词个数
(1)统计样本:word.txt(hadoop fs -put word.txt /word)
number linux hadoop word count
linux number hadoop word count
linux hadoop eclipse word count
linux hadoop word java count
eclipse
java map reduce
(2)WordMap.java
package com.test.org;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
//Mapper: tokenizes each input line and emits (word, 1) for every token.
//Example input line: "number linux hadoop word count"
public class WordMap extends Mapper<Object, Text, Text, IntWritable>{
    //Reusable output objects — the standard Hadoop idiom: avoids allocating
    //a new Writable for every token, reducing GC pressure.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();
    /**
     * Called once per input line; key is the line's byte offset (unused),
     * value is the line text.
     */
    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        //Split the line on single spaces, e.g. {number,linux,hadoop,word,count}
        String[] tokens = value.toString().split(" ");
        for (String token : tokens) {
            word.set(token);
            //Emit (word, 1), e.g. (number,1) (linux,1) (hadoop,1) (word,1) (count,1)
            context.write(word, ONE);
        }
    }
}
(3)WordReduce.java
package com.test.org;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
//Reducer: sums the 1s emitted for each word, e.g. <linux,{1,1,1,1}> -> <linux,4>
public class WordReduce extends Reducer<Text, IntWritable, Text,IntWritable>{
    //Reused output value — avoids a new IntWritable allocation per key.
    private final IntWritable result = new IntWritable();
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        int sum = 0; //running count for this word
        for (IntWritable count : values) {
            sum += count.get();
        }
        result.set(sum);
        context.write(key, result); //e.g. <linux,4>
    }
}
(4)WordCount.java
package com.test.org;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Driver: configures and submits the word-count job.
//Usage: WordCount <input path> <output path>
public class WordCount {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        //Check args for null BEFORE reading args.length; the original order
        //(length first) would throw NullPointerException on a null array.
        if(args == null || args.length != 2){
            System.out.println("input error");
            System.exit(0);
        }
        Configuration conf = new Configuration();
        //Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "WordCount");
        //Main class and the mapper/reducer implementations.
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordMap.class);
        job.setReducerClass(WordReduce.class);
        //Input and output paths from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //Output key/value types (map and reduce share them here).
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        //Exit 0 on success, 1 on failure so shell scripts can detect errors.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
2.以“序号 单词”输出
package com.test.word;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Outputs each distinct word of the input prefixed with a sequence number,
//one "number word" pair per line.
//NOTE: the original had NumReduce and main accidentally nested inside NumMap
//(misplaced closing brace); they are now direct members of SingleNum.
public class SingleNum {
    //Mapper: emits (word, empty IntWritable). The value is never read — the
    //shuffle phase groups identical words, which deduplicates them.
    public static class NumMap extends Mapper<Object, Text, Text, IntWritable>{
        private static final IntWritable EMPTY = new IntWritable();
        private final Text word = new Text();
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] tokens = value.toString().split(" ");
            for (String token : tokens) {
                word.set(token);
                context.write(word, EMPTY);
            }
        }
    }
    //Reducer: receives each distinct word exactly once and writes <number, word>.
    //A correct global numbering requires a single reducer (the default).
    public static class NumReduce extends Reducer<Text, IntWritable, IntWritable, Text>{
        private int number = 1; //next sequence number to assign
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            context.write(new IntWritable(number), key);
            number++;
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        //null check must come before args.length to avoid NullPointerException
        if (args == null || args.length != 2) {
            System.out.println("error!");
            System.exit(0);
        }
        Configuration configuration = new Configuration();
        //Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(configuration, "SingleNum");
        job.setJarByClass(SingleNum.class);
        job.setMapperClass(NumMap.class);
        job.setReducerClass(NumReduce.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //Map output types differ from the final output types, so both must be set.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
3.计算平均分
(1)样本:grade.txt
小红 89
小明 78
小明 90
小红 79
小明 98
小明 84
灵儿 86
灵儿 78
灵儿 96
(2)AvgGrade.java
package com.test.word;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
//Computes each student's average score from input lines of "name score".
public class AvgGrade {
    //Mapper: parses one "name score" line and emits (name, score).
    public static class AvgMap extends Mapper<Object, Text, Text, IntWritable>{
        private final Text name = new Text();
        private final IntWritable grade = new IntWritable();
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            //The sample grade.txt is space-separated, but the original code
            //split on "\t" only, which would make lines[1] fail with
            //ArrayIndexOutOfBoundsException; split on any whitespace run
            //so both tab- and space-separated input are accepted.
            String[] fields = value.toString().trim().split("\\s+");
            if (fields.length < 2) {
                return; //skip blank or malformed lines instead of crashing
            }
            //fields[0] is the student name, fields[1] is one score
            grade.set(Integer.parseInt(fields[1]));
            name.set(fields[0]);
            context.write(name, grade);
        }
    }
    //Reducer: averages all scores seen for one student; output value is a double.
    public static class AvgReduce extends Reducer<Text, IntWritable, Text, DoubleWritable>{
        private final DoubleWritable average = new DoubleWritable();
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            double sum = 0; //total of all scores for this student
            int count = 0;  //number of scores seen
            for (IntWritable score : values) {
                sum += score.get();
                count++;
            }
            average.set(sum / count);
            context.write(key, average);
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException{
        //null check must come before args.length to avoid NullPointerException
        if(args == null || args.length != 2){
            System.out.println("error");
            System.exit(0);
        }
        Configuration configuration = new Configuration();
        //Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(configuration, "AvgGrade");
        job.setJarByClass(AvgGrade.class);
        job.setMapperClass(AvgMap.class);
        job.setReducerClass(AvgReduce.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        //Map output types differ from the final output types, so both must be set.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}