reduce中的迭代器的坑
1)迭代器中的每一个值和key是一一对应的
2)这个迭代器只能迭代一次
迭代器是指针操作,每次迭代完成,指针就跳到这一组的最后了
3)reduce端的 key(一个内存地址)和 values(所有的 value 共用一个内存地址)总共只使用了两个对象
jvm的对象重用
解决方案:
重新创建对象接收即可
Stu s = new Stu(key.getCourse(),key.getName(),key.getAvgscore());
案例2:
package GroupByMapreduce;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class myGroup {

    /**
     * Mapper: parses one CSV line "course,name,score1,score2,..." and emits a
     * Stu key carrying the course, the student name, and the average of all
     * score columns. The value is NullWritable because all data rides in the key.
     */
    static class myMapper extends Mapper<LongWritable, Text, Stu, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the input line; columns 0 and 1 are course and name.
            String[] datas = value.toString().split(",");
            // Average every column after the first two.
            int sum = 0;
            for (int i = 2; i < datas.length; i++) {
                sum += Integer.parseInt(datas[i].trim());
            }
            double avg = sum * 1.0 / (datas.length - 2);
            Stu stu = new Stu(datas[0], datas[1], avg);
            context.write(stu, NullWritable.get());
        }
    }

    /*
     * Shuffle: keys are first sorted (by average score, per Stu's own
     * compareTo — presumably; confirm against the Stu class) and then
     * grouped by the comparator below.
     */

    /**
     * Custom grouping comparator: two map-output keys belong to the same
     * reduce group when their course fields are equal, regardless of
     * name or average score.
     */
    static class Group extends WritableComparator {
        public Group() {
            // 'true' tells the parent to instantiate Stu objects so that
            // compare(WritableComparable, WritableComparable) can be used.
            super(Stu.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            // Both arguments are map-output keys; group purely by course.
            Stu as = (Stu) a;
            Stu bs = (Stu) b;
            return as.getCourse().compareTo(bs.getCourse());
        }
    }

    /**
     * Reducer demonstrating two reduce-side iterator pitfalls:
     * 1) the values Iterable can only be traversed ONCE — the second loop
     *    below never executes its body;
     * 2) 'key' is a single reused object: each iterator step also rewrites
     *    'key' with the fields of the current record of the group, which is
     *    why printing 'key' inside the loop shows a different student each time.
     */
    static class myReduce extends Reducer<Stu, NullWritable, Stu, NullWritable> {
        @Override
        protected void reduce(Stu key, Iterable<NullWritable> values,
                Context context) throws IOException, InterruptedException {
            // First traversal: prints one line per record in the group
            // because the reused 'key' is mutated on every iterator step.
            for (NullWritable v : values) {
                System.out.println("1111111111" + key);
            }
            System.out.println("===========" + "finish");
            // Second traversal: the iterator is already exhausted, so this
            // loop body never runs — intentional demonstration of pitfall 1.
            for (NullWritable v : values) {
                System.out.println("22222222" + key);
            }
        }
    }

    // Driver. NOTE(fix): main was previously nested inside myReduce (a brace
    // miscount), making the entry point myGroup$myReduce; it belongs here.
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(myGroup.class);
        job.setMapperClass(myMapper.class);
        job.setReducerClass(myReduce.class);
        job.setOutputKeyClass(Stu.class);
        job.setOutputValueClass(NullWritable.class);
        // Install the custom grouping comparator (groups by course).
        job.setGroupingComparatorClass(Group.class);
        FileInputFormat.addInputPath(job, new Path("E:\\stu.txt"));
        // An output path is mandatory whenever a reducer exists; it also
        // receives the _SUCCESS marker file.
        FileOutputFormat.setOutputPath(job, new Path("e:\\stu_out3"));
        boolean waitForCompletion = job.waitForCompletion(true);
        System.exit(waitForCompletion ? 0 : 1);
    }
}
结果
1111111111algorithm huangjiaju 82.28571428571429
1111111111algorithm liutao 82.0
1111111111algorithm huanglei 74.42857142857143
1111111111algorithm huangzitao 72.75
1111111111algorithm liuyifei 62.142857142857146
1111111111algorithm huangdatou 56.0
===========finish
1111111111computer huangjiaju 83.2
1111111111computer liutao 83.0
1111111111computer huanglei 74.42857142857143
1111111111computer huangzitao 72.42857142857143
1111111111computer huangxiaoming 72.42857142857143
1111111111computer huangbo 65.25
1111111111computer xuzheng 65.0
1111111111computer liujialing 64.11111111111111
1111111111computer liuyifei 62.142857142857146
1111111111computer huangdatou 56.0
===========finish
1111111111english huanglei 83.0
1111111111english liuyifei 74.42857142857143
1111111111english huangxiaoming 72.42857142857143
1111111111english zhaobenshan 69.28571428571429
1111111111english zhouqi 64.18181818181819
1111111111english liujialing 62.142857142857146
1111111111english liuyifei 59.57142857142857
1111111111english huangdatou 56.0
1111111111english huangbo 55.0
===========finish
1111111111math huangxiaoming 83.0
1111111111math huangjiaju 82.28571428571429
1111111111math huanglei 74.42857142857143
1111111111math liujialing 72.75
1111111111math wangbaoqiang 72.42857142857143
1111111111math xuzheng 69.28571428571429
1111111111math liutao 56.0
===========finish
案例3:
package GroupByMapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class myGroup {

    /**
     * Mapper: parses one CSV line "course,name,score1,score2,..." and emits a
     * Stu key carrying the course, the student name, and the average of all
     * score columns. The value is NullWritable because all data rides in the key.
     */
    static class myMapper extends Mapper<LongWritable, Text, Stu, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the input line; columns 0 and 1 are course and name.
            String[] datas = value.toString().split(",");
            // Average every column after the first two.
            int sum = 0;
            for (int i = 2; i < datas.length; i++) {
                sum += Integer.parseInt(datas[i].trim());
            }
            double avg = sum * 1.0 / (datas.length - 2);
            Stu stu = new Stu(datas[0], datas[1], avg);
            context.write(stu, NullWritable.get());
        }
    }

    /**
     * Custom grouping comparator: two map-output keys belong to the same
     * reduce group when their course fields are equal, regardless of
     * name or average score.
     */
    static class Group extends WritableComparator {
        public Group() {
            // 'true' tells the parent to instantiate Stu objects so that
            // compare(WritableComparable, WritableComparable) can be used.
            super(Stu.class, true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            // Both arguments are map-output keys; group purely by course.
            Stu as = (Stu) a;
            Stu bs = (Stu) b;
            return as.getCourse().compareTo(bs.getCourse());
        }
    }

    /**
     * Reducer showing the fix for the key-reuse pitfall: because Hadoop
     * reuses one 'key' object for the whole group (each iterator step
     * rewrites its fields), every record must be snapshotted into a fresh
     * Stu before being stored in a collection.
     */
    static class myReduce extends Reducer<Stu, NullWritable, Stu, NullWritable> {
        @Override
        protected void reduce(Stu key, Iterable<NullWritable> values,
                Context context) throws IOException, InterruptedException {
            // BUG FIX: the list must be local to each reduce() call. As an
            // instance field it was never cleared, so each group accumulated
            // and reprinted every previous group's students as well.
            List<Stu> list = new ArrayList<Stu>();
            for (NullWritable v : values) {
                // Copy the reused key into a fresh object; storing 'key'
                // itself would leave the list full of aliases to the same
                // instance, all showing the group's LAST record.
                Stu s = new Stu(key.getCourse(), key.getName(), key.getAvgscore());
                list.add(s);
                System.out.println("0000000000" + list);
                System.out.println("1111111111" + key);
            }
            // Replay all snapshotted keys of this group.
            for (Stu s : list) {
                System.out.println(s);
            }
        }
    }

    // Driver. NOTE(fix): main was previously nested inside myReduce (a brace
    // miscount), making the entry point myGroup$myReduce; it belongs here.
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(myGroup.class);
        job.setMapperClass(myMapper.class);
        job.setReducerClass(myReduce.class);
        job.setOutputKeyClass(Stu.class);
        job.setOutputValueClass(NullWritable.class);
        // Install the custom grouping comparator (groups by course).
        job.setGroupingComparatorClass(Group.class);
        FileInputFormat.addInputPath(job, new Path("E:\\stu.txt"));
        // An output path is mandatory whenever a reducer exists; it also
        // receives the _SUCCESS marker file.
        FileOutputFormat.setOutputPath(job, new Path("e:\\stu_out6"));
        boolean waitForCompletion = job.waitForCompletion(true);
        System.exit(waitForCompletion ? 0 : 1);
    }
}