Filling In Missing Ranks in Test Data with MapReduce

This example reads rows of (name, subject, score, rank) where some ranks are missing, estimates each missing rank from the nearest known score within the same subject, and is set up to output the rows in rank order.

package xxx.hadoop;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import xxx.common.ConfigurationHadoop;

/* Sample test data: name, subject, score, rank
 * 张4     数学      96
 * 张5     数学      95      5
 * 张1     数学      100     1
 * 张3     数学      98
 * 张2     数学      99      2
 * The missing ranks must be filled in, and the rows output in rank order.
 */
public class FillAndSortTest {

    public static class FillAndSortMapper extends
            Mapper<LongWritable, Text, KeyWritable, ValueWritable> {

        private KeyWritable keyWritable;

        private ValueWritable valueWritable;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // InputSplit split = context.getInputSplit();
            // String text = split.toString();
            super.setup(context);
        }

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            System.out.println(value.toString()); // debug: echo the raw input line
            String line = value.toString();
            String[] col = line.split("\t");

            // Composite key: (subject, score). The grouping comparator set in
            // main() groups by subject only, so one reduce() call sees all
            // scores of a subject.
            keyWritable = new KeyWritable(col[1], Integer.parseInt(col[2]));
            if (col.length == 3) {
                // The rank column is missing; use 0 as the "unknown rank" marker.
                valueWritable = new ValueWritable(col[0], col[1], Integer.parseInt(col[2]), 0);
            } else {
                valueWritable = new ValueWritable(col[0], col[1], Integer.parseInt(col[2]), Integer.parseInt(col[3]));
            }
            context.write(keyWritable, valueWritable);
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            super.cleanup(context);
        }
    }

    public static class FillAndSortReducer extends
            Reducer<KeyWritable, ValueWritable, NullWritable, Text> {

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
        }

        @Override
        public void reduce(KeyWritable key, Iterable<ValueWritable> values,
                Context context) throws IOException, InterruptedException {

            Iterator<ValueWritable> it = values.iterator();
            // LinkedHashMultimap<Integer, String> noRank = LinkedHashMultimap.create();
            // LinkedHashMultimap<Integer, Integer> haveRank = LinkedHashMultimap.create();
            HashMap<Integer, String> noRank = new HashMap<Integer, String>();     // score -> "name\tsubject", rank unknown
            HashMap<Integer, Integer> haveRank = new HashMap<Integer, Integer>(); // score -> known rank

            while (it.hasNext()) {
                ValueWritable value = it.next();
                System.out.println(value.toString()); // debug: echo each grouped value
                if (value.getRank() == 0) {
                    noRank.put(value.getScore(), value.getName() + "\t" + value.getSubject());
                } else {
                    haveRank.put(value.getScore(), value.getRank());
                }
            }

            Set<Integer> scores = noRank.keySet();
            Set<Integer> scores2 = haveRank.keySet();

            // The logic here is not rigorous; it only demonstrates the process.
            // For each unranked score, find the known score closest to it and
            // derive the missing rank from that neighbor's rank.
            for (Integer score : scores) {
                Integer minScore = Integer.MAX_VALUE;
                Integer newRank = 0;
                for (Integer score2 : scores2) {
                    if (Math.abs(score2 - score) < minScore) {
                        minScore = Math.abs(score2 - score);
                        newRank = haveRank.get(score2);
                        // One rank better than the neighbor if we scored higher,
                        // one rank worse if we scored lower.
                        newRank += score > score2 ? -1 : 1;
                    }
                }
                System.out.println(noRank.get(score) + "\t" + score + "\t" + newRank);
                // Sorting and writing the final output is omitted.
            }
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            super.cleanup(context);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = ConfigurationHadoop.getConfigurationHadoop();

        String[] otherArgs = new String[] { "/user/hdfs/a/test",
                "/user/hdfs/b" };

        Job job = Job.getInstance(conf, "FillAndSortTest");
        job.setJarByClass(FillAndSortTest.class);

        /*
         * Disable speculative execution. With speculative execution, when
         * Hadoop notices that some tasks are running slowly, it launches a
         * duplicate of the same task on another node and keeps whichever
         * result finishes first. Because the reduce task writes its result
         * to an HDFS file with a fixed name, running two or more copies of
         * the same reduce task at once would corrupt the output, so the
         * feature is turned off here.
         */
        job.setSpeculativeExecution(false);
        job.setMapSpeculativeExecution(false);
        job.setReduceSpeculativeExecution(false);

        job.setMapperClass(FillAndSortMapper.class);
        // Group map output by subject only (see KeyComparator below).
        job.setGroupingComparatorClass(KeyComparator.class);
        job.setReducerClass(FillAndSortReducer.class);

        job.setMapOutputKeyClass(KeyWritable.class);
        job.setMapOutputValueClass(ValueWritable.class);

        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        // CombineFileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileSystem hdfs = FileSystem.get(conf);
        if (hdfs.exists(new Path(otherArgs[1]))) {
            hdfs.delete(new Path(otherArgs[1]), true);
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        // TextOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    public static class KeyWritable implements WritableComparable<KeyWritable> {

        private String subject;

        private Integer score;

        public KeyWritable() {

        }

        public KeyWritable(String subject, Integer score) {
            this.subject = subject;
            this.score = score;
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeUTF(subject);
            out.writeInt(score);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            subject = in.readUTF();
            score = in.readInt();
        }

        @Override
        public int compareTo(KeyWritable o) {
            // Sort by subject first; within a subject, sort scores in
            // descending numeric order so the highest score comes first.
            // (Comparing score strings would misorder them, e.g. "100"
            // before "96".)
            int cmp = this.subject.compareTo(o.subject);
            if (cmp != 0) {
                return cmp;
            }
            return o.score.compareTo(this.score);
        }

        @Override
        public String toString() {
            return "KeyWritable [subject=" + subject + ", score=" + score + "]";
        }

        public String constructGroup() {
            return subject;
        }

        public Integer getScore() {
            return score;
        }

        public String getSubject() {
            return subject;
        }
    }

    /**
     * Grouping comparator: compares keys by subject only, so all records of
     * one subject land in a single reduce() call regardless of score.
     */
    public static class KeyComparator extends WritableComparator {

        public KeyComparator() {
            super(KeyWritable.class, true);
        }

        @SuppressWarnings("rawtypes")
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            KeyWritable key1 = (KeyWritable) a;
            KeyWritable key2 = (KeyWritable) b;

            return key1.constructGroup().compareTo(key2.constructGroup());
        }
    }

    public static class ValueWritable implements Writable {

        private String name;

        private String subject;

        private Integer score;

        private Integer rank;

        public ValueWritable() {

        }

        public ValueWritable(String name, String subject, Integer score,
                Integer rank) {
            this.name = name;
            this.subject = subject;
            this.score = score;
            this.rank = rank;
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeUTF(name);
            out.writeUTF(subject);
            out.writeInt(score);
            out.writeInt(rank);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            name = in.readUTF();
            subject = in.readUTF();
            score = in.readInt();
            rank = in.readInt();
        }

        @Override
        public String toString() {
            return "ValueWritable [name=" + name + ", subject=" + subject
                    + ", score=" + score + ", rank=" + rank + "]";
        }

        public String getName() {
            return name;
        }

        public String getSubject() {
            return subject;
        }

        public Integer getScore() {
            return score;
        }

        public Integer getRank() {
            return rank;
        }
    }
}
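
To sanity-check the rank-filling heuristic without a Hadoop cluster, here is a minimal, self-contained sketch of the same nearest-known-score logic, applied to the sample data from the class comment (the class name RankFillSketch is just for illustration):

import java.util.LinkedHashMap;
import java.util.Map;

public class RankFillSketch {
    public static void main(String[] args) {
        // Known (score -> rank) pairs from the sample data.
        Map<Integer, Integer> haveRank = new LinkedHashMap<Integer, Integer>();
        haveRank.put(95, 5);
        haveRank.put(100, 1);
        haveRank.put(99, 2);

        int[] missing = { 96, 98 }; // scores whose rank must be filled in

        for (int score : missing) {
            int minDiff = Integer.MAX_VALUE;
            int newRank = 0;
            for (Map.Entry<Integer, Integer> e : haveRank.entrySet()) {
                int diff = Math.abs(e.getKey() - score);
                if (diff < minDiff) {
                    minDiff = diff;
                    // One rank better than the nearest neighbor if we scored
                    // higher, one rank worse if we scored lower.
                    newRank = e.getValue() + (score > e.getKey() ? -1 : 1);
                }
            }
            System.out.println(score + " -> rank " + newRank);
        }
    }
}

On the sample data this prints 96 -> rank 4 and 98 -> rank 3, matching the expected full ranking 100, 99, 98, 96, 95.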
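
Assuming the class is packaged into a jar (the jar name below is illustrative; the input path /user/hdfs/a/test and output path /user/hdfs/b are hardcoded in main()), the job can be launched with:

hadoop jar fillandsort-example.jar xxx.hadoop.FillAndSortTest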