package org.test.CommonDep;
/*
 * For input records in "year<TAB>temperature" format, this job emits each year's maximum temperature.
 * 1. Partitioner: records of the same year go to the same partition. key = "year temperature", value = empty (NullWritable).
 * 2. KeyComparator: sort keys by year ascending; for equal years, by temperature descending.
 * 3. GroupComparator: records with the same year form one reduce group.
 * 4. The first record of each group is written as output.
 * 5. Demonstrates use of the distributed cache.
 */
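/*
 * Example with hypothetical sample data. Given the input
 *   1990	31
 *   1990	38
 *   1991	25
 * the job writes one record per year holding that year's maximum temperature:
 *   1990 38
 *   1991 25
 * (if the cached yeartocode.conf file maps a year to a code, the code is written in place of the year).
 */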
/**
 * author: carl.zhang
 * email: 18510665908@163.com
 */
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class PartitionGroupingDemo{
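// Counter enum declared for illustration; it is not incremented anywhere in this demo.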
enum MYCounter
{
MISSING,
O
}
public static final Pattern DELIMITER = Pattern.compile("[\t,]");
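/* Mapper: emits the composite key "year temperature" with a NullWritable value;
 * all of the sorting, partitioning and grouping below operates on this composite key. */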
public static class PartitionGroupingMapper extends Mapper<LongWritable, Text, Text, NullWritable>{
HashMap<String,String> cache=new HashMap<String,String>();
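/* setup() demonstrates access to the current input split and to a value passed
 * through the Configuration ("whoami", set in run()); these values, like the cache
 * field above, are read but not otherwise used in this demo. */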
public void setup(Context context) throws IOException
{
FileSplit fileSplit=(FileSplit) context.getInputSplit();
Configuration conf=context.getConfiguration();
String name=conf.get("whoami");
}
private Text k=new Text();
public void map(LongWritable key, Text value, Context context ) throws IOException, InterruptedException
{
String []valueArray = PartitionGroupingDemo.DELIMITER.split(value.toString().trim());
// Skip blank or malformed records.
if (value.toString().trim().length()==0 || valueArray.length!=2) return;
if (valueArray[0].length()==0 || valueArray[1].length()==0) return;
k.set(valueArray[0]+" "+valueArray[1]);
context.write(k,NullWritable.get());
}
}
public static class PartitionGroupingReducer
extends Reducer<Text,NullWritable,Text,NullWritable>
{
public void cleanup(Context context)
{
}
HashMap<String,String> cache=new HashMap<String,String>();
public void setup(Context context) throws IOException
{
/* Load the year-to-code mapping from the distributed-cache file via its symlink. */
BufferedReader br=new BufferedReader(new InputStreamReader(new FileInputStream("symLink")));
String pair=null;
while(null!=(pair=br.readLine()))
{
cache.put(pair.split("\t")[0], pair.split("\t")[1]);
}
br.close();
}
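/* Because the keys within a group arrive sorted by temperature descending, the group's
 * first key already holds the year's maximum temperature; reduce() therefore writes
 * the key once per group without iterating over the values, replacing the year with
 * its code from the cached mapping when one exists. */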
public void reduce(Text key, Iterable<NullWritable> values,
Context context
) throws IOException, InterruptedException {
String k=null;
if (cache.get(key.toString().split(" ")[0]) != null)
k=cache.get(key.toString().split(" ")[0])+" "+key.toString().split(" ")[1];
else
k=key.toString();
context.write(new Text(k), NullWritable.get());
}
}
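/* Partitioner: partitions on the year portion of the key only, so every record
 * of a given year is processed by the same reducer. */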
public static class FirstPartitioner extends Partitioner<Text,NullWritable>
{
@Override
public int getPartition(Text key, NullWritable value, int numPartitions) {
String []keyArray=key.toString().split(" ");
return Math.abs(Integer.parseInt(keyArray[0].trim())*127)%numPartitions;
}
}
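/* Sort comparator: orders keys by year ascending and, within a year, by temperature
 * descending, so the hottest record of each year comes first. */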
public static class KeyComparator extends WritableComparator
{
protected KeyComparator()
{
super(Text.class, true);
}
@Override
public int compare(WritableComparable a, WritableComparable b)
{
String []keyArray1=((Text)a).toString().split(" ");
String []keyArray2=((Text)b).toString().split(" ");
int w1Left=Integer.parseInt(keyArray1[0].trim());
int w1Right=Integer.parseInt(keyArray1[1].trim());
int w2Left=Integer.parseInt(keyArray2[0].trim());
int w2Right=Integer.parseInt(keyArray2[1].trim());
int result=0;
if (w1Left>w2Left)
result=1;
else if (w1Left<w2Left)
result=-1;
else if (w1Right>w2Right)
result=-1;
else if (w1Right<w2Right)
result=1;
else
result=0;
return result;
}
}
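/* Grouping comparator: treats keys with the same year as equal, so each year forms
 * a single reduce group regardless of temperature. */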
public static class GroupComparator extends WritableComparator
{
protected GroupComparator()
{
super(Text.class,true);
}
@Override
public int compare(WritableComparable a,WritableComparable b)
{
String []keyArray1=((Text)a).toString().split(" ");
String []keyArray2=((Text)b).toString().split(" ");
int w1Left=Integer.parseInt(keyArray1[0].trim());
int w2Left=Integer.parseInt(keyArray2[0].trim());
int result=0;
if (w1Left>w2Left)
result=1;
else if (w1Left<w2Left)
result=-1;
return result;
}
}
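/* Job driver: wires up the mapper, reducer, partitioner, comparators and the
 * distributed cache, then submits the job and waits for completion. */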
@SuppressWarnings("deprecation")
public static int run(String[] args) throws Exception {
Configuration conf=new Configuration();
//conf.set("mapreduce.framework.name", "yarn");
//conf.set("yarn.resourcemanager.address", "192.168.10.225:8032");
conf.set("mapreduce.job.jar","/export/workspace/CommonDep/CommonScheduler.jar" );
/* Simple configuration values can be passed to the tasks directly through the Configuration object. */
conf.set("whoami", "carlzhang");
/* Distributed cache: ship yeartocode.conf to every task and expose it through the "symLink" symlink. */
String cacheFile="hdfs://192.168.10.225:9000/input/cacheFiles/yeartocode.conf";
Path inPath=new Path(cacheFile);
String pathLink=inPath.toUri().toString()+"#symLink";
DistributedCache.addCacheFile(new URI(pathLink), conf);
DistributedCache.createSymlink(conf);
String []remainingArgs=new GenericOptionsParser(conf,args).getRemainingArgs();
Job job=new Job(conf);
job.setJobName("PartitionGroupingDemo");
job.setJarByClass(PartitionGroupingDemo.class);
job.setNumReduceTasks(2);
job.setMapperClass(PartitionGroupingMapper.class);
job.setReducerClass(PartitionGroupingReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setPartitionerClass(FirstPartitioner.class);
job.setSortComparatorClass(KeyComparator.class);
job.setGroupingComparatorClass(GroupComparator.class);
/* Use LazyOutputFormat so that reducers producing no records do not create empty output files. */
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.setInputPaths(job, new Path(remainingArgs[0]));
TextOutputFormat.setOutputPath(job,new Path(remainingArgs[1]));
return job.waitForCompletion(true) ? 0 : 1;
}
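/* A minimal entry point, sketched here for completeness; the original class only
 * exposes run() and is presumably launched by an external driver such as the
 * CommonScheduler jar referenced above. */
public static void main(String[] args) throws Exception {
System.exit(run(args));
}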
}
An example of using MapReduce's Partitioner, GroupComparator, KeyComparator, and the distributed cache.