实例内容:按照年份进行分区,并在每个年份内按照分数(Score)降序排序
package mapreduce_3xia.fourth;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import mapreduce_3xia.TopK.MRDPUtils;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class Subarea {
/**
 * Composite map-output key made of a year and a score.
 *
 * <p>The natural ordering defined by {@link #compareTo(KeyPair)} is year ascending,
 * then score ascending. The key is serialized as two consecutive ints
 * (year first, then score).
 */
public static class KeyPair implements WritableComparable<KeyPair> {
    /** Year component of the key. */
    private int year;
    /** Score component of the key. */
    private int score;

    /** Creates a key with both fields at 0; populated later by the setters or {@link #readFields}. */
    public KeyPair() {
    }

    /**
     * @param year  year component
     * @param score score component
     */
    public KeyPair(int year, int score) {
        this.year = year;
        this.score = score;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public void setScore(int score) {
        this.score = score;
    }

    public int getYear() {
        return year;
    }

    public int getScore() {
        return score;
    }

    /**
     * Orders keys by year, then by score (both ascending).
     *
     * @param o the key to compare against
     * @return negative, zero or positive as this key is less than, equal to or greater than {@code o}
     */
    @Override
    public int compareTo(KeyPair o) {
        int result = Integer.compare(year, o.getYear());
        if (result != 0) {
            // Years differ: the year decides the order. Returning 0 here would
            // claim the two keys are equal and break the Comparable contract.
            return result;
        }
        // Same year: fall back to the score.
        return Integer.compare(score, o.getScore());
    }

    /** Serializes the key: year first, then score. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(year);
        dataOutput.writeInt(score);
    }

    /** Deserializes the key in the same order {@link #write} produced it. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.year = dataInput.readInt();
        this.score = dataInput.readInt();
    }

    /** @return {@code year}, a tab, then {@code score} */
    @Override
    public String toString() {
        return year + "\t" + score;
    }

    /** Two keys are equal when both year and score match (consistent with {@link #compareTo}). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof KeyPair)) {
            return false;
        }
        KeyPair other = (KeyPair) obj;
        return year == other.year && score == other.score;
    }

    /** Hash derived from both fields, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return 31 * year + score;
    }
}
/**
 * Small parsing helpers for the {@code <row ... />} records of the input file.
 */
public static class MRDPUtils {
    public static final String[] REDIS_INSTANCES = { "p0", "p1", "p2", "p3", "p4", "p6" };

    /**
     * Parses one {@code <row attr="value" ... />} line into an attribute-name to value map.
     *
     * <p>The line is trimmed, its first 5 characters ({@code "<row "}) and last 3
     * characters ({@code " />"}) are removed, and the remainder is split on double
     * quotes so that tokens alternate between {@code name=} and {@code value}.
     *
     * @param xml one input line
     * @return the parsed attributes; if the line is too short or otherwise triggers a
     *         {@link StringIndexOutOfBoundsException}, the line is echoed to
     *         {@code System.err} and whatever was parsed so far (possibly nothing) is returned
     */
    public static Map<String, String> transformXmlToMap(String xml) {
        Map<String, String> map = new HashMap<String, String>();
        try {
            String row = xml.trim();
            // Limit -1 keeps trailing empty tokens; without it an empty-valued last
            // attribute (e.g. UserId="") would be silently dropped by split().
            String[] tokens = row.substring(5, row.length() - 3).split("\"", -1);
            for (int i = 0; i < tokens.length - 1; i += 2) {
                String key = tokens[i].trim();
                String val = tokens[i + 1];
                // Strip the trailing '=' from the attribute name.
                map.put(key.substring(0, key.length() - 1), val);
            }
        } catch (StringIndexOutOfBoundsException e) {
            System.err.println(xml);
        }
        return map;
    }
}
/**
 * Grouping comparator: two {@link KeyPair} keys belong to the same group
 * exactly when their years are equal; the score is ignored.
 */
public static class GroupTemp extends WritableComparator {
    public GroupTemp() {
        // Register KeyPair as the key type; the boolean is WritableComparator's
        // createInstances flag.
        super(KeyPair.class, true);
    }

    /**
     * @return the result of comparing the two keys' years (0 when the years match)
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final KeyPair left = (KeyPair) a;
        final KeyPair right = (KeyPair) b;
        final int leftYear = left.getYear();
        final int rightYear = right.getYear();
        return Integer.compare(leftYear, rightYear);
    }
}
/**
 * Sort comparator for {@link KeyPair}: year ascending, and within the same
 * year score descending.
 */
public static class SortTemp extends WritableComparator {
    public SortTemp() {
        // Register KeyPair as the key type; the boolean is WritableComparator's
        // createInstances flag.
        super(KeyPair.class, true);
    }

    /**
     * @return the year comparison when the years differ; otherwise the reversed
     *         score comparison, so higher scores sort first
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        final KeyPair left = (KeyPair) a;
        final KeyPair right = (KeyPair) b;
        final int byYear = Integer.compare(left.getYear(), right.getYear());
        // Same year: swap the operands to get descending score order.
        return byYear != 0
                ? byYear
                : Integer.compare(right.getScore(), left.getScore());
    }
}
/**
 * Custom partitioner keyed on the year only.
 *
 * <p>Keys with the same year always map to the same partition. Distinct years
 * can still share a partition, because the result is taken modulo the number
 * of partitions.
 */
public static class FirstPartition extends Partitioner<KeyPair, Text> {
    /**
     * @param key   map-output key; only its year is used
     * @param value map-output value (ignored)
     * @param num   number of partitions
     * @return a partition index in {@code [0, num)}
     */
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        // Clear the sign bit before the modulo so that a negative year (or an
        // overflowing product) can never yield a negative, i.e. illegal, index.
        // For ordinary non-negative years the result is unchanged.
        return ((key.getYear() * 127) & Integer.MAX_VALUE) % num;
    }
}
//字符串转日期format
//public static SimpleDateFormat SDF=new SimpleDateFormat("yyyy-MM-ddTHH:mm:ss.730");
/**
* Mapper
* 输出的Key是自定义的KeyPair
*/
static class TempMapper extends Mapper<LongWritable,Text,KeyPair,Text>{
protected void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{
Map<String, String> parsed = MRDPUtils.transformXmlToMap(value.toString());
String strDate = parsed.get("CreationDate");
String strScore = parsed.get("Score");
String strId = parsed.get("Id");
int year = Integer.parseInt(strDate.substring(0,4));
int score = Integer.parseInt(strScore);
KeyPair kp = new KeyPair(year, score);
String strResult = "Date:" + strDate + " Id:" + strId + " Score:" + strScore;
Text result = new Text(strResult);
context.write(kp, result);
}
}
/**
 * Identity-style reducer: the map output (KeyPair, Text) is the reduce input,
 * and every incoming value is written back out together with the key.
 */
static class TempReducer extends Reducer<KeyPair, Text, KeyPair, Text> {
    /**
     * Writes one output record per value of the group.
     *
     * <p>NOTE(review): the job groups keys by year only, so a single call sees
     * values that were emitted with different scores. Printing the right score
     * per record relies on Hadoop refreshing the fields of {@code kp} as the
     * value iterator advances; confirm against the Hadoop version in use.
     */
    @Override
    protected void reduce(KeyPair kp, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (final Text record : values) {
            // Pass each record through unchanged.
            context.write(kp, record);
        }
    }
}
/**
 * Job driver (client entry point).
 *
 * <p>Expects exactly two arguments after Hadoop's generic options: the input
 * path and the output path. Exits with 2 on a usage error, 0 when the job
 * succeeds and 1 otherwise.
 *
 * @param args command-line arguments
 * @throws IOException          if the job cannot be created or submitted
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String args[]) throws IOException, InterruptedException {
    // Load the configuration and let GenericOptionsParser apply generic Hadoop options.
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: temp <in> <out>");
        System.exit(2);
    }

    // Create the job through the factory method (the Job constructors are deprecated).
    Job job = Job.getInstance(conf, "temp");

    // 1. Class used to locate the job jar.
    job.setJarByClass(Subarea.class);

    // 2. Mapper and reducer.
    job.setMapperClass(Subarea.TempMapper.class);
    job.setReducerClass(Subarea.TempReducer.class);

    // 3. Map output types, and final output types matching TempReducer.
    job.setMapOutputKeyClass(KeyPair.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(KeyPair.class);
    job.setOutputValueClass(Text.class);

    // 4. Input and output locations.
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    // 5. Number of reduce tasks (the sample data contains 15 distinct years).
    job.setNumReduceTasks(15);

    // 6. Partitioning by year, sort order, and grouping by year.
    job.setPartitionerClass(FirstPartition.class);
    job.setSortComparatorClass(SortTemp.class);
    job.setGroupingComparatorClass(GroupTemp.class);

    // 7. Submit, wait for completion and print progress on the client.
    boolean isSuccess = false;
    try {
        isSuccess = job.waitForCompletion(true);
    } catch (ClassNotFoundException e) {
        // Leave isSuccess false so the process exits with a failure code.
        e.printStackTrace();
    }

    // 8. Report the outcome through the exit code.
    System.exit(isSuccess ? 0 : 1);
}
}
导出jar包后,输入以下命令运行(如果jar包的清单文件中没有指定主类,需要在jar包名之后加上主类名 mapreduce_3xia.fourth.Subarea):
hadoop jar jarname.jar /datapath/data.xml /out_name
data.xml:
<row Id="1" PostId="35314" Score="31" Text="not sure why this is getting do is correct! Double !" CreationDate="2018-09-06T09:07:10.730" UserId="1" />
<row Id="1" PostId="35315" Score="32" Text="not sure why this is getting downvoted -- it is correct! Double check itm!" CreationDate="2007-09-06T02:05:33.730" UserId="1" />
<row Id="1" PostId="35316" Score="33" Text="not get -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.730" UserId="1" />
<row Id="1" PostId="35317" Score="34" Text="not sure why this is getting downvoted -- it is cobelieve him!" CreationDate="2008-08-06T04:07:26.730" UserId="1" />
<row Id="2" PostId="35318" Score="35" Text="not sure why this ist! Double check it in your compiler if you don't believe him!" CreationDate="2008-05-06T08:11:10.730" UserId="1" />
<row Id="2" PostId="35319" Score="36" Text="not sure why tf you don'jdslfjdkfj kjf jkdjkfjd ksjfk t believe him!" CreationDate="2008-09-06T01:12:10.730" UserId="1" />
<row Id="2" PostId="35320" Score="37" Text="not sure why this is get! Double check it in your compiler if you don't believe him!" CreationDate="2008-06-06T08:03:10.730" UserId="1" />
<row Id="2" PostId="35321" Score="38" Text="not sure why this is gcorrect! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.880" UserId="1" />
<row Id="2" PostId="35322" Score="39" Text="not sure why ng downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2016-09-06T08:07:39.730" UserId="1" />
<row Id="2" PostId="35323" Score="40" Text="not sure wheve him!" CreationDate="2008-03-06T03:07:10.730" UserId="1" />
<row Id="3" PostId="35324" Score="41" Text="not sure why this is getting downvoted -- it is correct! Double check ie him!" CreationDate="2007-09-06T09:00:22.730" UserId="1" />
<row Id="3" PostId="35325" Score="42" Text="not sure why this is gettinyour compiler if you don't believe him!" CreationDate="2008-09-06T02:07:10.730" UserId="2" />
<row Id="3" PostId="35326" Score="42" Text="not sure why this is getting downvoted -- it is correct! Double check im!" CreationDate="2012-09-06T04:07:10.730" UserId="2" />
<row Id="3" PostId="35327" Score="43" Text="not sure why thble check it in your compiler if you don't believe him!" CreationDate="2008-07-06T01:05:10.730" UserId="2" />
<row Id="3" PostId="35314" Score="44" Text="is correct! Double check it in your compiler if you don't believe him!" CreationDate="2003-09-06T03:12:10.730" UserId="2" />
<row Id="3" PostId="35328" Score="45" Text="not surwnvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T02:07:11.730" UserId="2" />
<row Id="3" PostId="35329" Score="46" Text="not sure why this is gettiouble check it in your compiler if you don't believe him!" CreationDate="2010-08-06T03:08:10.730" UserId="2" />
<row Id="3" PostId="35330" Score="46" Text="not sure why this orrect! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T01:07:10.730" UserId="2" />
<row Id="1" PostId="35331" Score="47" Text="is getting downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.730" UserId="2" />
<row Id="1" PostId="35332" Score="48" Text="not sure why this is getrect! Double check it in your compiler if you don't believe him!" CreationDate="2014-09-06T09:07:45.730" UserId="2" />
<row Id="1" PostId="35333" Score="49" Text="not sure why this is getct! Double check it in your compiler if you don't believe him!" CreationDate="2008-02-06T04:07:10.730" UserId="3" />
<row Id="1" PostId="35334" Score="50" Text="not sure why this is gettin Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T09:07:10.730" UserId="3" />
<row Id="4" PostId="35335" Score="51" Text="nong downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2015-01-06T08:07:10.730" UserId="3" />
<row Id="4" PostId="35336" Score="52" Text="notvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T01:07:19.730" UserId="3" />
<row Id="4" PostId="35337" Score="53" Text="not sure why tct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T02:07:10.730" UserId="3" />
<row Id="4" PostId="35338" Score="54" Text="ng downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T07:07:10.730" UserId="3" />
<row Id="4" PostId="35339" Score="55" Text="not sure why this is getting downf you don't believe him!" CreationDate="2017-08-06T08:07:19.730" UserId="4" />
<row Id="4" PostId="35340" Score="56" Text="not sure why this is getting Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T07:04:10.730" UserId="4" />
<row Id="4" PostId="35341" Score="57" Text="not downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2011-09-06T06:07:10.730" UserId="4" />
<row Id="4" PostId="35342" Score="58" Text="not sure why this is getting downvoten your compiler if you don't believe him!" CreationDate="2019-09-06T02:07:10.730" UserId="4" />
<row Id="4" PostId="35343" Score="59" Text="not sure why this is getting doDouble check it in your compiler if you don't believe him!" CreationDate="2008-04-06T04:02:18.730" UserId="4" />
<row Id="6" PostId="35344" Score="60" Text="not sure why this is gettirect! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:07:10.730" UserId="5" />
<row Id="6" PostId="35345" Score="60" Text="not sure why this is gettinguble check it in your compiler if you don't believe him!" CreationDate="2006-04-06T08:07:10.730" UserId="5" />
<row Id="6" PostId="35346" Score="61" Text="downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:08:10.730" UserId="5" />
<row Id="6" PostId="35347" Score="62" Text="not sure why this is getting downvoted -- it is correct! Double check it in yourm!" CreationDate="2011-08-06T08:07:45.730" UserId="5" />
<row Id="6" PostId="35348" Score="63" Text="not sure why this is getting downvoted -compiler if you don't believe him!" CreationDate="2008-09-06T08:04:10.730" UserId="5" />
<row Id="6" PostId="35349" Score="64" Text="not sure - it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2005-05-06T08:07:10.730" UserId="6" />
<row Id="6" PostId="35350" Score="65" Text="whgetting downvoted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2008-09-06T08:06:10.730" UserId="6" />
<row Id="6" PostId="35351" Score="66" Text="not sure! Double check it our compiler if you don't believe him!" CreationDate="2008-05-06T08:07:23.730" UserId="6" />
<row Id="6" PostId="35352" Score="66" Text="not sureted -- it is correct! Double check it in your compiler if you don't believe him!" CreationDate="2099-09-06T08:09:19.730" UserId="6" />