一、题目
1、主方法,配置作业:
/**
 * Configures the weather (TQ) MapReduce job, submits it and waits for it to finish.
 *
 * <p>Input is read from {@code /mr/tq/input} and results are written to
 * {@code /mr/tq/output}. An existing output directory is deleted recursively before
 * the job is submitted. The JVM exits with status 0 when the job succeeds and 1 when
 * it fails, so that calling scripts can detect a failed run.
 *
 * @param args command-line arguments (not used)
 * @throws IOException            if the file system or job submission fails
 * @throws ClassNotFoundException if a configured job class cannot be loaded
 * @throws InterruptedException   if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // Default configuration; nothing is overridden here.
    Configuration conf = new Configuration();

    // Create the job instance and give it a name.
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestTQ.class);
    job.setJobName("myjob");

    // Input path.
    Path inPath = new Path("/mr/tq/input");
    FileInputFormat.addInputPath(job, inPath);

    // Output path: remove the result of a previous run so the job can be rerun.
    Path outPath = new Path("/mr/tq/output");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }
    FileOutputFormat.setOutputPath(job, outPath);

    // Mapper and the types of its output. The map output is serialized on its way
    // to the reducers, so the key/value classes have to be declared.
    job.setMapperClass(TMapper.class);
    job.setMapOutputKeyClass(TQ.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Custom partitioner and sort comparator.
    job.setPartitionerClass(TPartitioner.class);
    job.setSortComparatorClass(TSorter.class);

    // Number of reduce tasks running in parallel.
    job.setNumReduceTasks(2);

    // Grouping comparator: decides where one reduce group of keys ends and the next begins.
    job.setGroupingComparatorClass(TGroupComparator.class);

    // Reducer.
    job.setReducerClass(TReducer.class);

    // Submit the job, wait for completion and propagate the outcome as the exit status.
    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
2、抽象出天气类
public class TQ implements WritableComparable<TQ>{
private int year;
private int month;
private int day;
private int wd;
public int getYear() {
return year;
}
public void setYear(int year) {
this.year = year;
}
public int getMonth() {
return month;
}
public void setMonth(int month) {
this.month = month;
}
public int getDay() {
return day;
}
public void setDay(int day) {
this.day = day;
}
public int getWd() {
return wd;
}
public void setWd(int wd) {
this.wd = wd;
}
/**
* 序列化
* <p>Title: write</p>
* <p>Description: </p>
* @param out
* @throws IOException
*/
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(this.getYear());
out.writeInt(this.getMonth());
out.writeInt(this.getDay());
out.writeInt(this.getWd());
}
/**
* 反序列化
* <p>Title: readFields</p>
* <p>Description: </p>
* @param in
* @throws IOException
*/
@Override
public void readFields(DataInput in) throws IOException {
this.setYear(in.readInt());
this.setMonth(in.readInt());
this.setDay(in.readInt());
this.setWd(in.readInt());
}
/**
* 正序 年月日
* <p>Title: compareTo</p>
* <p>Description: </p>
* @param arg0
* @return
*/
@Override
public int compareTo(TQ o) {
int a = Integer.compare(this.getYear(), o.getYear());
if(a == 0){
int b = Integer.compare(this.getMonth(), o.getMonth());
if(b ==0){
return Integer.compare(this.getDay(), o.getDay());
}
return b;
}
return a;
}
3、自定义mapper
public class TMapper extends Mapper<LongWritable, Text, TQ, IntWritable>{
TQ tkey = new TQ();
IntWritable tval = new IntWritable();
@Override
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//时间与温度
String[] words = StringUtils.split(value.toString(),'\t');
String pattern = "yyyy-MM-dd";
SimpleDateFormat sdf = new SimpleDateFormat(pattern);
try {
Date date = sdf.parse(words[0]);
Calendar cal = Calendar.getInstance();
cal.setTime(date);
tkey.setYear(cal.get(Calendar.YEAR));
tkey.setMonth(cal.get(Calendar.MONTH)+1);
tkey.setDay(cal.get(Calendar.DAY_OF_MONTH));
int wd = Integer.parseInt(words[1].substring(0, words[1].lastIndexOf("c")));
tkey.setWd(wd);
tval.set(wd);
//输出
context.write(tkey, tval);
} catch (ParseException e) {
e.printStackTrace();
}
}
4、排序器sorter
public class TSorter extends WritableComparator{
TQ t1;
TQ t2;
public TSorter() {
super(TQ.class,true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
t1 = (TQ)a;
t2 = (TQ)b;
int c1 = Integer.compare(t1.getYear(), t2.getYear());
if(c1 == 0){
int c2 = Integer.compare(t1.getMonth(), t2.getMonth());
if(c2 ==0){
return -Integer.compare(t1.getWd(), t2.getWd());
}
return c2;
}
return c1;
}
5、自定义分区器
/**
 * Partitions map output by year, so that all records of one year go to the same partition.
 */
public class TPartitioner extends Partitioner<TQ, IntWritable> {
    /**
     * Chooses the partition for a record.
     *
     * @param key           the record key; only its year is used
     * @param value         the record value (not used)
     * @param numPartitions the total number of partitions
     * @return a partition index in the range {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(TQ key, IntWritable value, int numPartitions) {
        // Clear the sign bit first: a plain % would yield a negative (invalid) index
        // for a negative year. Non-negative years map exactly as before.
        return (key.getYear() & Integer.MAX_VALUE) % numPartitions;
    }
}
6、分组比较器
/**
 * Grouping comparator for {@link TQ} keys: two keys compare as equal when they share
 * the same year and month; day and {@code wd} are ignored.
 */
public class TGroupComparator extends WritableComparator {
    TQ t1;
    TQ t2;

    /** Registers {@link TQ} as the key class and asks the base class to create key instances. */
    public TGroupComparator() {
        super(TQ.class, true);
    }

    /**
     * Compares two {@link TQ} keys by year, then by month.
     *
     * @param a the first key, must be a {@link TQ}
     * @param b the second key, must be a {@link TQ}
     * @return the year comparison if the years differ, otherwise the month comparison
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        t1 = (TQ) a;
        t2 = (TQ) b;
        int byYear = Integer.compare(t1.getYear(), t2.getYear());
        return byYear != 0
                ? byYear
                : Integer.compare(t1.getMonth(), t2.getMonth());
    }
}
7、自定义reducer
public class TReducer extends Reducer<TQ, IntWritable, Text, IntWritable>{
IntWritable tval = new IntWritable();
Text tkey = new Text();
@Override
protected void reduce(TQ key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int flag = 0;
int day = 0;
for (IntWritable val : values) {
if(flag == 0){
tkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());
tval.set(val.get());
flag ++;
day = key.getDay();
context.write(tkey, tval);
}
if(flag != 0 && day!=key.getDay()){
tkey.set(key.getYear()+"-"+key.getMonth()+"-"+key.getDay());
tval.set(val.get());
context.write(tkey, tval);
return ;
}
}
}
打包成jar,上传到集群后执行:hadoop jar xx.jar com.kanq.hd.mr.tq.TestTQ
(前提是路径正常,文件也在相关路径下。)