Hadoop天气数据分析案例

最新推荐文章于 2023-11-25 12:59:26 发布

宇直豪

最新推荐文章于 2023-11-25 12:59:26 发布

阅读量5.7k

点赞数 1

分类专栏：大数据　文章标签：hadoop

本文链接：https://blog.csdn.net/csdn_xuwenhao/article/details/101113517

版权

大数据专栏收录该内容

8 篇文章 0 订阅

订阅专栏

需求：

找出每个月气温最高的2天（案例测试）

数据源：

1949-10-01 14:21:02 34c

1949-10-01 19:21:02 38c

1949-10-02 14:01:02 36c

1950-01-01 11:21:02 32c

1950-10-01 12:21:02 37c

1951-12-01 12:21:02 23c

1950-10-02 12:21:02 41c

1950-10-03 12:21:02 27c

1951-07-01 12:21:02 45c

1951-07-02 12:21:02 46c

1951-07-03 12:21:03 47c

项目结构：

TQtest.java

package com.tq.test;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TQtest {

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

// TODO Auto-generated method stub

// 1配置

Configuration conf = new Configuration();

Job job = Job.getInstance(conf);

job.setJarByClass(TQtest.class);

job.setJobName("tq");

// 2设置输入路径和输出路径

Path inpath = new Path("/tq/input");

FileInputFormat.addInputPath(job, inpath);

Path outpath = new Path("/tq/output");

if (outpath.getFileSystem(conf).exists(outpath))

outpath.getFileSystem(conf).delete(outpath, true);

FileOutputFormat.setOutputPath(job, outpath);

// 3设置Mapper

job.setMapperClass(Tmapper.class);

job.setMapOutputKeyClass(Tq.class);

job.setMapOutputValueClass(IntWritable.class);

// 4 自定义比较器

job.setSortComparatorClass(TqSortComparator.class);

// 5自定义分区器

job.setPartitionerClass(TPartitioner.class);

// 6 自定义组排序

job.setGroupingComparatorClass(TGroupCmparator.class);

// 7 设置reducetask数量

job.setNumReduceTasks(2);

// 8 设置reducer

job.setReducerClass(Treducer.class);

// 9

job.waitForCompletion(true);

}

Tmapper.java

package com.tq.test;

import java.io.IOException;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

import org.jboss.netty.util.internal.StringUtil;

public class Tmapper extends Mapper<LongWritable, Text, Tq, IntWritable> {

Tq tkey = new Tq();

IntWritable tvalue = new IntWritable();

@Override

protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

// 获得时间温度数组

String[] words = StringUtil.split(value.toString(), '\t');

String pattern = "yyyy-MM-dd";

SimpleDateFormat sdf = new SimpleDateFormat(pattern);

try {

// 处理日期

Date date = sdf.parse(words[0]);

Calendar cal = Calendar.getInstance();

cal.setTime(date);

tkey.setYear(cal.get(Calendar.YEAR));

tkey.setMonth(cal.get(Calendar.MONTH) + 1);

tkey.setDay(cal.get(Calendar.DAY_OF_MONTH));

// 处理温度

int temp = Integer.parseInt(words[1].substring(0, words[1].lastIndexOf("c")));

tkey.setTemp(temp);

tvalue.set(temp);

context.write(tkey, tvalue);

} catch (ParseException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

Tq.java

package com.tq.test;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class Tq implements WritableComparable<Tq> {

private int year;

private int month;

private int day;

private int temp;

public int getYear() {

return year;

}

public void setYear(int year) {

this.year = year;

}

public int getMonth() {

return month;

}

public void setMonth(int month) {

this.month = month;

}

public int getDay() {

return day;

}

public void setDay(int day) {

this.day = day;

}

public int getTemp() {

return temp;

}

public void setTemp(int temp) {

this.temp = temp;

}

public void write(DataOutput out) throws IOException {

out.writeInt(this.getYear());

out.writeInt(this.getMonth());

out.writeInt(this.getDay());

out.writeInt(this.getTemp());

}

public void readFields(DataInput in) throws IOException {

this.setYear(in.readInt());

this.setMonth(in.readInt());

this.setDay(in.readInt());

this.setTemp(in.readInt());

}

public int compareTo(Tq o) {

int c1 = Integer.compare(this.getYear(), o.getYear());

if (c1 == 0) {

int c2 = Integer.compare(this.getMonth(), o.getMonth());

if (c2 == 0) {

return Integer.compare(this.getDay(), o.getDay());

}

return c2;

}

return c1;

}

@Override

public String toString() {

return year + "-" + month + "-" + day;

}

TqSortComparator.java

package com.tq.test;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

/**

* 实现天气年月正序，温度倒序

public class TqSortComparator extends WritableComparator {

Tq t1 = null;

Tq t2 = null;

public TqSortComparator() {

super(Tq.class, true);

}

public int compare(WritableComparable a, WritableComparable b) {

t1 = (Tq) a;

t2 = (Tq) b;

int c1 = Integer.compare(t1.getYear(), t2.getYear());

if (c1 == 0) {

int c2 = Integer.compare(t1.getMonth(), t2.getMonth());

if (c2 == 0) {

return -Integer.compare(t1.getTemp(), t2.getTemp());

}

return c2;

}

return c1;

}

TPartitioner.java

package com.tq.test;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.mapreduce.Partitioner;

public class TPartitioner extends Partitioner<Tq, IntWritable> {

@Override

public int getPartition(Tq key, IntWritable value, int numPartitions) {

// TODO Auto-generated method stub

return key.getYear() % numPartitions;

}

TGroupCmparator.java

package com.tq.test;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class TGroupCmparator extends WritableComparator {

Tq t1 = null;

Tq t2 = null;

public TGroupCmparator() {

super(Tq.class, true);

}

public int compare(WritableComparable a, WritableComparable b) {

t1 = (Tq) a;

t2 = (Tq) b;

int c1 = Integer.compare(t1.getYear(), t2.getYear());

if (c1 == 0) {

return Integer.compare(t1.getMonth(), t2.getMonth());

}

return c1;

}

Treducer.java

package com.tq.test;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

public class Treducer extends Reducer<Tq, IntWritable, Text, IntWritable> {

Text tkey = new Text();

IntWritable tval = new IntWritable();

@Override

protected void reduce(Tq key, Iterable<IntWritable> vals, Context context)

throws IOException, InterruptedException {

int flag = 0;

int day = 0;

for (IntWritable val : vals) {

if (flag == 0) {

tkey.set(key.toString());

tval.set(val.get());

context.write(tkey, tval);

flag++;

day = key.getDay();

}

if (flag != 0 && day != key.getDay()) {

tkey.set(key.toString());

tval.set(val.get());

context.write(tkey, tval);

return;

}

打包成 jar，放到 Linux 虚拟机上执行（执行前需先启动 HDFS、YARN、ZooKeeper 以及 DFSZKFailoverController）：

hadoop jar tq.jar com.tq.test.TQtest

执行结果：

宇直豪

关注

1
点赞
踩
37

收藏

觉得还不错? 一键收藏
0
评论
Hadoop天气数据分析案例

需求：找出每个月气温最高的2天（案例测试）。数据源：1949-10-01 14:21:02 34c；1949-10-01 19:21:02 38c；1949-10-02 14:01:02 36c；1950-01-01 11:21:02 32c；1950-10-01 12:21:02 37c；1951-12-01 12:21:02 23c；1...
复制链接

扫一扫