6. For each month, find the three highest temperatures.
Input format: yyyy-MM-dd SPACE HH:mm:ss TAB temperature
inputfile:
1949-10-01 14:21:02 34c
1949-10-02 14:01:02 36c
1950-01-01 11:21:02 32c
1950-10-01 12:21:02 37c
1951-12-01 12:21:02 23c
1950-10-02 12:21:02 41c
1950-10-03 12:21:02 27c
1951-07-01 12:21:02 45c
1951-07-02 12:21:02 46c
1951-07-03 12:21:03 47c
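For reference, working the requirement out by hand on the sample input gives these per-month top-3 lists, in the reducer's output format below (year TAB month TAB day TAB temperature). Since the partitioner routes by year across three reduce tasks, each year ends up in its own part file:
1949	10	2	36.0
1949	10	1	34.0
1950	1	1	32.0
1950	10	2	41.0
1950	10	1	37.0
1950	10	3	27.0
1951	7	3	47.0
1951	7	2	46.0
1951	7	1	45.0
1951	12	1	23.0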
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WRunner {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("weather");
        job.setJarByClass(WRunner.class);
        job.setMapperClass(WMapper.class);
        job.setReducerClass(WReducer.class);
        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(DoubleWritable.class);
        // declare the reducer's final output types as well
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setPartitionerClass(MyPartitioner.class);
        job.setSortComparatorClass(MySort.class);
        job.setGroupingComparatorClass(MyGroup.class);
        // KeyValueTextInputFormat splits each line at the first TAB:
        // key = "yyyy-MM-dd HH:mm:ss", value = the temperature string
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setNumReduceTasks(3);
        Path in = new Path("/home/jinzhao/mrtest/input");
        FileInputFormat.setInputPaths(job, in);
        Path out = new Path("/home/jinzhao/mrtest/output");
        FileSystem fs = FileSystem.get(conf);
        // a pre-existing output directory would make the job fail on startup
        if (fs.exists(out))
            fs.delete(out, true);
        FileOutputFormat.setOutputPath(job, out);
        job.waitForCompletion(true);
    }
    static class WMapper extends Mapper<Text, Text, MyKey, DoubleWritable> {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            try {
                // key is the timestamp half of the line, value the temperature, e.g. "34c"
                Date date = sdf.parse(key.toString());
                Calendar c = Calendar.getInstance();
                c.setTime(date);
                int year = c.get(Calendar.YEAR);
                int month = c.get(Calendar.MONTH);      // 0-based
                int day = c.get(Calendar.DAY_OF_MONTH); // 1-based
                String h = value.toString().trim();
                // strip the trailing unit character ("c") before parsing
                double hot = Double.parseDouble(h.substring(0, h.length() - 1));
                context.write(new MyKey(year, month, day, hot), new DoubleWritable(hot));
            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }
    static class WReducer extends Reducer<MyKey, DoubleWritable, Text, NullWritable> {
        @Override
        protected void reduce(MyKey key, Iterable<DoubleWritable> values, Context context)
                throws IOException, InterruptedException {
            // values arrive hottest-first (MySort) and one group spans one year+month
            // (MyGroup), so the first three values are the month's top three temperatures
            int i = 0;
            for (DoubleWritable v : values) {
                ++i;
                // Calendar months are 0-based, but days are already 1-based
                String msg = key.getYear() + "\t" + (key.getMonth() + 1) + "\t" + key.getDay() + "\t" + v.get();
                context.write(new Text(msg), NullWritable.get());
                if (i == 3)
                    break;
            }
        }
    }
}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Composite key: serializes the fields shipped between map and reduce.
 */
public class MyKey implements WritableComparable<MyKey> {
    private int year;
    private int month;
    private int day;
    private double hot;

    public MyKey() {
        super();
    }

    public MyKey(int year, int month, int day, double hot) {
        this.year = year;
        this.month = month;
        this.day = day;
        this.hot = hot;
    }

    public int getYear() { return year; }
    public void setYear(int year) { this.year = year; }
    public int getMonth() { return month; }
    public void setMonth(int month) { this.month = month; }
    public int getDay() { return day; }
    public void setDay(int day) { this.day = day; }
    public double getHot() { return hot; }
    public void setHot(double hot) { this.hot = hot; }

    @Override
    public void readFields(DataInput in) throws IOException {
        // must read the fields in exactly the order write() emits them
        this.year = in.readInt();
        this.month = in.readInt();
        this.hot = in.readDouble();
        this.day = in.readInt();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(year);
        out.writeInt(month);
        out.writeDouble(hot);
        out.writeInt(day);
    }

    /**
     * Natural ordering when the object is used as a key; the job replaces it
     * with MySort, but WritableComparable still requires an implementation.
     */
    @Override
    public int compareTo(MyKey other) {
        int r1 = Integer.compare(this.year, other.getYear());
        if (r1 == 0) {
            int r2 = Integer.compare(this.month, other.getMonth());
            if (r2 == 0) {
                return Double.compare(this.hot, other.getHot());
            } else {
                return r2;
            }
        } else {
            return r1;
        }
    }
}
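Because write() and readFields() must agree on field order (note that hot is written before day), a quick local round-trip catches accidental reorderings. A minimal sketch, assuming MyKey is on the classpath; the class name RoundTripCheck is made up for illustration:

import java.io.*;

public class RoundTripCheck {
    public static void main(String[] args) throws IOException {
        MyKey original = new MyKey(1950, 9, 2, 41.0); // month 9 == October (0-based)
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bos));    // serialize
        MyKey copy = new MyKey();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        // all four fields survive the round trip
        System.out.println(copy.getYear() + " " + copy.getMonth() + " "
                + copy.getDay() + " " + copy.getHot()); // 1950 9 2 41.0
    }
}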
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: MyKey instances with the same year and month form one
 * group, i.e. they are fed to a single reduce() call.
 */
public class MyGroup extends WritableComparator {
    public MyGroup() {
        super(MyKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MyKey k1 = (MyKey) a;
        MyKey k2 = (MyKey) b;
        int r1 = Integer.compare(k1.getYear(), k2.getYear());
        if (r1 == 0) {
            return Integer.compare(k1.getMonth(), k2.getMonth());
        } else {
            return r1;
        }
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Custom sort: orders keys by year and month ascending, then by temperature
 * descending, so each month's hottest readings reach the reducer first.
 */
public class MySort extends WritableComparator {
    public MySort() {
        super(MyKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        MyKey k1 = (MyKey) a;
        MyKey k2 = (MyKey) b;
        int r1 = Integer.compare(k1.getYear(), k2.getYear());
        if (r1 == 0) {
            int r2 = Integer.compare(k1.getMonth(), k2.getMonth());
            if (r2 == 0) {
                // negated for descending temperature order
                return -Double.compare(k1.getHot(), k2.getHot());
            } else {
                return r2;
            }
        } else {
            return r1;
        }
    }
}
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Partitioner: routes keys to reduce tasks by year, so each of the three
 * reducers handles one year's data.
 */
public class MyPartitioner extends Partitioner<MyKey, DoubleWritable> {
    @Override
    public int getPartition(MyKey key, DoubleWritable value, int numReduceTasks) {
        return (key.getYear() - 1949) % numReduceTasks;
    }
}
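The three helper classes implement the classic secondary-sort pattern: MySort decides the order in which keys reach the reducer, MyGroup decides which consecutive keys share one reduce() call, and the reducer simply takes the first three values. A minimal local sanity check of the two comparators; the class name SecondarySortDemo is made up for illustration:

public class SecondarySortDemo {
    public static void main(String[] args) {
        MyKey a = new MyKey(1951, 6, 1, 45.0); // month 6 == July (Calendar is 0-based)
        MyKey b = new MyKey(1951, 6, 3, 47.0);
        // MySort: same year and month, so the hotter key sorts first
        System.out.println(new MySort().compare(a, b));  // positive: b comes before a
        // MyGroup: day and temperature are ignored, both keys share one reduce group
        System.out.println(new MyGroup().compare(a, b)); // 0
    }
}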
7. Friend recommendation for a social network.
Input format: user TAB friend1 SPACE friend2 SPACE ...
inputfile:
小明 老王 如花 林志玲
老王 小明 凤姐
如花 小明 李刚 凤姐
林志玲 小明 李刚 凤姐 郭美美
李刚 如花 凤姐 林志玲
郭美美 凤姐 林志玲
凤姐 如花 老王 林志玲 郭美美
Output of the first job:
Format: user1 SPACE user2 TAB count
Output of the second job:
Format: user TAB recommendation1 SPACE recommendation2 SPACE ...
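For example, 老王 and 如花 are not direct friends but co-occur in the friend lists of both 小明 and 凤姐, so the first job should emit the pair with count 2; since the Fof key below puts the two names into String.compareTo order, the line reads "如花 老王" TAB 2. The second job then regroups such pairs into a per-user recommendation list sorted by the number of common friends.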
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Friends {
    static class FofMapper extends Mapper<Text, Text, Fof, IntWritable> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            String user = key.toString();
            String[] friends = value.toString().split(" ");
            for (int i = 0; i < friends.length; ++i) {
                // 0 marks a direct friendship, which must never be recommended
                context.write(new Fof(user, friends[i]), new IntWritable(0));
                // every pair within one friend list shares this user as a common friend
                for (int j = i + 1; j < friends.length; ++j)
                    context.write(new Fof(friends[i], friends[j]), new IntWritable(1));
            }
        }
    }
    static class FofReducer extends Reducer<Fof, IntWritable, Fof, IntWritable> {
        @Override
        protected void reduce(Fof key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            boolean flag = true;
            for (IntWritable i : values) {
                if (i.get() == 0) {
                    // the pair are already direct friends; drop them
                    flag = false;
                    break;
                } else {
                    sum = sum + i.get();
                }
            }
            if (flag)
                context.write(key, new IntWritable(sum));
        }
    }
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            // Job 1: count the common friends of every non-friend pair
            Job job = Job.getInstance(conf);
            job.setJarByClass(Friends.class);
            job.setJobName("friend-I");
            job.setMapperClass(FofMapper.class);
            job.setReducerClass(FofReducer.class);
            job.setMapOutputKeyClass(Fof.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Fof.class);
            job.setOutputValueClass(IntWritable.class);
            job.setInputFormatClass(KeyValueTextInputFormat.class);
            Path in = new Path("/home/jinzhao/mrtest/input");
            FileInputFormat.setInputPaths(job, in);
            Path out = new Path("/home/jinzhao/mrtest/output");
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(out))
                fs.delete(out, true);
            FileOutputFormat.setOutputPath(job, out);
            if (job.waitForCompletion(true)) {
                // Job 2: regroup the pair counts into per-user recommendation lists
                Job job2 = Job.getInstance(conf);
                job2.setJarByClass(Friends.class);
                job2.setJobName("friend-II");
                job2.setMapperClass(SortMapper.class);
                job2.setReducerClass(SortReducer.class);
                job2.setMapOutputKeyClass(User.class);
                job2.setMapOutputValueClass(User.class);
                job2.setOutputKeyClass(Text.class);
                job2.setOutputValueClass(Text.class);
                job2.setInputFormatClass(KeyValueTextInputFormat.class);
                job2.setSortComparatorClass(FSort.class);
                job2.setGroupingComparatorClass(FGroup.class);
                Path in2 = new Path("/home/jinzhao/mrtest/output");
                FileInputFormat.setInputPaths(job2, in2);
                Path out2 = new Path("/home/jinzhao/mrtest/output2");
                if (fs.exists(out2))
                    fs.delete(out2, true);
                FileOutputFormat.setOutputPath(job2, out2);
                job2.waitForCompletion(true);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    static class SortMapper extends Mapper<Text, Text, User, User> {
        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // key is "user1 user2" from job 1, value is the common-friend count
            String[] friends = key.toString().split(" ");
            int count = Integer.parseInt(value.toString());
            // emit the pair in both directions so each user collects all candidates
            context.write(new User(friends[0], count), new User(friends[1], count));
            context.write(new User(friends[1], count), new User(friends[0], count));
        }
    }
    static class SortReducer extends Reducer<User, User, Text, Text> {
        @Override
        protected void reduce(User key, Iterable<User> values, Context context)
                throws IOException, InterruptedException {
            // FGroup groups by username and FSort orders by count descending,
            // so the values arrive best candidate first
            StringBuilder sb = new StringBuilder();
            for (User i : values)
                sb.append(i.getUsername() + "," + i.getCount() + " ");
            context.write(new Text(key.getUsername()), new Text(sb.toString().trim()));
        }
    }
}
import org.apache.hadoop.io.Text;

/**
 * A pair of usernames stored in canonical (sorted) order, so that (a, b) and
 * (b, a) serialize to the same key.
 */
public class Fof extends Text {
    public Fof() {
        super();
    }

    public Fof(String a, String b) {
        super(getFof(a, b));
    }

    public static String getFof(String a, String b) {
        int r = a.compareTo(b);
        if (r < 0)
            return a + " " + b;
        else
            return b + " " + a;
    }
}
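A quick check of the canonical ordering; the class name FofDemo is made up for illustration:

public class FofDemo {
    public static void main(String[] args) {
        Fof f1 = new Fof("老王", "如花");
        Fof f2 = new Fof("如花", "老王");
        // both constructor orders collapse onto the same underlying text,
        // so their map outputs meet in a single reduce group
        System.out.println(f1.equals(f2)); // true
        System.out.println(f1);            // 如花 老王
    }
}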
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

/**
 * Key/value type for the second job: a username plus the common-friend count.
 */
public class User implements WritableComparable<User> {
    private String username;
    private int count;

    public User() {}

    public User(String username, int count) {
        this.username = username;
        this.count = count;
    }

    public String getUsername() { return username; }
    public void setUsername(String username) { this.username = username; }
    public int getCount() { return count; }
    public void setCount(int count) { this.count = count; }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(username);
        out.writeInt(count);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.username = in.readUTF();
        this.count = in.readInt();
    }

    @Override
    public int compareTo(User other) {
        int c1 = this.username.compareTo(other.username);
        if (c1 == 0) {
            // Integer.compare avoids the overflow risk of plain subtraction
            return Integer.compare(this.count, other.getCount());
        } else {
            return c1;
        }
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator: all User keys with the same username share one
 * reduce() call.
 */
public class FGroup extends WritableComparator {
    public FGroup() {
        super(User.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        User u1 = (User) a;
        User u2 = (User) b;
        return u1.getUsername().compareTo(u2.getUsername());
    }
}
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Sort comparator: orders by username ascending, then by common-friend count
 * descending, so the strongest recommendations come first.
 */
public class FSort extends WritableComparator {
    public FSort() {
        super(User.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        User u1 = (User) a;
        User u2 = (User) b;
        int c1 = u1.getUsername().compareTo(u2.getUsername());
        if (c1 == 0) {
            // reversed operands give descending count order
            return Integer.compare(u2.getCount(), u1.getCount());
        } else {
            return c1;
        }
    }
}
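As in the weather exercise, the comparator pair can be sanity-checked locally; the class name RecommendDemo is made up for illustration:

public class RecommendDemo {
    public static void main(String[] args) {
        User a = new User("小明", 2);
        User b = new User("小明", 1);
        // FSort: same user, so the higher count sorts first
        System.out.println(new FSort().compare(a, b));  // negative: a comes before b
        // FGroup: count is ignored, both records join one reduce group
        System.out.println(new FGroup().compare(a, b)); // 0
    }
}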