Hadoop 2.2.0 MapReduce Examples


1. WordCount

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// WordCount: counts how many times each word appears in the input files.
public class WordCountExample {
	// Mapper: splits each line on spaces and emits (word, 1) for every token.
	private static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String str=value.toString();
			String []strArray=str.split(" ");
			for(String s:strArray){
				context.write(new Text(s), new IntWritable(1));
			}
		}
		
	}
	
	// Reducer: sums the counts emitted for each word and writes (word, total).
	private static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			int sum=0;
			for(IntWritable count:values){
				sum+=count.get();
			}
			context.write(key, new IntWritable(sum));
		}
		
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String []argArray=new GenericOptionsParser(conf,args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: WordCountExample <input path> <output path>");
			System.exit(1);
		}
		Job job=new Job(conf,"wordcount");
		job.setJarByClass(WordCountExample.class);
		job.setMapperClass(WordCountMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		job.setReducerClass(WordCountReducer.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);
	}

}
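
A small optional addition, not in the original listing: because WordCountReducer only adds up values, the same class can also be registered as a combiner, so partial sums are computed on the map side and less data crosses the network during the shuffle. A minimal sketch of the one extra driver line, placed with the other job.set* calls above:

		job.setCombinerClass(WordCountReducer.class);

The final counts are unchanged, because summing the partial sums gives the same totals as summing the original ones.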


2. Deduplication (remove duplicate lines)

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Deduplication: each distinct input line appears exactly once in the output.
public class DeleteRepeatExample {
	// Mapper: uses the whole line as the key; the value is a dummy 0.
	private static class DeleteRepeatMapper extends Mapper<Object, Text, Text, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(value, new IntWritable(0));
		}
		
	}
	
	// Reducer: identical lines are grouped under one key, so writing the key once removes duplicates.
	private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, Object>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			context.write(key, null); // TextOutputFormat writes only the key when the value is null
		}
		
	}
	
	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String[]argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: DeleteRepeatExample <input path> <output path>");
			System.exit(1);
		}
		Job job=new Job(conf,"delete repeat");
		job.setJarByClass(DeleteRepeatExample.class);
		job.setMapperClass(DeleteRepeatMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(DeleteRepeatReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Object.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job,new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}
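
A more idiomatic variant, not in the original code: Hadoop ships NullWritable for exactly this key-only output, which avoids passing a raw null and lets the declared output value class match what is actually written. With an extra import of org.apache.hadoop.io.NullWritable at the top of the file, the reducer and one driver line would change roughly as follows, everything else staying as above:

	private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, NullWritable>{
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			// one output line per distinct input line
			context.write(key, NullWritable.get());
		}
	}

	// in main():
	job.setOutputValueClass(NullWritable.class);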


3. Sort

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Sort: lets the shuffle phase sort the integer keys, then numbers them in the reducer.
public class SortExample {
	// Mapper: parses each line as an integer and emits it as the key; the value is a dummy 0.
	private static class SortMapper extends Mapper<Object, Text, IntWritable, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(new IntWritable(Integer.parseInt(value.toString())), new IntWritable(0));
		}
		
	}
	
	// Reducer: keys arrive in ascending order, so it writes (rank, value) pairs.
	private static class SortReducer extends Reducer<IntWritable, IntWritable, Text, Text>{
		private int index=0; // running rank across all values seen by this reducer
		@Override
		protected void reduce(IntWritable key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			for(IntWritable i:values){
				index++;
				context.write(new Text(index+""),new Text(key.get()+""));
			}
		}
		
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String[]argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: SortExample <input path> <output path>");
			System.exit(1);
		}
		Job job=new Job(conf,"sort");
		job.setJarByClass(SortExample.class);
		job.setMapperClass(SortMapper.class);
		job.setMapOutputKeyClass(IntWritable.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(SortReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}
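
One caveat the code leaves implicit: the output is only globally sorted if every key goes to the same reducer; with several reducers the default HashPartitioner spreads the keys, and each output file is sorted only within itself. A minimal way to make that assumption explicit in the driver above:

		job.setNumReduceTasks(1); // a single reducer produces one globally sorted output file

For data too large for one reducer, TotalOrderPartitioner with sampled split points is the usual alternative, but that is beyond this example.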

4. Single-table self-join

package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob; // author's helper class (not part of Hadoop) for building a temporary job jar

// Single-table self-join: the input lists child-parent pairs; joining the table with itself
// yields grandchild-grandparent pairs.
public class SelfJoin {
	// Mapper: emits each pair twice, keyed by the parent (tag "1") and by the child (tag "2"),
	// so rows that share the middle person meet in the same reduce call.
	private static class SelfJoinMapper extends Mapper<Object, Text, Text, Text>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String str=value.toString();
			String[] nameArray=str.split(" ");
			context.write(new Text(nameArray[1]), new Text("1-"+nameArray[0]+"-"+nameArray[1]));
			context.write(new Text(nameArray[0]), new Text("2-"+nameArray[0]+"-"+nameArray[1]));
			
		}
		
	}
	// Reducer: for one middle person, "1" records contribute grandchildren and "2" records
	// contribute grandparents; their cross product is the joined result.
	private static class SelfJoinReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Context context)
				throws IOException, InterruptedException {
			List<String> outKey=new ArrayList<String>();
			List<String> outValue=new ArrayList<String>();
			/*for(Text value:values){
			context.write(NullWritable.get(), value);
			}
			context.write(NullWritable.get(), new Text("---------"));*/
			for(Text value:values){
				String[] relationArray=value.toString().split("-");
				if(relationArray[0].equals("1")){
					outKey.add(relationArray[1]);
				}else if(relationArray[0].equals("2")){
					outValue.add(relationArray[2]);
				}
			}
			for(String k:outKey){
				for(int i=0;i<outValue.size();i++){
					context.write(new Text(k), new Text(outValue.get(i)));
				}
			}
		}
		
	}
	public static void main(String[] args) throws Exception{
		// Package the classes compiled into "bin" as a temporary jar so the job can be
		// submitted straight from the IDE (Ejob is the author's helper class).
		File jarFile = Ejob.createTempJar("bin");
		//Ejob.addClasspath("/opt/hadoop/conf");
		ClassLoader classLoader = Ejob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);

		Configuration conf=new Configuration();
		String [] argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: SelfJoin <input path> <output path>");
			System.exit(1);
		}
		JobConf jobConf=new JobConf(conf);
		jobConf.setJar(jarFile.toString());
		Job job=new Job(jobConf,"self join");
		job.setJarByClass(SelfJoin.class);
		job.setMapperClass(SelfJoinMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setReducerClass(SelfJoinReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}


Input data:

Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma

Output:

Tom	Alice
Tom	Jesse
Jone	Alice
Jone	Jesse
Tom	Mary
Tom	Ben
Jone	Mary
Jone	Ben
Philip	Alice
Philip	Jesse
Mark	Alice
Mark	Jesse

5. Multi-table join

package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob;

// Multi-table join: joins table1 (factory name, city id) with table2 (city id, city name) on the city id.
public class MultiTableJoin {
	// Mapper: table2 lines start with the city id, table1 lines end with it; both are emitted
	// keyed by the city id and tagged "1" (factory name) or "2" (city name).
	private static class MultiTableMapper extends Mapper<Object, Text, Text, Text>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String str=value.toString();
			if(Character.isDigit(str.charAt(0))){
				context.write(new Text(str.charAt(0)+""), new Text("2-"+str.substring(1).trim()));
			}else{
				context.write(new Text(str.substring(str.length()-1)), new Text("1-"+str.substring(0, str.length()-1).trim()));
			}
		}
		
	}
	
	// Reducer: for one city id, collects factory names (tag "1") and the city name (tag "2")
	// and writes every factory with its city.
	private static class MultiTableReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Context context)
				throws IOException, InterruptedException {
			List<String>keyList=new ArrayList<String>();
			List<String>valueList=new ArrayList<String>();
			for(Text value:values){
				String str=value.toString();
				String []strArray=str.split("-");
				if(strArray[0].equals("1")){
					keyList.add(strArray[1]);
				}else if(strArray[0].equals("2")){
					valueList.add(strArray[1]);
				}
			}
			for(String skey:keyList){
				for(String svalue:valueList){
					context.write(new Text(skey), new Text(svalue));
				}
			}
		}
		
	}
	
	
	public static void main(String[] args) throws Exception{
		// Package the compiled classes into a temporary jar for submission from the IDE.
		File jarFile=Ejob.createTempJar("bin");
		ClassLoader classLoader=Ejob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);
		
		Configuration conf=new Configuration();
		String [] argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: MultiTableJoin <input path> <output path>");
			System.exit(1);
		}
		JobConf jobConf=new JobConf(conf);
		jobConf.setJar(jarFile.toString());
		Job job=new Job(jobConf,"multitable join");
		job.setJarByClass(MultiTableJoin.class);
		job.setMapperClass(MultiTableMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setReducerClass(MultiTableReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);
		
	}
}


Input data: table1.txt

Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1


table2.txt

1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian


Output:

Beijing Red Star	Beijing
Beijing Rising	Beijing
Bank of Beijing	Beijing
Guangzhou Honda	Guangzhou
Guangzhou Development Bank	Guangzhou
Shenzhen Thunder	Shenzhen
Tencent	Shenzhen
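
An alternative worth noting, not used in the code above: instead of guessing which table a line belongs to from its first or last character, each input file can be given its own mapper through MultipleInputs, which makes the tagging explicit and tolerates city ids of more than one digit. With imports of org.apache.hadoop.mapreduce.lib.input.MultipleInputs and org.apache.hadoop.mapreduce.lib.input.TextInputFormat, the single FileInputFormat.addInputPath call in the driver would be replaced roughly by:

		// one mapper per file, so no per-line format guessing is needed
		MultipleInputs.addInputPath(job, new Path(table1Path), TextInputFormat.class, Table1Mapper.class);
		MultipleInputs.addInputPath(job, new Path(table2Path), TextInputFormat.class, Table2Mapper.class);

Here table1Path, table2Path, Table1Mapper, and Table2Mapper are hypothetical names: the two mappers would simply emit the same "1-"/"2-" tagged records as the original MultiTableMapper, and the reducer and output path setup stay exactly the same.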