Hadoop 2.2.0 MapReduce Examples


1. WordCount

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// WordCount: counts how many times each word appears in the input files.
public class WordCountExample {
	// Mapper: splits each line on spaces and emits (word, 1) for every token.
	private static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String str=value.toString();
			String []strArray=str.split(" ");
			for(String s:strArray){
				context.write(new Text(s), new IntWritable(1));
			}
		}
		
	}
	
	// Reducer: sums the counts emitted for each word and writes (word, total).
	private static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			int sum=0;
			for(IntWritable count:values){
				sum+=count.get();
			}
			context.write(key, new IntWritable(sum));
		}
		
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String []argArray=new GenericOptionsParser(conf,args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: WordCountExample <input path> <output path>");
			System.exit(1);
		}
		Job job=new Job(conf,"wordcount");
		job.setJarByClass(WordCountExample.class);
		job.setMapperClass(WordCountMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		job.setReducerClass(WordCountReducer.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);
	}

}
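
A small optional addition, not in the original listing: because WordCountReducer only adds up values, the same class can also be registered as a combiner, so partial sums are computed on the map side and less data crosses the network during the shuffle. A minimal sketch of the one extra driver line, placed with the other job.set* calls above:

		job.setCombinerClass(WordCountReducer.class);

The final counts are unchanged, because summing the partial sums gives the same totals as summing the original ones.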


2. Deduplication (remove duplicate lines)

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Deduplication: each distinct input line appears exactly once in the output.
public class DeleteRepeatExample {
	// Mapper: uses the whole line as the key; the value is a dummy 0.
	private static class DeleteRepeatMapper extends Mapper<Object, Text, Text, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(value, new IntWritable(0));
		}
		
	}
	
	// Reducer: identical lines are grouped under one key, so writing the key once removes duplicates.
	private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, Object>{

		@Override
		protected void reduce(Text key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			context.write(key, null); // TextOutputFormat writes only the key when the value is null
		}
		
	}
	
	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String[]argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: DeleteRepeatExample <input path> <output path>");
			System.exit(1);
		}
		Job job=new Job(conf,"delete repeat");
		job.setJarByClass(DeleteRepeatExample.class);
		job.setMapperClass(DeleteRepeatMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(DeleteRepeatReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Object.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job,new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}
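
A more idiomatic variant, not in the original code: Hadoop ships NullWritable for exactly this key-only output, which avoids passing a raw null and lets the declared output value class match what is actually written. With an extra import of org.apache.hadoop.io.NullWritable at the top of the file, the reducer and one driver line would change roughly as follows, everything else staying as above:

	private static class DeleteRepeatReducer extends Reducer<Text, IntWritable, Text, NullWritable>{
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			// one output line per distinct input line
			context.write(key, NullWritable.get());
		}
	}

	// in main():
	job.setOutputValueClass(NullWritable.class);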


3. Sort

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

// Sort: lets the shuffle phase sort the integer keys, then numbers them in the reducer.
public class SortExample {
	// Mapper: parses each line as an integer and emits it as the key; the value is a dummy 0.
	private static class SortMapper extends Mapper<Object, Text, IntWritable, IntWritable>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			context.write(new IntWritable(Integer.parseInt(value.toString())), new IntWritable(0));
		}
		
	}
	
	// Reducer: keys arrive in ascending order, so it writes (rank, value) pairs.
	private static class SortReducer extends Reducer<IntWritable, IntWritable, Text, Text>{
		private int index=0; // running rank across all values seen by this reducer
		@Override
		protected void reduce(IntWritable key, Iterable<IntWritable> values,
				Context context)
				throws IOException, InterruptedException {
			for(IntWritable i:values){
				index++;
				context.write(new Text(index+""),new Text(key.get()+""));
			}
		}
		
	}

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		Configuration conf=new Configuration();
		String[]argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: SortExample <input path> <output path>");
			System.exit(1);
		}
		Job job=new Job(conf,"sort");
		job.setJarByClass(SortExample.class);
		job.setMapperClass(SortMapper.class);
		job.setMapOutputKeyClass(IntWritable.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setReducerClass(SortReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}
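
One caveat the code leaves implicit: the output is only globally sorted if every key goes to the same reducer; with several reducers the default HashPartitioner spreads the keys, and each output file is sorted only within itself. A minimal way to make that assumption explicit in the driver above:

		job.setNumReduceTasks(1); // a single reducer produces one globally sorted output file

For data too large for one reducer, TotalOrderPartitioner with sampled split points is the usual alternative, but that is beyond this example.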

4. Single-table self-join

package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob; // author's helper class (not part of Hadoop) for building a temporary job jar

// Single-table self-join: the input lists child-parent pairs; joining the table with itself
// yields grandchild-grandparent pairs.
public class SelfJoin {
	// Mapper: emits each pair twice, keyed by the parent (tag "1") and by the child (tag "2"),
	// so rows that share the middle person meet in the same reduce call.
	private static class SelfJoinMapper extends Mapper<Object, Text, Text, Text>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String str=value.toString();
			String[] nameArray=str.split(" ");
			context.write(new Text(nameArray[1]), new Text("1-"+nameArray[0]+"-"+nameArray[1]));
			context.write(new Text(nameArray[0]), new Text("2-"+nameArray[0]+"-"+nameArray[1]));
			
		}
		
	}
	// Reducer: for one middle person, "1" records contribute grandchildren and "2" records
	// contribute grandparents; their cross product is the joined result.
	private static class SelfJoinReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Context context)
				throws IOException, InterruptedException {
			List<String> outKey=new ArrayList<String>();
			List<String> outValue=new ArrayList<String>();
			/*for(Text value:values){
			context.write(NullWritable.get(), value);
			}
			context.write(NullWritable.get(), new Text("---------"));*/
			for(Text value:values){
				String[] relationArray=value.toString().split("-");
				if(relationArray[0].equals("1")){
					outKey.add(relationArray[1]);
				}else if(relationArray[0].equals("2")){
					outValue.add(relationArray[2]);
				}
			}
			for(String k:outKey){
				for(int i=0;i<outValue.size();i++){
					context.write(new Text(k), new Text(outValue.get(i)));
				}
			}
		}
		
	}
	public static void main(String[] args) throws Exception{
		// Package the classes compiled into "bin" as a temporary jar so the job can be
		// submitted straight from the IDE (Ejob is the author's helper class).
		File jarFile = Ejob.createTempJar("bin");
		//Ejob.addClasspath("/opt/hadoop/conf");
		ClassLoader classLoader = Ejob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);

		Configuration conf=new Configuration();
		String [] argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: SelfJoin <input path> <output path>");
			System.exit(1);
		}
		JobConf jobConf=new JobConf(conf);
		jobConf.setJar(jarFile.toString());
		Job job=new Job(jobConf,"self join");
		job.setJarByClass(SelfJoin.class);
		job.setMapperClass(SelfJoinMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setReducerClass(SelfJoinReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);

	}

}


Input data:

Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma

Output:

Tom	Alice
Tom	Jesse
Jone	Alice
Jone	Jesse
Tom	Mary
Tom	Ben
Jone	Mary
Jone	Ben
Philip	Alice
Philip	Jesse
Mark	Alice
Mark	Jesse

5. Multi-table join

package demo;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.vod.Ejob;

// Multi-table join: joins table1 (factory name, city id) with table2 (city id, city name) on the city id.
public class MultiTableJoin {
	// Mapper: table2 lines start with the city id, table1 lines end with it; both are emitted
	// keyed by the city id and tagged "1" (factory name) or "2" (city name).
	private static class MultiTableMapper extends Mapper<Object, Text, Text, Text>{

		@Override
		protected void map(Object key, Text value, Context context)
				throws IOException, InterruptedException {
			String str=value.toString();
			if(Character.isDigit(str.charAt(0))){
				context.write(new Text(str.charAt(0)+""), new Text("2-"+str.substring(1).trim()));
			}else{
				context.write(new Text(str.substring(str.length()-1)), new Text("1-"+str.substring(0, str.length()-1).trim()));
			}
		}
		
	}
	
	// Reducer: for one city id, collects factory names (tag "1") and the city name (tag "2")
	// and writes every factory with its city.
	private static class MultiTableReducer extends Reducer<Text, Text, Text, Text>{

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Context context)
				throws IOException, InterruptedException {
			List<String>keyList=new ArrayList<String>();
			List<String>valueList=new ArrayList<String>();
			for(Text value:values){
				String str=value.toString();
				String []strArray=str.split("-");
				if(strArray[0].equals("1")){
					keyList.add(strArray[1]);
				}else if(strArray[0].equals("2")){
					valueList.add(strArray[1]);
				}
			}
			for(String skey:keyList){
				for(String svalue:valueList){
					context.write(new Text(skey), new Text(svalue));
				}
			}
		}
		
	}
	
	
	public static void main(String[] args) throws Exception{
		// Package the compiled classes into a temporary jar for submission from the IDE.
		File jarFile=Ejob.createTempJar("bin");
		ClassLoader classLoader=Ejob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);
		
		Configuration conf=new Configuration();
		String [] argArray=new GenericOptionsParser(conf, args).getRemainingArgs();
		if(argArray.length!=2){
			System.err.println("Usage: MultiTableJoin <input path> <output path>");
			System.exit(1);
		}
		JobConf jobConf=new JobConf(conf);
		jobConf.setJar(jarFile.toString());
		Job job=new Job(jobConf,"multitable join");
		job.setJarByClass(MultiTableJoin.class);
		job.setMapperClass(MultiTableMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setReducerClass(MultiTableReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.addInputPath(job, new Path(argArray[0]));
		FileOutputFormat.setOutputPath(job, new Path(argArray[1]));
		System.exit(job.waitForCompletion(true)?0:1);
		
	}
}


Input data: table1.txt

Beijing Red Star 1
Shenzhen Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1


table2.txt

1 Beijing
2 Guangzhou
3 Shenzhen
4 Xian


Output:

Beijing Red Star	Beijing
Beijing Rising	Beijing
Bank of Beijing	Beijing
Guangzhou Honda	Guangzhou
Guangzhou Development Bank	Guangzhou
Shenzhen Thunder	Shenzhen
Tencent	Shenzhen
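
An alternative worth noting, not used in the code above: instead of guessing which table a line belongs to from its first or last character, each input file can be given its own mapper through MultipleInputs, which makes the tagging explicit and tolerates city ids of more than one digit. With imports of org.apache.hadoop.mapreduce.lib.input.MultipleInputs and org.apache.hadoop.mapreduce.lib.input.TextInputFormat, the single FileInputFormat.addInputPath call in the driver would be replaced roughly by:

		// one mapper per file, so no per-line format guessing is needed
		MultipleInputs.addInputPath(job, new Path(table1Path), TextInputFormat.class, Table1Mapper.class);
		MultipleInputs.addInputPath(job, new Path(table2Path), TextInputFormat.class, Table2Mapper.class);

Here table1Path, table2Path, Table1Mapper, and Table2Mapper are hypothetical names: the two mappers would simply emit the same "1-"/"2-" tagged records as the original MultiTableMapper, and the reducer and output path setup stay exactly the same.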