MapReduce
在MapReduce中,Join是一种常用的数据合并操作,用于将具有相同键的不同数据集合并到一起。它允许你在处理大规模数据时进行连接操作,类似于关系型数据库中的JOIN操作。
数据类型
MapReduce | java |
---|---|
Text | String |
IntWritable | int |
LongWritable | long |
BooleanWritable | boolean |
ByteWritable | byte |
BytesWritable | byte[] |
DoubleWritable | double |
FloatWritable | float |
NullWritable | null |
Map
//初始化方法,它在Mapper任务执行前被调用一次。setup()方法通常用于执行一些初始化工作,例如读取配置参数、建立数据库连接、加载外部资源等。 protected void setup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException { } //它负责对输入数据进行处理,并将处理结果输出为键值对形式。Map方法通常被实现为一个纯函数,即输入相同的数据,输出结果也相同,不会对系统状态产生任何影响。 protected void map(KEYIN key, VALUEIN value, Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException { //读取一行数据 String line = value.toString(); context.write(key, value); } //销毁函数 protected void cleanup(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context) throws IOException, InterruptedException { }
Reduce
@Override protected void reduce(IntWritable key, Iterable<NullWritable> values, Reducer<IntWritable, NullWritable, IntWritable, NullWritable>.Context context) throws IOException, InterruptedException { //不去重输出key /*for (NullWritable value : values) { context.write(key,NullWritable.get()); }*/ //去重输出key context.write(key,NullWritable.get()); }
Launch配置提交
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { //打印日志 BasicConfigurator.configure(); //获取job对象 Job job = Job.getInstance(); job.setJobName("compare"); //设置行类 job.setJarByClass(CompareNumberLaunch.class); //设置Mapper job.setMapperClass(CompareNumberMapper.class); //设置Reduce job.setReducerClass(CompareNumberReducer.class); //设置输入输出key,value类型 job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(NullWritable.class); //设置排序比较规则 job.setSortComparatorClass(KeyComparator.class); //获取文件管理目录 FileSystem fs = FileSystem.get(job.getConfiguration()); //输出地址判断是否存在目录存在目录删除 Path out = new Path("D:\\biancheng\\sofeware\\idea\\hadoop_myaredus\\sort2\\output"); if (fs.exists(out)){ fs.delete(out); } //设置录取文件位置 FileInputFormat.addInputPath(job,new Path("D:\\biancheng\\sofeware\\idea\\hadoop_myaredus\\sort2\\input\\number")); //设置输出文件位置 FileOutputFormat.setOutputPath(job,out); //设置reduce任务数量 单个reduce任务默认排序 job.setNumReduceTasks(1); //提交启动程序 job.waitForCompletion(true); }
KeyComparator类
// 提供无参构造 public KeyComparator() { super(IntWritable.class,true); } //重新文本比较规则 @Override public int compare(WritableComparable a, WritableComparable b) { IntWritable left = (IntWritable) a; IntWritable right = (IntWritable) b; return right.compareTo(left); }