去重(MapReduce)
创建三个类,分别为Mapper,Reduce,Launch。添加input数据(单一数据只含K)
说明:1.与通常的Mapper,Reduce,Launch一样只是有略微区别
2.因数据只含K,所以Mapper输出值为Null
3.因为数据只含K,所以Reduce输出Value不需要迭代(实现相同K覆盖去重的效果)
代码如下:
Mapper
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Deduplication mapper.
 *
 * Emits each input line as the output key with a NullWritable value, so the
 * shuffle phase groups identical lines under one key and the reducer can
 * collapse duplicates.
 *
 * @author yangRui
 */
public class SortMapper extends Mapper<LongWritable, Text,Text, NullWritable> {
@Override
// key: byte offset of the line in the input split (unused); value: the line text.
protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
// The whole line becomes the key; NullWritable carries no payload.
context.write(value, NullWritable.get());
}
}
Reduce
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Deduplication reducer.
 *
 * Identical keys arrive grouped after the shuffle; writing each key exactly
 * once (without iterating the values) is what discards the duplicates.
 *
 * @author yangRui
 */
public class SortReduce extends Reducer<Text, NullWritable,Text, NullWritable> {
@Override
protected void reduce(Text key, Iterable<NullWritable> values,Context context) throws IOException, InterruptedException {
// One write per distinct key — all duplicate occurrences are dropped here.
context.write(key,NullWritable.get());
}
}
Launch
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the deduplication job: wires up mapper/reducer, clears any stale
 * output directory, submits the job, and exits with a status reflecting the
 * job result.
 *
 * @author yangRui
 */
public class SortLaunch {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Create the job object.
        Job job = Job.getInstance();
        // Job name shown in the cluster/console UI.
        job.setJobName("sort1");
        // Register mapper, reducer, and the jar containing this driver.
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReduce.class);
        job.setJarByClass(SortLaunch.class);
        // Map output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final (reducer) output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        // Delete a pre-existing output directory, otherwise job submission fails.
        FileSystem fs = FileSystem.get(job.getConfiguration());
        Path out = new Path("D:\\ideaProject\\hadoop\\hadoop_pro\\sort1\\output");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        // Input and output paths.
        FileInputFormat.addInputPath(job, new Path("D:\\ideaProject\\hadoop\\hadoop_pro\\sort1\\input"));
        FileOutputFormat.setOutputPath(job, out);
        // Submit and wait. Fix: the original discarded waitForCompletion's
        // boolean result, so the process exited 0 even when the job failed;
        // propagate success/failure as the exit code instead.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
实现效果
效果前
效果后
排序(MapReduce)
创建:1.三个类,分别为Mapper,Reduce,Launch。
2.工具类,KeyCompartor继承WritableComparator
3.添加input数据(单一数据只含K)
说明:1.与通常的Mapper,Reduce,Launch一样只是有略微区别
2.因数据只含K,所以Mapper输出值为Null
3.因为数据只含K,所以Reduce输出Value需要迭代(防止相同K覆盖去重)
4.Launch,添加一个set用于调用工具类实现排序,设置Reduce任务数量
5.工具类,实现排序
Mapper
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Sorting mapper.
 *
 * Parses each input line as a long and emits it as the map output key (value
 * is NullWritable), so the framework's shuffle sorts the numbers by key.
 *
 * @author yangRui
 */
public class SortMapper extends Mapper<LongWritable, Text,LongWritable, NullWritable> {
@Override
protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException {
    // Fix: trim and skip blank lines — the original called Long.parseLong on the
    // raw line and crashed with NumberFormatException on empty/whitespace lines
    // (e.g. a trailing newline in the input file).
    String line = value.toString().trim();
    if (line.isEmpty()) {
        return;
    }
    LongWritable outKey = new LongWritable(Long.parseLong(line));
    context.write(outKey, NullWritable.get());
}
}
Reduce
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Sorting reducer.
 *
 * Writes the key once for every value in the group, so equal numbers are
 * preserved in the output instead of being collapsed into one.
 *
 * @author yangRui
 */
public class SortReduce extends Reducer<LongWritable, NullWritable,LongWritable, NullWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Walk the (empty-payload) values explicitly: one output record per
        // occurrence, so duplicate keys are not deduplicated.
        java.util.Iterator<NullWritable> occurrences = values.iterator();
        while (occurrences.hasNext()) {
            occurrences.next();
            context.write(key, NullWritable.get());
        }
    }
}
Launch
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the sorting job: wires up mapper/reducer, installs the custom
 * descending-order comparator, clears any stale output directory, submits the
 * job, and exits with a status reflecting the job result.
 *
 * @author yangRui
 */
public class SortLaunch {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // Create the job object.
        Job job = Job.getInstance();
        // Job name shown in the cluster/console UI.
        job.setJobName("sort1");
        // Register mapper, reducer, and the jar containing this driver.
        job.setMapperClass(SortMapper.class);
        job.setReducerClass(SortReduce.class);
        job.setJarByClass(SortLaunch.class);
        // Map output types.
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        // Final (reducer) output types.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Custom key comparator: sorts keys in descending order during shuffle.
        job.setSortComparatorClass(KeyCompartor.class);
        // Delete a pre-existing output directory, otherwise job submission fails.
        FileSystem fs = FileSystem.get(job.getConfiguration());
        Path out = new Path("D:\\ideaProject\\hadoop\\hadoop_pro\\sort1\\output");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        // Input and output paths.
        FileInputFormat.addInputPath(job, new Path("D:\\ideaProject\\hadoop\\hadoop_pro\\sort1\\input"));
        FileOutputFormat.setOutputPath(job, out);
        // A single reducer yields one globally sorted output file.
        job.setNumReduceTasks(1);
        // Submit and wait. Fix: the original discarded waitForCompletion's
        // boolean result, so the process exited 0 even when the job failed;
        // propagate success/failure as the exit code instead.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
KeyCompartor
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Raw key comparator that orders LongWritable keys in DESCENDING order, used
 * via Job.setSortComparatorClass to reverse the shuffle's default ascending
 * sort.
 *
 * @author yangRui
 */
public class KeyCompartor extends WritableComparator {
    public KeyCompartor() {
        // true => let WritableComparator instantiate key objects so the
        // object-based compare(WritableComparable, WritableComparable) is used.
        super(LongWritable.class, true);
    }

    /**
     * @return negative if {@code a} sorts after... i.e. standard compareTo
     *         contract, inverted to produce descending order.
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        LongWritable left = (LongWritable) a;
        LongWritable right = (LongWritable) b;
        // Fix: compare in swapped order instead of negating the result —
        // negating a compareTo value is a known anti-pattern (negation breaks
        // if a comparator ever returns Integer.MIN_VALUE).
        return right.compareTo(left);
    }
}
实现效果
实现前
实现后
报错解决
报错内容
解决方法
添加资源文件:log4j.properties放到resources资源文件下
log4j.rootLogger=INFO, stdout, file
log4j.appender.file=org.apache.log4j.RollingFileAppender
log4j.appender.file.File=/logs/myapp.log
log4j.appender.file.MaxFileSize=10MB
log4j.appender.file.MaxBackupIndex=10
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} [%-5p] [%-30F:%-5L] - %m%n
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} [%-20t] %c:%L %m%n