前言
MapReduce默认情况下,一个reducer产生一个文件,以name-r-nnnnn来命名,其中默认的name为part,nnnnn是reducer的编号(从00000开始递增),保证了每个reducer不会产生重复的文件。
一、仅替代文件名part,输出结果为score-r-00000
1.使用org.apache.hadoop.mapreduce.lib.output.MultipleOutputs类
2.MultipleOutputs类需要在Reduce的setup()方法中初始化,并且必须在cleanup()中关闭,否则输出可能不会被完整写出
3.这个时候还会生成part-r-00000这种文件,而且里面是空的;要避免生成它,需要加上LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
代码样例:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
/**
* Created by HuiQ on 2019-10-16.
*/
/**
 * Word-count job whose reducer output files are named {@code score-r-nnnnn}
 * instead of the default {@code part-r-nnnnn}, using {@link MultipleOutputs}.
 */
public class WordCount {

    /** Emits {@code (token, 1)} for every non-empty, space-separated token of an input line. */
    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            for (String token : value.toString().split(" ")) {
                // Blank lines and consecutive spaces yield empty tokens; they are not words.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /** Sums the counts of each word and writes the result through {@link MultipleOutputs}. */
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();
        private MultipleOutputs<Text, IntWritable> multipleOutputs;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            multipleOutputs = new MultipleOutputs<Text, IntWritable>(context);
        }

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the values rather than counting them, so the result stays
            // correct if a combiner pre-aggregates the map output.
            int total = 0;
            for (IntWritable val : values) {
                total += val.get();
            }
            result.set(total);
            // Custom base name for the output file: produces score-r-nnnnn.
            multipleOutputs.write(key, result, "score");
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Closing is required; otherwise the named outputs may not be fully written.
            multipleOutputs.close();
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: WordCount <input path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // LazyOutputFormat creates the default output file only when something is
        // written through the context, which avoids empty part-r-nnnnn /
        // part-m-nnnnn files. Note: this call sets the job's output format class,
        // so do not combine it with a fully custom OutputFormat or the custom
        // format will not be used.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path("/huiqiang/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
二、要想全部自定义文件名,需要重写RecordWriter
自定义reducer类输出是通过重写FileOutputFormat类和RecordWriter类实现的。具体操作是重写RecordWriter类中的write方法,然后通过FileOutputFormat类的getRecordWriter方法返回该RecordWriter对象。
代码样例:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
* Created by HuiQ on 2019-10-16.
*/
public class WordCount {
public static class WordCountMapper extends Mapper<Object,Text,Text,IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
@Override
public void map(Object key,Text value,Context context) throws IOException, InterruptedException {
String[] words = value.toString().split(" ");
for (String str: words){
word.set(str);
context.write(word,one);
}
}
}
public static class WordCountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
@Override
public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
int total=0;
for (IntWritable val : values){
total++;
}
context.write(key, new IntWritable(total));
}
}
// 注意:1.必须要把static关键字加上 2.FileOutputFormat<Text,IntWritable>中的数据类型一定要和reduce端<Text,IntWritable>输出对应上
public static class MyFileOutputFormat extends FileOutputFormat<Text,IntWritable>{
@Override
public RecordWriter<Text, IntWritable> getRecordWriter(TaskAttemptContext job)throws IOException, InterruptedException {
FileSystem fileSystem=FileSystem.newInstance(job.getConfiguration());
//自定义的输出路径
final FSDataOutputStream title=fileSystem.create(new Path("/huiqiang/output/test.txt"));
RecordWriter<Text,IntWritable> recordWriter=new RecordWriter<Text, IntWritable>() {
@Override
public void close(TaskAttemptContext arg0) throws IOException,
InterruptedException {
if(title!=null){
title.close();
}
}
@Override
public void write(Text key, IntWritable value) throws IOException,
InterruptedException {
String fenGe=" ";
String charSet="UTF-8";
System.out.println("key="+key.toString());
//输出key
title.write(key.toString().getBytes(charSet),0,key.toString().getBytes(charSet).length);
//输出key和value的分隔符
title.write(fenGe.getBytes(charSet),0,fenGe.getBytes(charSet).length);
//输出value
title.write(value.toString().getBytes(charSet),0,value.toString().getBytes(charSet).length);
title.write("\n".getBytes(charSet),0,"\n".getBytes(charSet).length);
title.flush();
}
};
return recordWriter;
}
}
public static void main (String[] args) throws Exception{
Configuration conf = new Configuration();
Job job = new Job(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputFormatClass(MyFileOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
MyFileOutputFormat.setOutputPath(job, new Path("/huiqiang/output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
参考:MapReduce重写FileInputFormat和FileOutputFormat
三、补充:Hadoop之HDFS的FileSystem接口
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* @author: huiq
* @createTime: 2021/9/15 18:06
* @description:
*/
/**
 * Small demonstration of the HDFS {@link FileSystem} API: creating directories,
 * uploading, creating, listing and deleting files.
 */
public class classOperatingFiles {
    /** Configuration key holding the default file system URI. */
    private static final String DEFAULT_FS_KEY = "fs.defaultFS";

    static Configuration conf = new Configuration();
    static FileSystem hdfs;

    static {
        // Alternatively, load the cluster settings from the Hadoop configuration
        // directory, e.g. conf.addResource(new Path(confDir, "core-site.xml")).
        conf.set(DEFAULT_FS_KEY, "hdfs://bigdatanode01:8020/");
        try {
            hdfs = FileSystem.get(conf);
        } catch (IOException e) {
            // Fail fast: continuing with a null file system would only surface
            // later as a NullPointerException in every method.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Creates a directory, including any missing parents.
     *
     * @param dir HDFS path of the directory
     * @throws IOException if the directory cannot be created
     */
    public void createDir(String dir) throws IOException {
        hdfs.mkdirs(new Path(dir));
        System.out.println("newdir \t" + conf.get(DEFAULT_FS_KEY) + dir);
    }

    /**
     * Uploads a local file to HDFS and lists the destination afterwards.
     *
     * @param localSrc path of the local source file
     * @param hdfsDst  HDFS destination path
     * @throws IOException if the copy or the listing fails
     */
    public void copyFile(String localSrc, String hdfsDst) throws IOException {
        Path src = new Path(localSrc);
        Path dst = new Path(hdfsDst);
        hdfs.copyFromLocalFile(src, dst);
        // List everything under the destination.
        FileStatus[] files = hdfs.listStatus(dst);
        System.out.println("Uploadto \t" + conf.get(DEFAULT_FS_KEY) + hdfsDst);
        for (FileStatus file : files) {
            System.out.println(file.getPath());
        }
    }

    /**
     * Creates a file with the given content, encoded as UTF-8.
     *
     * @param fileName    HDFS path of the file to create
     * @param fileContent text to write
     * @throws IOException if the file cannot be created or written
     */
    public void createFile(String fileName, String fileContent) throws IOException {
        byte[] bytes = fileContent.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        // The stream must be closed, otherwise the data may never reach HDFS.
        try (FSDataOutputStream output = hdfs.create(new Path(fileName))) {
            output.write(bytes);
        }
        System.out.println("newfile \t" + conf.get(DEFAULT_FS_KEY) + fileName);
    }

    /**
     * Prints the paths of all entries directly under a directory.
     *
     * @param dirName HDFS path of the directory
     * @throws IOException if the directory cannot be listed
     */
    public void listFiles(String dirName) throws IOException {
        FileStatus[] entries = hdfs.listStatus(new Path(dirName));
        System.out.println(dirName + " has all files:");
        for (FileStatus entry : entries) {
            System.out.println(entry.getPath().toString());
        }
    }

    /**
     * Deletes a file or directory (recursively) if it exists and reports the outcome.
     *
     * @param fileName HDFS path to delete
     * @throws IOException if the existence check or the deletion fails
     */
    public void deleteFile(String fileName) throws IOException {
        Path target = new Path(fileName);
        boolean isExists = hdfs.exists(target);
        if (isExists) {
            boolean isDel = hdfs.delete(target, true);
            System.out.println(fileName + " delete? \t" + isDel);
        } else {
            System.out.println(fileName + " exist? \t" + isExists);
        }
    }

    /**
     * Demo entry point: creates {@code /huiq} and a small text file inside it.
     *
     * @param args unused
     * @throws IOException if any HDFS operation fails
     */
    public static void main(String[] args) throws IOException {
        classOperatingFiles ofs = new classOperatingFiles();
        try {
            System.out.println("\n=======createdir=======");
            String dir = "/huiq";
            ofs.createDir(dir);
            // To upload a local file instead: ofs.copyFile("/home/ictclas/Configure.xml", dir);
            System.out.println("\n=======createa file=======");
            String fileContent = "Hello, world! Just a test.";
            ofs.createFile(dir + "/word.txt", fileContent);
        } finally {
            // Release the connection once the demo is done.
            hdfs.close();
        }
    }
}