package com.hisome.jsdk.mapreduce;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat;
import org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat;
import org.apache.hadoop.util.Tool;
/**
 * Map-only job (old {@code org.apache.hadoop.mapred} API) that reads records through
 * {@link SequenceFileAsBinaryInputFormat}, dumps each record's value into a local
 * {@code .jpg} file, and re-emits the record unchanged through
 * {@link SequenceFileAsBinaryOutputFormat}.
 *
 * <p>The input and output directories are fixed ({@link #INPUT_DIR}, {@link #OUTPUT_DIR});
 * command-line arguments are accepted but not interpreted by {@link #run(String[])}.
 */
public class ImagesSplitMR extends Configured implements Tool {

    /** Directory the job reads its input from. */
    private static final String INPUT_DIR = "jyl/testByteImageSequenceFile";

    /** Directory the job writes to; it is deleted recursively before every run. */
    private static final String OUTPUT_DIR = "jyl/testByteImageSequenceFileMR";

    /**
     * Mapper that writes every value to a {@code .jpg} file and passes the key/value pair
     * through untouched.
     */
    public static class MapClass extends MapReduceBase
            implements Mapper<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {

        /**
         * Writes the valid bytes of {@code value} to a file named after {@code key} and
         * emits {@code (key, value)}.
         *
         * @param key      record key; its {@code toString()} form plus {@code ".jpg"} is the file name
         * @param value    record value; its first {@code getLength()} bytes are the file content
         * @param output   collector receiving the unchanged pair
         * @param reporter unused
         * @throws IOException if the file cannot be written or the pair cannot be collected
         */
        public void map(BytesWritable key, BytesWritable value,
                OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter)
                throws IOException {
            // getBytes() exposes the backing buffer, which may be longer than the payload;
            // only the first getLength() bytes are valid data.
            int keyLength = key.getLength();
            int valueLength = value.getLength();
            System.out.println("keys.length:" + keyLength);
            System.out.println("b.length:" + valueLength);

            // NOTE(review): the name comes from BytesWritable.toString(), not from decoding
            // the key bytes as text -- confirm this is the intended file name.
            // The relative path resolves against the working directory of the running JVM.
            File file = new File(key + ".jpg");
            OutputStream out = new FileOutputStream(file);
            try {
                out.write(value.getBytes(), 0, valueLength);
            } finally {
                // Always release the handle; a mapper sees many records.
                out.close();
            }

            // A failed emit propagates as IOException instead of being printed and ignored,
            // so a lost record is not reported as success.
            output.collect(key, value);
        }
    }

    /**
     * Configures and runs the job synchronously.
     *
     * <p>Uses the configuration supplied through {@code setConf} when there is one and a
     * fresh {@link Configuration} otherwise. {@link #OUTPUT_DIR} is removed recursively
     * before the job is submitted.
     *
     * @param args command-line arguments; not interpreted
     * @return {@code 0} if the finished job reports success, {@code 1} otherwise
     * @throws Exception if the output directory cannot be deleted or the job cannot be run
     */
    public int run(String[] args) throws Exception {
        Configuration base = getConf();
        if (base == null) {
            // Invoked directly, without a configuration having been injected.
            base = new Configuration();
        }
        JobConf conf = new JobConf(base, ImagesSplitMR.class);
        conf.setJobName("ImagesSplitMR");

        // Clear the previous run's output so the job can write to the same directory again.
        // The FileSystem handle is not closed here because the job still needs it.
        Path outputPath = new Path(OUTPUT_DIR);
        FileSystem fs = FileSystem.get(URI.create(OUTPUT_DIR), conf);
        fs.delete(outputPath, true);

        conf.setOutputKeyClass(BytesWritable.class);
        conf.setOutputValueClass(BytesWritable.class);
        conf.setInputFormat(SequenceFileAsBinaryInputFormat.class);
        conf.setOutputFormat(SequenceFileAsBinaryOutputFormat.class);

        conf.setMapperClass(MapClass.class);
        conf.setNumMapTasks(1);
        // Zero reduce tasks: no reducer is configured for this job.
        conf.setNumReduceTasks(0);

        FileInputFormat.addInputPath(conf, new Path(INPUT_DIR));
        FileOutputFormat.setOutputPath(conf, outputPath);

        RunningJob rj = JobClient.runJob(conf);
        return rj.isSuccessful() ? 0 : 1;
    }

    /**
     * Command-line entry point; exits the JVM with the status returned by
     * {@link #run(String[])}.
     *
     * @param args command-line arguments, forwarded to {@link #run(String[])}
     * @throws Exception if the job fails to run
     */
    public static void main(String[] args) throws Exception {
        ImagesSplitMR decoder = new ImagesSplitMR();
        int exitCode = decoder.run(args);
        System.exit(exitCode);
    }
}
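// Processing uploaded files as a SequenceFile (the files here are images, as an example).
// Source page footer: "latest recommended article published 2021-04-07 07:36:03".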