以SequenceFile方式处理上传的文件（此处以图片文件为例）

package com.hisome.jsdk.mapreduce;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat;
import org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat;
import org.apache.hadoop.util.Tool;



public class ImagesSplitMR extends Configured implements Tool {
	
	public static class MapClass extends MapReduceBase implements Mapper<BytesWritable,BytesWritable,BytesWritable,BytesWritable>{
		
		public void map(BytesWritable key , BytesWritable value ,OutputCollector< BytesWritable, BytesWritable> output,Reporter reporter) throws IOException{
			byte[] keys=key.getBytes();
			File file=new File(key+".jpg");
			OutputStream out=new FileOutputStream(file);
			byte[] b=value.getBytes();
			System.out.println("keys.length:"+keys.length);
			System.out.println("b.length:"+b.length);
			out.write(b,0,b.length);

			try {
//				output.collect(new Text("1") ,new IntWritable(1));
				output.collect(key,value);
			} catch (Exception e) {
				e.printStackTrace();
			}			
		}
	}	
		
	public int run(String[] args) throws Exception{
		
		JobConf conf = new JobConf(new Configuration(),ImagesSplitMR.class);
		
		Configuration conf1=new Configuration();
	    FileSystem fs=FileSystem.get(URI.create("jyl/testByteImageSequenceFileMR"), conf1);
	    fs.delete(new Path("jyl/testByteImageSequenceFileMR"),true);
		
		conf.setJobName("ImagesSplitMR");
//		conf.setOutputKeyClass(NullWritable.class);
//		conf.setOutputValueClass(BytesWritable.class);
		conf.setOutputKeyClass(BytesWritable.class);
		conf.setOutputValueClass(BytesWritable.class);
		
		conf.setInputFormat(SequenceFileAsBinaryInputFormat.class);
		conf.setOutputFormat(SequenceFileAsBinaryOutputFormat.class);
		
		conf.setNumMapTasks(1);
		conf.setMapperClass(MapClass.class);
//		conf.setCombinerClass(ReduceClass.class);
//		conf.setReducerClass(ReduceClass.class);
		conf.setNumReduceTasks(0);

		FileInputFormat.addInputPath(conf,new Path("jyl/testByteImageSequenceFile"));
		FileOutputFormat.setOutputPath(conf, new Path("jyl/testByteImageSequenceFileMR"));
		
		RunningJob rj=JobClient.runJob(conf);
		return 0;
	}
	
	public static void main(String[] args) throws Exception {  
		ImagesSplitMR decoder=new ImagesSplitMR();
		decoder.run(args);
	}
}




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值