Determining the number of map tasks (input splits) in Hadoop

This post walks through a matrix-multiplication application implemented with Hadoop MapReduce. The code shows how to set up the Job, the Mapper, and the Reducer, and how to read and write SequenceFile input and output. Each map input record carries one column of the right matrix; the mapper multiplies it by the left matrix (loaded from the DistributedCache in setup()) and scales the result by a factor u, and the reducer consolidates the resulting columns into a single output file.

/**
 * Created with IntelliJ IDEA.
 * User: hadoop
 * Date: 16-3-14
 * Time: 3:13 PM
 * To change this template use File | Settings | File Templates.
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.util.ReflectionUtils;

public class MutiDoubleInputMatrixProduct {

    // Fill a DoubleWritable[] with zero-valued instances so the slots can be reused via set().
    public static void initDoubleArrayWritable(int length, DoubleWritable[] doubleArrayWritable) {
        for (int i = 0; i < length; i++) {
            doubleArrayWritable[i] = new DoubleWritable(0.0);
        }
    }

    public static class MyMapper extends Mapper<IntWritable, DoubleArrayWritable, IntWritable, DoubleArrayWritable> {
        public DoubleArrayWritable map_value = new DoubleArrayWritable();
        public double[][] leftMatrix = null;

        public DoubleWritable[] arraySum = null;
        public DoubleWritable[] tempColumnArrayDoubleWritable = null;
        public DoubleWritable[] tempRowArrayDoubleWritable = null;
        public double sum = 0;
        public double uValue;
        public int leftMatrixRowNum;
        public int leftMatrixColumnNum;

        public void setup(Context context) throws IOException {
            Configuration conf = context.getConfiguration();
            leftMatrixRowNum = conf.getInt("leftMatrixRowNum", 10);
            leftMatrixColumnNum = conf.getInt("leftMatrixColumnNum", 10);
            leftMatrix = new double[leftMatrixRowNum][leftMatrixColumnNum];
            uValue = (double) (conf.getFloat("u", 1.0f));
            tempRowArrayDoubleWritable = new DoubleWritable[leftMatrixColumnNum];
            initDoubleArrayWritable(leftMatrixColumnNum, tempRowArrayDoubleWritable);
            // An input column of the right matrix has leftMatrixColumnNum entries,
            // so the scratch array must be sized accordingly.
            tempColumnArrayDoubleWritable = new DoubleWritable[leftMatrixColumnNum];
            initDoubleArrayWritable(leftMatrixColumnNum, tempColumnArrayDoubleWritable);
            System.out.println("map setup() start!");
            // Load the left matrix from the SequenceFile shipped to this node's local cache.
            Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
            String localCacheFile = "file://" + cacheFiles[0].toString();
            System.out.println("local path is:" + cacheFiles[0].toString());
            FileSystem fs = FileSystem.get(URI.create(localCacheFile), conf);
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path(localCacheFile), conf);
            IntWritable key = (IntWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            DoubleArrayWritable value = (DoubleArrayWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            int rowIndex = 0;
            int index;
            while (reader.next(key, value)) {
                index = -1;
                for (Writable val : value.get()) { // ArrayWritable.get() returns a Writable[]
                    tempRowArrayDoubleWritable[++index].set(((DoubleWritable) val).get());
                }
                rowIndex = key.get();
                leftMatrix[rowIndex] = new double[leftMatrixColumnNum];
                for (int i = 0; i < leftMatrixColumnNum; i++) {
                    leftMatrix[rowIndex][i] = tempRowArrayDoubleWritable[i].get();
                }
            }
            arraySum = new DoubleWritable[leftMatrix.length];
            initDoubleArrayWritable(leftMatrix.length, arraySum);
        }

        public void map(IntWritable key, DoubleArrayWritable value, Context context) throws IOException, InterruptedException {
            // Records from files whose names start with "FB" are forwarded unchanged;
            // every other record is treated as one column of the right matrix.
            InputSplit inputSplit = context.getInputSplit();
            String fileName = ((FileSplit) inputSplit).getPath().getName();
            if (fileName.startsWith("FB")) {
                context.write(key, value);
            } else {
                int ii = -1;
                for (Writable val : value.get()) {
                    tempColumnArrayDoubleWritable[++ii].set(((DoubleWritable) val).get());
                }
                for (int i = 0; i < leftMatrix.length; i++) {
                    sum = 0;
                    for (int j = 0; j < leftMatrixColumnNum; j++) {
                        sum += this.leftMatrix[i][j] * tempColumnArrayDoubleWritable[j].get() * uValue;
                    }
                    arraySum[i].set(sum);
                }
                map_value.set(arraySum);
                context.write(key, map_value);
            }
        }
    }

    public static class MyReducer extends Reducer<IntWritable, DoubleArrayWritable, IntWritable, DoubleArrayWritable> {
        public DoubleWritable[] sum = null;

        public DoubleArrayWritable valueArrayWritable = new DoubleArrayWritable();
        public DoubleWritable[] tempColumnArrayDoubleWritable = null;
        private int leftMatrixRowNum;

        public void setup(Context context) {
            leftMatrixRowNum = context.getConfiguration().getInt("leftMatrixRowNum", 100);
            sum = new DoubleWritable[leftMatrixRowNum];
            initDoubleArrayWritable(leftMatrixRowNum, sum);
            tempColumnArrayDoubleWritable = new DoubleWritable[leftMatrixRowNum];
            initDoubleArrayWritable(leftMatrixRowNum, tempColumnArrayDoubleWritable);
        }

        // Since the multiplication already happens in map(), the reduce stage could in
        // principle be skipped: without a custom Reducer the framework still runs a
        // default reduce that does nothing. But if the maps wrote output directly, the
        // job would produce one result file per map task. A reduce stage is used here
        // so that the result matrix is stored in a single file.
        public void reduce(IntWritable key, Iterable<DoubleArrayWritable> value, Context context) throws IOException, InterruptedException {
            for (DoubleArrayWritable doubleValue : value) {
                int index = -1;
                for (Writable val : doubleValue.get()) {
                    tempColumnArrayDoubleWritable[++index].set(((DoubleWritable) val).get());
                }
            }
            valueArrayWritable.set(tempColumnArrayDoubleWritable);
            context.write(key, valueArrayWritable);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String uri = args[3];
        String outUri = args[4];
        String cachePath = args[2];
        HDFSOperator.deleteDir(outUri);
        Configuration conf = new Configuration();
        DistributedCache.addCacheFile(URI.create(cachePath), conf); // ship the left matrix via the distributed cache
        conf.setInt("leftMatrixColumnNum", Integer.parseInt(args[0]));
        conf.setInt("leftMatrixRowNum", Integer.parseInt(args[1]));
        conf.setFloat("u", 1.0f);
        // Hadoop 1.2.1 has no setNumMapTasks(); the number of map tasks can only be
        // controlled indirectly, by capping the size of each input split.
        conf.set("mapred.max.split.size", args[5]);
        conf.set("mapred.jar", "MutiDoubleInputMatrixProduct.jar");
        Job job = new Job(conf, "MatrixProduct");
        job.setJarByClass(MutiDoubleInputMatrixProduct.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(DoubleArrayWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(DoubleArrayWritable.class);
        FileInputFormat.setInputPaths(job, new Path(uri));
        FileOutputFormat.setOutputPath(job, new Path(outUri));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

199

}

class DoubleArrayWritable extends ArrayWritable {
    public DoubleArrayWritable() {
        super(DoubleWritable.class);
    }
    /*
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Writable val : get()) {
            DoubleWritable doubleWritable = (DoubleWritable) val;
            sb.append(doubleWritable.get());
            sb.append(",");
        }
        sb.deleteCharAt(sb.length() - 1);
        return sb.toString();
    }
    */
}

class HDFSOperator {
    public static boolean deleteDir(String dir) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        boolean result = fs.delete(new Path(dir), true);
        System.out.println("sOutput delete");
        fs.close();
        return result;
    }
}
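The job reads and writes SequenceFiles of IntWritable keys and DoubleArrayWritable values, and the cached left matrix must be stored the same way. Below is a minimal sketch of how such an input file could be prepared; the class name MatrixSequenceFileWriter, the path leftMatrix.seq, and the toy matrix values are made up for illustration, it assumes the DoubleArrayWritable class from the listing is on the classpath, and it uses the Hadoop 1.x SequenceFile.createWriter(FileSystem, Configuration, Path, keyClass, valueClass) overload matching the reader above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;

public class MatrixSequenceFileWriter {
    // Writes each row of a small in-memory matrix as one (rowIndex, row values) record.
    public static void main(String[] args) throws IOException {
        double[][] matrix = {{1.0, 2.0}, {3.0, 4.0}}; // toy data for illustration
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("leftMatrix.seq"); // hypothetical output path
        SequenceFile.Writer writer = SequenceFile.createWriter(
                fs, conf, path, IntWritable.class, DoubleArrayWritable.class);
        try {
            IntWritable key = new IntWritable();
            DoubleArrayWritable value = new DoubleArrayWritable();
            DoubleWritable[] row = new DoubleWritable[matrix[0].length];
            for (int i = 0; i < matrix.length; i++) {
                for (int j = 0; j < matrix[i].length; j++) {
                    row[j] = new DoubleWritable(matrix[i][j]);
                }
                key.set(i);      // row index becomes the record key
                value.set(row);  // row values become the DoubleArrayWritable payload
                writer.append(key, value);
            }
        } finally {
            writer.close();
        }
    }
}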

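Back to the point of the title: the number of map tasks is not set directly. It equals the number of input splits, and with FileInputFormat each split's size is computed as max(minSplitSize, min(maxSplitSize, blockSize)). Setting mapred.max.split.size below the HDFS block size, as main() does above, therefore cuts each block into several splits and raises the map count; raising the minimum has the opposite effect. Here is a minimal sketch of the same knobs through the new-API helper methods (the class name and the 16 MB cap are just examples):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "SplitSizeExample");
        // Same effect as conf.set("mapred.max.split.size", ...) in the listing:
        // with a 64 MB block size, a 16 MB cap turns each block into ~4 splits.
        FileInputFormat.setMaxInputSplitSize(job, 16 * 1024 * 1024);
        // Raising the lower bound instead merges small splits and reduces the map count.
        FileInputFormat.setMinInputSplitSize(job, 1);
    }
}

The reduce side is different: the number of reduce tasks can be set directly with job.setNumReduceTasks(n). Note also that on Hadoop 2 and later the deprecated DistributedCache calls in the listing are replaced by job.addCacheFile(URI) and context.getCacheFiles().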