Learning Hadoop from Scratch -- Supplement 1: A Practical Example
2013-11-04 15:23:46
1. The principle behind estimating Pi
Among the examples bundled with Hadoop there is one that estimates the value of Pi. It is a fairly complete example, but it uses the old API. This chapter first analyzes that example and then re-implements it with the new API.
The idea behind the program is as follows. Take a square with side length 1. Using one corner of the square as the center and 1 as the radius, draw an arc, which puts a quarter circle (a right-angled circular sector) inside the square. Now generate a number of random points inside the square: some fall inside the sector and some fall outside it. The area of the square is 1, and the area of the sector is 0.25*Pi. If the total number of points is n and the number of points inside the sector is nc, then with enough densely scattered points the ratio nc/n approaches the ratio of the sector's area to the square's area, that is, nc/n = 0.25*Pi/1, so Pi = 4*nc/n.
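To make the relation Pi = 4*nc/n concrete, here is a minimal single-machine sketch (not part of the Hadoop example) that estimates Pi with plain java.util.Random points:

import java.util.Random;

public class SimplePi {
    public static void main(String[] args) {
        long n = 1000000;
        long nc = 0;
        Random rnd = new Random();
        for (long i = 0; i < n; i++) {
            // Draw a point uniformly from the unit square [0, 1) x [0, 1).
            double x = rnd.nextDouble();
            double y = rnd.nextDouble();
            // Count it if it falls inside the quarter circle of radius 1 centered at (0, 0).
            if (x * x + y * y <= 1.0) {
                nc++;
            }
        }
        System.out.println("Pi is approximately " + 4.0 * nc / n);
    }
}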
How should the random points be generated? The simplest way is to draw two random numbers in [0, 1] each time and use them as the x and y coordinates of a point. Unfortunately this does not work very well: purely random points can leave large gaps or land on top of each other, which limits the accuracy of the estimate. Sample points generated with the Halton sequence are spread much more evenly and give a more accurate estimate than purely random points, so this example uses the Halton sequence to generate its point set. For background on Halton sequences see http://orion.math.iastate.edu/reu/2001/voronoi/halton_sequence.html and http://www.aae.wisc.edu/dphaneuf/AAE%20875/Halton%20sequences.pdf; the details are not covered here.
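As a quick illustration of how a Halton sequence is built (a background sketch only; the Hadoop example below computes the same values incrementally in its HaltonSequence class), the i-th element in base b is the "radical inverse" of i: write i in base b and mirror its digits across the radix point. The 2-D points used here pair base 2 with base 3:

// Radical inverse of n in the given base: for base 2 the sequence runs 1/2, 1/4, 3/4, 1/8, 5/8, ...
static double radicalInverse(long n, int base) {
    double result = 0.0;
    double f = 1.0 / base;
    while (n > 0) {
        result += f * (n % base);
        n /= base;
        f /= base;
    }
    return result;
}

// The i-th 2-D Halton point: x coordinate from base 2, y coordinate from base 3.
double[] point = { radicalInverse(i, 2), radicalInverse(i, 3) };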
The more sample points generated inside the square, the more accurate the estimate of Pi becomes, which makes this problem a natural fit for Hadoop. Suppose 10 million points are to be generated inside the square: you can use 10 map tasks with each map task handling 1 million points, or 100 map tasks with each map task handling 100,000 points.
2. The Pi estimation MapReduce program with the old API
The program below is the example that ships with Hadoop.
For the calculation, we use 10 map tasks, each handling 1000 points. The flow is as follows:
1) Run the PiEstimator MapReduce program with the input arguments 10 and 1000, meaning 10 map tasks with each map task handling 1000 points.
2) PiEstimator initializes. One of the initialization steps creates a directory on HDFS, the input directory, which holds 10 sequence files. The number of map tasks determines the number of sequence files, so PiEstimator creates 10 of them. Each sequence file stores two integers: the index in the Halton sequence of the first sample point to process, and the number of sample points to generate. That is, the first file contains "0, 1000", the second "1000, 1000", the third "2000, 1000", the fourth "3000, 1000", and so on. If the Halton sequence is used to generate ten thousand sample points in total, the first map task generates the points with indices 0 to 999, the second map task indices 1000 to 1999, the third map task indices 2000 to 2999, and so on. The only parameter the Halton sequence needs to generate a point is its index.
3) PiEstimator runs the MapReduce job.
4) PiEstimator reads two integers from the MapReduce output directory: the number of points inside the quarter circle and the number of points outside it.
5) From the numbers in 4), it computes the value of Pi and returns it.
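For example, if 7,854 of 10,000 sample points land inside the quarter circle, the estimate is Pi ≈ 4 * 7854 / 10000 = 3.1416 (illustrative numbers only, not the output of an actual run).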
The file PiEstimator.java contains the PiEstimator class. PiEstimator has three inner classes: HaltonSequence, PiMapper, and PiReducer. HaltonSequence generates the sample points, PiMapper implements the map phase, and PiReducer implements the reduce phase.
The code of PiEstimator.java is as follows:
package org.apache.hadoop.examples;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Iterator;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class PiEstimator extends Configured implements Tool {
//Path of the temporary directory that holds the working files of the run.
static private final Path TMP_DIR = new Path(
PiEstimator.class.getSimpleName() + "_TMP_3_141592654");
//The HaltonSequence class, which generates the sample points.
private static class HaltonSequence {
static final int[] P = {2, 3};
static final int[] K = {63, 40};
private long index;
private double[] x;
private double[][] q;
private int[][] d;
//Constructor
HaltonSequence(long startindex) {
index = startindex;
x = new double[K.length];
q = new double[K.length][];
d = new int[K.length][];
for(int i = 0; i < K.length; i++) {
q[i] = new double[K[i]];
d[i] = new int[K[i]];
}
for(int i = 0; i < K.length; i++) {
long k = index;
x[i] = 0;
for(int j = 0; j < K[i]; j++) {
q[i][j] = (j == 0? 1.0: q[i][j-1])/P[i];
d[i][j] = (int)(k % P[i]);
k = (k - d[i][j])/P[i];
x[i] += d[i][j] * q[i][j];
}
}
}
//Generate the next point
double[] nextPoint() {
index++;
for(int i = 0; i < K.length; i++) {
for(int j = 0; j < K[i]; j++) {
d[i][j]++;
x[i] += q[i][j];
if (d[i][j] < P[i]) {
break;
}
d[i][j] = 0;
x[i] -= (j == 0? 1.0: q[i][j-1]);
}
}
return x;
}
}
//The PiMapper class, which defines the map phase
public static class PiMapper extends MapReduceBase
implements Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable> {
public void map(LongWritable offset,
LongWritable size,
OutputCollector<BooleanWritable, LongWritable> out,
Reporter reporter) throws IOException {
/*
The map function gets offset and size from the sequence file: offset is the Halton-sequence index of the first sample point to generate, and size is the number of sample points this map task has to generate.
*/
final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
long numInside = 0;
long numOutside = 0;
for(long i = 0; i < size.get(); ) {
//Generate a sample point
final double[] point = haltonsequence.nextPoint();
//Check whether the point lies inside the quarter circle.
final double x = point[0] - 0.5;
final double y = point[1] - 0.5;
if (x*x + y*y > 0.25) {
numOutside++;
} else {
numInside++;
}
i++;
//Update the task status every 1000 points, as a progress hint.
if (i % 1000 == 0) {
reporter.setStatus("Generated " + i + " samples.");
}
}
//The map task is done; emit its results.
out.collect(new BooleanWritable(true), new LongWritable(numInside));
out.collect(new BooleanWritable(false), new LongWritable(numOutside));
}
}
//The PiReducer class, which defines the reduce phase
public static class PiReducer extends MapReduceBase
implements Reducer<BooleanWritable, LongWritable, WritableComparable<?>, Writable> {
private long numInside = 0;
private long numOutside = 0;
private JobConf conf;
@Override
public void configure(JobConf job) {
conf = job;
}
public void reduce(BooleanWritable isInside,
Iterator<LongWritable> values,
OutputCollector<WritableComparable<?>, Writable> output,
Reporter reporter) throws IOException {
/*
isInside is a boolean key, either true or false.
values holds 10 counts, one from each of the 10 map tasks.
*/
//Accumulate the total numbers of sample points outside and inside the quarter circle across the 10 map tasks.
if (isInside.get()) {
for(; values.hasNext(); numInside += values.next().get());
} else {
for(; values.hasNext(); numOutside += values.next().get());
}
}
//After the reduce phase ends, write the result to a file in the temporary directory so the PiEstimator class can read it.
@Override
public void close() throws IOException {
Path outDir = new Path(TMP_DIR, "out");
Path outFile = new Path(outDir, "reduce-out");
FileSystem fileSys = FileSystem.get(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, LongWritable.class, LongWritable.class,
CompressionType.NONE);
writer.append(new LongWritable(numInside), new LongWritable(numOutside));
writer.close();
}
}
//The estimate function of the PiEstimator class: run the MapReduce job, then compute the value of Pi.
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf
) throws IOException {
jobConf.setJobName(PiEstimator.class.getSimpleName());
jobConf.setInputFormat(SequenceFileInputFormat.class);
jobConf.setOutputKeyClass(BooleanWritable.class);
jobConf.setOutputValueClass(LongWritable.class);
jobConf.setOutputFormat(SequenceFileOutputFormat.class);
jobConf.setMapperClass(PiMapper.class);
jobConf.setNumMapTasks(numMaps);
jobConf.setReducerClass(PiReducer.class);
jobConf.setNumReduceTasks(1);
/*
Turn off speculative execution.
With speculative execution, if Hadoop notices that some tasks are running slowly, it starts a duplicate of the same task on another node, and whichever copy finishes first wins.
But the reduce task writes its result to an HDFS file with a fixed name, so running two or more copies of the reduce task at the same time would make the write fail. That is why the feature is disabled here.
*/
jobConf.setSpeculativeExecution(false);
//Set the input and output directories
final Path inDir = new Path(TMP_DIR, "in");
final Path outDir = new Path(TMP_DIR, "out");
FileInputFormat.setInputPaths(jobConf, inDir);
FileOutputFormat.setOutputPath(jobConf, outDir);
//Create the input directory
final FileSystem fs = FileSystem.get(jobConf);
if (fs.exists(TMP_DIR)) {
throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
+ " already exists. Please remove it first.");
}
if (!fs.mkdirs(inDir)) {
throw new IOException("Cannot create input directory " + inDir);
}
try {
//Write one sequence file per map task, containing its two numbers.
for(int i=0; i < numMaps; ++i) {
final Path file = new Path(inDir, "part"+i);
final LongWritable offset = new LongWritable(i * numPoints);
final LongWritable size = new LongWritable(numPoints);
final SequenceFile.Writer writer = SequenceFile.createWriter(
fs, jobConf, file,
LongWritable.class, LongWritable.class, CompressionType.NONE);
try {
writer.append(offset, size);
} finally {
writer.close();
}
System.out.println("Wrote input for Map #"+i);
}
//Run the MapReduce job
System.out.println("Starting Job");
final long startTime = System.currentTimeMillis();
JobClient.runJob(jobConf);
final double duration = (System.currentTimeMillis() - startTime)/1000.0;
System.out.println("Job Finished in " + duration + " seconds");
//Read the output
Path inFile = new Path(outDir, "reduce-out");
LongWritable numInside = new LongWritable();
LongWritable numOutside = new LongWritable();
SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
try {
reader.next(numInside, numOutside);
} finally {
reader.close();
}
//Compute the value of Pi and return it
return BigDecimal.valueOf(4).setScale(20)
.multiply(BigDecimal.valueOf(numInside.get()))
.divide(BigDecimal.valueOf(numMaps))
.divide(BigDecimal.valueOf(numPoints));
} finally {
fs.delete(TMP_DIR, true);
}
}
//The run function required by the Tool interface
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: "+getClass().getName()+" <nMaps> <nSamples>");
ToolRunner.printGenericCommandUsage(System.err);
return -1;
}
final int nMaps = Integer.parseInt(args[0]);
final long nSamples = Long.parseLong(args[1]);
System.out.println("Number of Maps = " + nMaps);
System.out.println("Samples per Map = " + nSamples);
final JobConf jobConf = new JobConf(getConf(), getClass());
System.out.println("Estimated value of Pi is "
+ estimate(nMaps, nSamples, jobConf));
return 0;
}
//The main function of the PiEstimator class
public static void main(String[] argv) throws Exception {
System.exit(ToolRunner.run(null, new PiEstimator(), argv));
}
}
PiEstimator executes in this order: 1) the main function of the PiEstimator class; 2) the run function of the PiEstimator class; 3) the estimate function of the PiEstimator class.
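Incidentally, if you only want to run this bundled old-API example rather than rebuild it, Hadoop 1.x ships it in the examples jar; assuming a Hadoop 1.2.1 installation with the examples jar in the install root, something like the following should work:

./bin/hadoop jar hadoop-examples-1.2.1.jar pi 10 1000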
3. The Pi estimation MapReduce program with the new API
The new API differs considerably from the old one; Mapper, Reducer, and Job have all changed. This chapter modifies the old-API code to produce a program based on the new API. Most places are already commented, so they are not explained one by one.
3.1 Source code of NewPiEst.java
package com.brianchen.hadoop;
import java.lang.Exception;
import java.lang.Integer;
import java.math.BigDecimal;
import java.io.IOException;
import java.lang.InterruptedException;
import java.lang.Iterable;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.GenericOptionsParser;
public class NewPiEst extends Configured implements Tool{
//Temporary directory used for intermediate storage
static private final Path TMP_DIR = new Path("pitmp");
//Log
static final Log LOG = LogFactory.getLog(NewPiEst.class);
//The Halton sequence class
private static class HaltonSequence{
// bases
static final int[] P = {2, 3};
// maximum number of digits allowed
static final int[] K = {63, 40};
private long index;
private double[] x;
private double[][] q;
private int[][] d;
HaltonSequence(long startindex){
index = startindex;
x = new double[K.length];
q = new double[K.length][];
d = new int[K.length][];
for(int i = 0; i < K.length; i++){
q[i] = new double[K[i]];
d[i] = new int[K[i]];
}
for(int i = 0; i < K.length; i++){
long k = index;
x[i] = 0;
for(int j = 0; j < K[i]; j++){
q[i][j] = (j == 0? 1.0: q[i][j-1])/P[i];
d[i][j] = (int)(k % P[i]);
k = (k - d[i][j])/P[i];
x[i] += d[i][j] * q[i][j];
}
}
}
double[] nextPoint(){
index++;
for(int i = 0; i < K.length; i++){
for(int j = 0; j < K[i]; j++){
d[i][j]++;
x[i] += q[i][j];
if (d[i][j] < P[i]){
break;
}
d[i][j] = 0;
x[i] -= (j == 0? 1.0: q[i][j-1]);
}
}
return x;
}
}
//The Mapper class of the new API
public static class PiMapper extends Mapper<LongWritable, LongWritable, LongWritable, LongWritable>{
public void map(LongWritable offset, LongWritable size, Context context)
throws IOException, InterruptedException{
final HaltonSequence hs = new HaltonSequence(offset.get());
long nInside = 0;
long nOutside = 0;
for(int i = 0; i < size.get(); i++){
final double[] point = hs.nextPoint();
//The quarter circle here is centered at (0, 0) with radius 1, so a point is outside when x*x + y*y > 1.
if (point[0]*point[0] + point[1]*point[1] > 1){
nOutside++;
}else{
nInside++;
}
}
//Emit the two counts once, after all the points have been processed (emitting inside the loop would make the reducer sum the running totals and give a wrong result).
context.write(new LongWritable(1), new LongWritable(nOutside));
context.write(new LongWritable(2), new LongWritable(nInside));
}
}
//The Reducer class of the new API
public static class PiReducer extends
Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {
long nInside = 0;
long nOutside = 0;
public void reduce(LongWritable isInside, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException{
if (isInside.get() == 2 ){
for (LongWritable val : values) {
nInside += val.get();
}
}else{
for (LongWritable val : values) {
nOutside += val.get();
}
}
LOG.info("reduce-log:" + "isInside = " + isInside.get() + ", nInside = "+ nInside + ", nOutSide = "+nOutside );
}
//The Reducer runs cleanup before it finishes, so this is where the nInside and nOutside totals computed in the reduce phase are written to a file.
@Override
protected void cleanup(Context context) throws IOException, InterruptedException{
Path outDir = new Path(TMP_DIR, "out");
Path outFile = new Path(outDir, "reduce-out");
Configuration conf = context.getConfiguration();
FileSystem fs = FileSystem.get(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(
fs, conf, outFile, LongWritable.class, LongWritable.class, CompressionType.NONE);
writer.append(new LongWritable(nInside), new LongWritable(nOutside));
writer.close();
}
}
public static BigDecimal estimate(int nMaps, int nSamples, Job job)throws Exception{
LOG.info("\n\n estimate \n\n");
//Set the job's jar, Mapper, Reducer, and so on
job.setJarByClass(NewPiEst.class);
job.setMapperClass(PiMapper.class);
job.setReducerClass(PiReducer.class);
job.setNumReduceTasks(1);
//Use sequence files as both the input format and the output format
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
//Set the types of the output key and the output value
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(LongWritable.class);
job.setSpeculativeExecution(false);
Path inDir = new Path(TMP_DIR, "in");
Path outDir = new Path(TMP_DIR, "out");
//Set the directory of the input files and the directory of the output
FileInputFormat.addInputPath(job, inDir);
FileOutputFormat.setOutputPath(job, outDir);
//Check the directories
FileSystem fs = FileSystem.get(job.getConfiguration());
if (fs.exists(TMP_DIR)){
throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists, pls remove it.");
}
//Create the input directory
if (!fs.mkdirs(inDir)){
throw new IOException("Cannot create input directory " + inDir);
}
try{
//Write one sequence file per map task; each file holds two integers.
for(int i = 0; i < nMaps; i++){
final Path file = new Path(inDir, "part"+i);
final LongWritable offset = new LongWritable(i*nSamples);
final LongWritable size = new LongWritable(nSamples);
final SequenceFile.Writer writer = SequenceFile.createWriter(
fs, job.getConfiguration(), file,
LongWritable.class, LongWritable.class, CompressionType.NONE);
writer.append(offset, size);
writer.close();
System.out.println("wrote input for Map #" + i);
}
//Run the MapReduce job
System.out.println("starting mapreduce job");
final long startTime = System.currentTimeMillis();
boolean ret = job.waitForCompletion(true);
final double duration = (System.currentTimeMillis() - startTime)/1000.0;
System.out.println("Job finished in " + duration + " seconds.");
//Read the MapReduce result back from HDFS
Path inFile = new Path(outDir, "reduce-out");
LongWritable nInside = new LongWritable();
LongWritable nOutside = new LongWritable();
SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, job.getConfiguration());
reader.next(nInside, nOutside);
reader.close();
LOG.info("estimate-log: " + "nInside = "+nInside.get()+", nOutSide = "+nOutside.get());
//Compute the value of Pi and return it
return BigDecimal.valueOf(4).multiply(
BigDecimal.valueOf(nInside.get())
).divide(
BigDecimal.valueOf(nInside.get() + nOutside.get()), 20, BigDecimal.ROUND_HALF_DOWN
);
}finally{
fs.delete(TMP_DIR, true);
}
}
public int run(String[] args) throws Exception{
LOG.info("\n\n run \n\n");
if (args.length != 2){
System.err.println("Use: NewPieEst 10 10000");
System.exit(1);
}
//Parse the arguments
int nMaps = Integer.parseInt(args[0]);
int nSamples = Integer.parseInt(args[1]);
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
Job job = new Job(conf, "Pi estimating job");
System.out.println("Pi = " + estimate(nMaps, nSamples, job));
return 0;
}
public static void main(String[] argv) throws Exception{
LOG.info("\n\n main \n\n");
System.exit(ToolRunner.run(null, new NewPiEst(), argv));
}
}
3.2 Compiling and running
The project directory and file layout are similar to those of earlier chapters and are left as an exercise; approximate versions of the commands are given below.
Compile: "javac -cp /home/brian/usr/hadoop/hadoop-1.2.1/hadoop-core-1.2.1.jar:/home/brian/usr/hadoop/hadoop-1.2.1/lib/commons-logging-1.1.1.jar:/home/brian/usr/hadoop/hadoop-1.2.1/lib/commons-cli-1.2.jar -d ./classes/ src/*.java"
Package: "jar -cvf newpiest.jar -C ./classes/ ."
Run: "./bin/hadoop jar ~/all/work/ifitus/ifitus-dev-svn/hadoop_from_zero/src/chp08/newpiest/newpiest.jar com.brianchen.hadoop.NewPiEst 10 1000"
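Note that both programs refuse to run if their temporary directory already exists on HDFS (that is what the fs.exists(TMP_DIR) check is for), so if a previous run was interrupted before the finally block could clean up, remove the directory first, for example:

./bin/hadoop fs -rmr pitmp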