Changes between the old and new Hadoop APIs

http://blog.csdn.net/derekjiang/article/details/6836209
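
For quick reference, the driver-side difference roughly looks like this. A minimal sketch only: WordCount, OldMap/OldReduce and NewMap/NewReduce are hypothetical classes, fully qualified names are used to make the two packages explicit, and the usual imports for Configuration and Path are assumed.

// Old API (org.apache.hadoop.mapred): Mapper/Reducer are interfaces, the job
// is described by a JobConf and submitted through JobClient.
org.apache.hadoop.mapred.JobConf conf =
        new org.apache.hadoop.mapred.JobConf(WordCount.class);
conf.setJobName("wordcount");
conf.setMapperClass(OldMap.class);      // implements org.apache.hadoop.mapred.Mapper
conf.setReducerClass(OldReduce.class);  // implements org.apache.hadoop.mapred.Reducer
org.apache.hadoop.mapred.FileInputFormat.setInputPaths(conf, new Path("in"));
org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(conf, new Path("out"));
org.apache.hadoop.mapred.JobClient.runJob(conf);

// New API (org.apache.hadoop.mapreduce): Mapper/Reducer are abstract classes
// that work through a Context object, the job is an
// org.apache.hadoop.mapreduce.Job, and the caller waits for completion itself.
org.apache.hadoop.mapreduce.Job job =
        new org.apache.hadoop.mapreduce.Job(new Configuration(), "wordcount");
job.setJarByClass(WordCount.class);
job.setMapperClass(NewMap.class);       // extends org.apache.hadoop.mapreduce.Mapper
job.setReducerClass(NewReduce.class);   // extends org.apache.hadoop.mapreduce.Reducer
org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path("in"));
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path("out"));
job.waitForCompletion(true);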

The MapReduce process
http://blog.csdn.net/derekjiang/article/details/6851625
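
As a small illustration of the two phases, here is a minimal new-API sketch in the WordCount style; the LineMapper/SumReducer names are made up. map() emits (word, 1) pairs, the framework sorts and groups them by key, and reduce() sums each group.

public static class LineMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        for (String token : line.toString().split("\\s+")) {
            if (!token.isEmpty()) {
                word.set(token);
                context.write(word, ONE);   // one (word, 1) pair per token
            }
        }
    }
}

public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text word, Iterable<IntWritable> counts, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable c : counts) {
            sum += c.get();                  // all counts for this word arrive together
        }
        context.write(word, new IntWritable(sum));
    }
}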



// Add the previous 29 days of core-action input directories to the job, plus
// today's fKpiday directory. Assumes date, job, coreActionInputPath and
// fKpidayInputPath are defined elsewhere in the driver; DateUtils is
// org.apache.commons.lang.time.DateUtils.
SimpleDateFormat dateformate = new SimpleDateFormat("yyyyMMdd");
Date today = dateformate.parse(date);
Date tempDate;

for (int i = -1; i > -30; i--) {
    tempDate = DateUtils.addDays(today, i);
    FileInputFormat.addInputPath(job,
            new Path(coreActionInputPath + "/" + dateformate.format(tempDate)));
    System.out.println(coreActionInputPath + "/" + dateformate.format(tempDate));
}
FileInputFormat.addInputPath(job,
        new Path(fKpidayInputPath + "/" + dateformate.format(today)));



Getting parameters
http://blog.sina.com.cn/s/blog_9ffceca501016vmj.html
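
The link above covers passing parameters into a job. The usual new-API pattern looks roughly like the sketch below; my.threshold is a made-up parameter name and ParamMapper a hypothetical class. The driver puts the value on the Configuration (or the user passes it with -D through GenericOptionsParser/ToolRunner), and the task reads it back from the Context.

// Driver side: put the parameter on the Configuration before building the Job.
Configuration conf = new Configuration();
conf.setInt("my.threshold", 10);   // or: hadoop jar app.jar ... -D my.threshold=10
Job job = new Job(conf, "param-demo");

// Task side: read the value back from the Context in setup().
public static class ParamMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {
    private int threshold;

    @Override
    protected void setup(Context context) {
        threshold = context.getConfiguration().getInt("my.threshold", 0);
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Keep only lines longer than the configured threshold.
        if (value.getLength() > threshold) {
            context.write(value, new IntWritable(1));
        }
    }
}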

Hadoop CDH4 structure
http://blog.csdn.net/chenpingbupt/article/details/7922104

Manually deploying CDH4.5
http://wenku.baidu.com/view/6544c87f2e3f5727a5e962a3.html

http://www.xingxingge.com/Hadoop/134.html


http://dirlt.com/

A question about the old vs. new Hadoop APIs

12-09

Fellow developers, has anyone run into the following problem, and if so, how did you solve it?

Since 0.20.2 Hadoop has adopted the new API. The basic differences are well known, but the problem is that parts of the old API have never been re-exposed in the new one. A concrete example: to test dependency-chained jobs we need the JobControl class, which lives in the org.apache.hadoop.mapred.jobcontrol package. Once we use it we basically cannot call the new API any more; for instance, the Job passed to its addJobs(Collection jobs) method must be the Job from org.apache.hadoop.mapred, not the one from org.apache.hadoop.mapreduce, yet all the code we wrote earlier uses the org.apache.hadoop.mapreduce classes. I have hit the same issue elsewhere as well. One workaround is to use the old API throughout, which seems to work, but then what is the point of having a new API at all?

Source code:

package multijob.dependencejob;

/**
 * This source demonstrates running MapReduce jobs composed by dependency.
 * The dependency among tasks x, y, z is expressed as: z depends on x and y.
 * The problem is that the JobControl class handling the dependency belongs to
 * the old API, while the jobs we build are new-API Jobs, so a type-mismatch
 * error is reported. If we switch to the old-API job, the Mapper and Reducer
 * classes it expects become old-API as well, which is clearly not what we want.
 * How can this version conflict be resolved? The same problem shows up again
 * in the chained processing later on; it is a general issue.
 */
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.jobcontrol.JobControl;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class MainJob {

    public static class MainMap extends
            Mapper<LongWritable, Text, LongWritable, Text> {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    public static class MainReduce extends
            Reducer<LongWritable, Text, Text, Text> {

        public void reduce(LongWritable key, Iterator values,
                Context context) throws IOException, InterruptedException {
            /**
             * nothing
             */
            context.write(new Text("2"), new Text("2"));
        }
    }

    public static void main(String[] args) throws Exception {
        /*
         * job2 parameter settings
         */
        Configuration jobXConf = new Configuration();

        String[] ioArgs = new String[] { "testin/stjoin.txt", "output/link8" };
        String[] otherArgs = new GenericOptionsParser(jobXConf, ioArgs)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job jobX = new Job(jobXConf, "JobX");
        jobX.setJarByClass(JobX.class);

        jobX.setMapperClass(JobX.MapX.class);
        jobX.setReducerClass(JobX.ReduceX.class);

        // job1.setOutputKeyClass(Text.class);
        // job1.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(jobX, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(jobX, new Path(otherArgs[1]));

        // Wait for the first job to finish before running the second one
        // jobX.waitForCompletion(true);

        /*
         * jobY parameter settings
         */
        Configuration jobYConf = new Configuration();
        String[] ioArgsY = new String[] { "output/link8", "output/linkend8" };
        String[] otherArgsY = new GenericOptionsParser(jobYConf, ioArgsY)
                .getRemainingArgs();
        if (otherArgsY.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job jobY = new Job(jobYConf, "JobY");
        jobY.setJarByClass(JobY.class);

        jobY.setMapperClass(JobY.MapY.class);
        jobY.setReducerClass(JobY.ReduceY.class);

        // job2.setOutputKeyClass(Text.class);
        // job2.setOutputValueClass(Text.class);

        // job2.setInputFormatClass(KeyValueTextInputFormat.class);

        FileInputFormat.addInputPath(jobY, new Path(otherArgsY[0]));
        FileOutputFormat.setOutputPath(jobY, new Path(otherArgsY[1]));

        // jobY.waitForCompletion(true);

        /*
         * jobMain parameter settings
         */
        Configuration jobMConf = new Configuration();
        String[] ioArgsM = new String[] { "output/link8", "output/linkend8" };
        String[] otherArgsM = new GenericOptionsParser(jobMConf, ioArgsY)
                .getRemainingArgs();
        if (otherArgsM.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job jobM = new Job(jobMConf, "JobM");
        jobM.setJarByClass(MainJob.class);

        jobM.setMapperClass(MainJob.MainMap.class);
        jobM.setReducerClass(MainJob.MainReduce.class);

        // job2.setOutputKeyClass(Text.class);
        // job2.setOutputValueClass(Text.class);

        // job2.setInputFormatClass(KeyValueTextInputFormat.class);

        FileInputFormat.addInputPath(jobM, new Path(otherArgsM[0]));
        FileOutputFormat.setOutputPath(jobM, new Path(otherArgsM[1]));

        // Set the dependencies between the jobs

        // Set up the job control group
        JobControl jc = new JobControl("compound");
        /* This is where the compiler reports a type mismatch: the jobX we
           built comes from the mapreduce package, while addJob() expects the
           one from mapred */
        jc.addJob(jobX);
        jc.addJob(jobY);
        jc.addJob(jobM);
        jc.run();
    }
}

Summary of the question: why can't the classes under the old-API packages work with those under the new-API packages? And since some of the old API has never been re-exposed in the new one, how should this be solved?
