Map和Reduce方法中操作本地文件

最新推荐文章于 2024-04-18 15:54:27 发布

风声2012

最新推荐文章于 2024-04-18 15:54:27 发布

阅读量5.6k

点赞数

文章标签： string class output path mapreduce exception

本文链接：https://blog.csdn.net/zklth/article/details/5816455

版权

Map和Reduce方法中操作本地文件

在Map和Reduce方法中是可以直接操作本地文件的，例如对本地文件系统进行读或写；只是这种读写同样是分布式的，即读写的是执行该task的节点的本地硬盘。

注意事项：mapreduce程序书写完毕后，请务必打包成jar，并通过命令行提交运行。之前我向本地文件系统写时一直不生成数据，以为map或者reduce中不能向本地文件系统写，实际上并非如此。我的错误之处是直接在主节点上的eclipse中编译运行，由于从节点上没有主节点上的这些代码，所以执行之后毫无效果。

下面是在Map中向本地文件系统写文件的代码。代码成功运行后，会在从节点的/home/hadoop目录下生成LogInfo文件。

package org.apache.hadoop.examples;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/*
 * AUTHOR: zhankunlin 2010-8-16
 */

/**
 * Word-count MapReduce job that also appends begin/end markers to a file on
 * the local file system of whichever node runs the code.
 *
 * <p>The job must be packaged as a jar and submitted from the command line;
 * the markers written from inside the map task then appear on the worker
 * nodes, while the markers written from {@link #main(String[])} appear on the
 * node that submits the job.
 */
public class WordCountZKL {

    /**
     * Minimal append-only marker log written to the local file system.
     */
    public static class LogInfo {

        /** Local path of the marker file; every call appends to it. */
        public static String LogFile = "/home/hadoop/LogInfo";

        /**
         * Appends the line {@code "<region> <taskID> begin"} to {@link #LogFile}.
         *
         * @param region name of the phase being logged, e.g. {@code "map"} or {@code "job"}
         * @param taskID identifier of the task or job being logged
         */
        public static void Begin(String region, String taskID) {
            append(region + " " + taskID + " begin\n");
        }

        /**
         * Appends the line {@code "<region> <taskID> end"} to {@link #LogFile}.
         *
         * @param region name of the phase being logged, e.g. {@code "map"} or {@code "job"}
         * @param taskID identifier of the task or job being logged
         */
        public static void End(String region, String taskID) {
            append(region + " " + taskID + " end\n");
        }

        /**
         * Appends one already-formatted line to {@link #LogFile} and closes the stream.
         *
         * <p>Logging is best effort: a failure is reported on standard error
         * instead of being thrown, so it never aborts the task, but it is no
         * longer silently swallowed either.
         */
        private static void append(String line) {
            FileOutputStream out = null;
            try {
                out = new FileOutputStream(LogFile, true);
                out.write(line.getBytes());
            } catch (IOException e) {
                // FileNotFoundException is an IOException, so this covers both cases.
                System.err.println("LogInfo: cannot append to " + LogFile + ": " + e);
            } finally {
                if (out != null) {
                    try {
                        out.close();
                    } catch (IOException e) {
                        System.err.println("LogInfo: cannot close " + LogFile + ": " + e);
                    }
                }
            }
        }
    }

    /*
    public static class WordCountMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
    */

    /**
     * Mapper that drives the record loop itself so that it can write one
     * begin marker and one end marker per map task.
     */
    public static class WordCountMapperZKL extends Mapper<Object, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        /**
         * Consumes every record of the task, emitting {@code (token, 1)} for
         * each whitespace-separated token of each value.
         *
         * @param context task context used to read records and emit pairs
         * @throws IOException if reading a record or emitting a pair fails
         * @throws InterruptedException if the task is interrupted
         */
        public void map(Context context) throws IOException, InterruptedException {
            // The marker file is created on the worker node that runs this task.
            LogInfo.Begin("map", context.getTaskAttemptID().getTaskID().toString());
            while (context.nextKeyValue()) {
                Text value = context.getCurrentValue();
                // --- per-record word-count logic ---
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    context.write(word, one);
                }
                // --- end of per-record logic ---
            }
            LogInfo.End("map", context.getTaskAttemptID().getTaskID().toString());
        }

        /**
         * Expert users can override this method for more complete control over the
         * execution of the Mapper. Here it runs setup, the whole-task
         * {@link #map(Context)} loop, then cleanup.
         *
         * @param context task context
         * @throws IOException if the task fails on I/O
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        public void run(Context context) throws IOException, InterruptedException {
            setup(context);
            map(context);
            cleanup(context);
        }
    }

    /**
     * Reducer (also used as the combiner) that drives the key loop itself and
     * sums the counts of each word.
     */
    public static class WordCountReducerZKL extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        /**
         * Consumes every key of the task and emits {@code (key, sum of its values)}.
         *
         * @param context task context used to read groups and emit pairs
         * @throws IOException if reading a group or emitting a pair fails
         * @throws InterruptedException if the task is interrupted
         */
        public void reduce(Context context) throws IOException, InterruptedException {
            while (context.nextKey()) {
                Text key = context.getCurrentKey();
                Iterable<IntWritable> values = context.getValues();
                // --- per-key word-count logic ---
                int sum = 0;
                for (IntWritable val : values) {
                    sum += val.get();
                }
                result.set(sum);
                context.write(key, result);
                // --- end of per-key logic ---
            }
        }

        /**
         * Advanced application writers can use the
         * {@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to
         * control how the reduce task works. Here it runs setup, the
         * whole-task {@link #reduce(Context)} loop, then cleanup.
         *
         * @param context task context
         * @throws IOException if the task fails on I/O
         * @throws InterruptedException if the task is interrupted
         */
        @Override
        public void run(Context context) throws IOException, InterruptedException {
            setup(context);
            reduce(context);
            cleanup(context);
        }
    }

    /**
     * Configures and runs the word-count job on the fixed paths
     * {@code wcinZKL} (input) and {@code wcoutZKL} (output), deleting the
     * output path first if it already exists.
     *
     * @param args ignored; the input and output paths are hard-coded
     * @throws Exception if file-system access or job submission fails
     */
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        // The LogInfo file is created on the node that submits the job (the master).
        LogInfo.Begin("job", "job_1");

        Configuration conf = new Configuration();
        /*
         * String[] otherArgs = new GenericOptionsParser(conf,
         * args).getRemainingArgs(); if (otherArgs.length != 2) {
         * System.err.println("Usage: wordcount <in> <out>"); System.exit(2); }
         */
        String[] inputPars = { "wcinZKL", "wcoutZKL" };
        String[] otherArgs = new GenericOptionsParser(conf, inputPars).getRemainingArgs();

        Path outputPaths = new Path(otherArgs[1]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPaths)) { // please see the code of exists() method
            // throw new FileAlreadyExistsException("Output directory " +
            // outputPaths + " already exists");
            FileStatus fsStatus = fs.getFileStatus(outputPaths);
            if (fsStatus.isDir()) {
                // only test the methods of hdfs, but it is not necessary
                fs.delete(outputPaths, true);
            } else {
                fs.delete(outputPaths, false); // true is also ok
            }
            System.out.println("Output directory \"" + outputPaths
                    + "\" already exists" + ",firstly delete it");
        }
        /*
         * FileStatus fsStatus=fs.getFileStatus(outputPaths); if
         * (fsStatus!=null) { throw new
         * FileAlreadyExistsException("Output directory " + outputPaths +
         * " already exists"); }
         */

        Job job = new Job(conf, "word count zkl");
        job.setJarByClass(WordCountZKL.class);
        job.setMapperClass(WordCountMapperZKL.class);
        job.setCombinerClass(WordCountReducerZKL.class);
        job.setReducerClass(WordCountReducerZKL.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, outputPaths);

        System.out.println("job " + job.getJobName() + "(" + job.getJobID() + ")"
                + " finished? " + job.waitForCompletion(true));
        // System.exit( job.waitForCompletion(true)? 0 : 1);

        LogInfo.End("job", "job_1");
    }
}

风声2012

关注

0
点赞
踩
4

收藏

觉得还不错? 一键收藏
1
评论
Map和Reduce方法中操作本地文件

Map和Reduce方法中操作本地文件 在Map和Reduce方法中是可以直接操作本地文件的，例如向本地文件系统中写或者读，只是这也会是分布式读和写，这会是从执行task的节点的本地硬盘中读或向其中写。 注意事项：mapreduce程序书写完毕，请务必打包成jar，在命令行提交中运行。之前我向本地文件系统写时一直不生成数据，以为map或者reduce中不能向本地文件系统写，实际上并不如此。我的错误之处是直接在主节点上的eclipse中编译运行，
复制链接

扫一扫