import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class TotalPerAndSal {enum Counter{LINESKIP;}public static class TSMapper extends Mapper{private String str;private String[] arr;private Text dep=new Text();private Text salary=new Text();public void map(LongWritable key,Text value,Context context) throws IOException,InterruptedException{str=value.toString();arr=str.split(",");try{if(arr.length==8){dep.set(arr[7].trim());salary.set("emp:"+arr[5].trim());}else{dep.set(arr[0].trim());salary.set("dep:"+arr[1].trim());} context.write(dep, salary);}catch(Exception e){System.out.println(e.getStackTrace());context.getCounter(Counter.LINESKIP).increment(1);return;}}}public static class TSReducer extends Reducer{private IntWritable result=new IntWritable();public void reduce(Text key,Iterable values,Context context) throws IOException,InterruptedException{String[] s;String depname = ""; int sum=0;int count=0;for(Text val:values){s=val.toString().split(":");if("dep".equals(s[0]))depname=s[1];else if("emp".equals(s[0])) {sum+=Integer.parseInt(s[1].toString());count++; }}key.set(key+","+depname+","+count+",");if(count==0) result.set(sum);else result.set(sum/count); context.write(key, result);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration(); conf.set("mapred.job.tracker", "192.168.1.23:9001"); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: 
TotalPerAndSal "); System.exit(2); } Job job = new Job(conf, "Total PerAndSal"); job.setJarByClass(TotalSalary.class); job.setMapperClass(TSMapper.class); //job.setCombinerClass(TSReducer.class); job.setReducerClass(TSReducer.class); // FileSystem fs=FileSystem.get(conf); Path out=new Path(args[1]); if(fs.exists(out)) fs.delete(out); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, out); //job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); boolean res=job.waitForCompletion(true); // Print out Job finishing status System.out.println( "Job Name: " + job.getJobName() ); System.out.println( "Job Successful: " + ( job.isSuccessful() ? "Yes" : "No" ) ); System.out.println( "Lines of Mapper Input: " + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue() ); System.out.println( "Lines of Reducer Output: " + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue() ); System.out.println( "Lines skipped: " + job.getCounters().findCounter(Counter.LINESKIP).getValue() ); if (res) System.exit(0); else System.exit(1);}}
TotalPerAndSal
Latest recommended article published 2022-05-12 16:31:19