Running a MapReduce Example Remotely from Eclipse

1. First, edit the Windows hosts file (C:\Windows\System32\drivers\etc\hosts) and add the IP addresses and hostnames of the Hadoop cluster nodes:

        192.168.7.11 intel-hadoop-11
        192.168.7.12 intel-hadoop-12
        192.168.7.13 intel-hadoop-13
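After saving the hosts file, it is worth confirming that the names resolve, for example:

        ping intel-hadoop-11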

2. Build the map/reduce classes into a jar and place it in the root directory of the Eclipse project (the driver below refers to it as com.test.jar).
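A minimal sketch of building that jar from the command line, assuming Eclipse compiles the classes into the project's bin directory (Eclipse's File > Export > JAR file dialog achieves the same result):

        jar cf com.test.jar -C bin com/test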

3. Code example: three class files.

Note: because Hadoop is being driven remotely, the program needs a Configuration _conf = new Configuration(); and the cluster's connection settings must be applied to _conf. These values can be looked up in the cluster's mapred-site.xml and core-site.xml files.
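For reference, those settings correspond to entries of the following shape in the cluster's configuration files (a sketch; the addresses mirror the driver settings below, but check your own cluster's files for the actual values):

core-site.xml:

        <property>
          <name>fs.default.name</name>
          <value>hdfs://192.168.7.11:8020</value>
        </property>

mapred-site.xml:

        <property>
          <name>mapred.job.tracker</name>
          <value>192.168.7.11:54311</value>
        </property>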

package com.test;

/*
 * Note: the local hosts file must contain the Hadoop hosts' IP mappings (see step 1).
 */

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCountDriver {
  // Driver
  public static void main(String[] args) throws Exception
  {
    Configuration _conf = new Configuration(); // these settings apply to this program only
    _conf.set("mapred.job.tracker", "192.168.7.11:54311"); // JobTracker address
    _conf.set("fs.default.name", "hdfs://192.168.7.11:8020"); // HDFS (NameNode) address
    _conf.set("hadoop.job.ugi", "hadoop"); // user identity to submit the job as
    _conf.set("hadoop.tmp.dir", "/user/"); // temporary directory
    _conf.set("dfs.permissions", "false"); // disable permission checks; not strictly necessary
    _conf.set("mapred.jar", "com.test.jar"); // jar with the map/reduce classes, placed in the project root

    //_conf.setBoolean("mapred.output.compress", true);
    //_conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);
    //String[] otherArgs = new GenericOptionsParser(_conf, args).getRemainingArgs();
    //if (otherArgs.length != 2)
    //{
    //  System.err.println("Usage: wordcount <in> <out>");
    //  System.exit(2);
    //}

    Job job = new Job(_conf, "word count");
    job.setJarByClass(WordCountDriver.class); // class used to locate the job jar
    //job.setNumReduceTasks(5);
    FileInputFormat.addInputPath(job, new Path("/user/input"));    // input directory on HDFS
    FileOutputFormat.setOutputPath(job, new Path("/user/output")); // output directory on HDFS
    job.setMapperClass(WordCountMap.class);     // mapper class
    job.setReducerClass(WordCountReduce.class); // reducer class
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
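The driver runs straight from Eclipse as an ordinary Java application. Note that when submitting from an IDE the classes on the local classpath are loose .class files rather than a jar, so job.setJarByClass(...) alone typically cannot find a jar to ship to the cluster; the explicit mapred.jar setting pointing at the jar from step 2 is what makes the map/reduce classes available on the worker nodes.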


package com.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMap extends Mapper<Object, Text, Text, IntWritable> {
  private static final IntWritable value_ = new IntWritable(1); // constant count of 1 per word
  private static final Text _key = new Text();

  @Override
  public void map(Object key, Text value, Context context) throws IOException, InterruptedException
  {
    // Split the input line into whitespace-separated tokens and emit (word, 1) for each.
    StringTokenizer _value = new StringTokenizer(value.toString().trim());
    while (_value.hasMoreTokens())
    {
      _key.set(_value.nextToken().trim());
      context.write(_key, value_);
    }
  }
}
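As a quick sanity check of the mapper's behavior: an input line such as "hello world hello" yields the pairs (hello, 1), (world, 1), (hello, 1).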



package com.test;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
  private static final IntWritable _sum = new IntWritable();

  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
  {
    // Sum the counts for this word and emit (word, total).
    int sum = 0;
    for (IntWritable value : values)
    {
      sum += value.get();
    }
    _sum.set(sum);
    context.write(key, _sum);
  }
}
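Continuing that example, after the shuffle the reducer receives (hello, [1, 1]) and (world, [1]) and writes (hello, 2) and (world, 1).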

Run output:

14/06/26 10:46:00 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/06/26 10:46:01 INFO input.FileInputFormat: Total input paths to process : 1
14/06/26 10:46:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/06/26 10:46:01 WARN snappy.LoadSnappy: Snappy native library not loaded
14/06/26 10:46:02 INFO mapred.JobClient: Running job: job_201406251739_0002
14/06/26 10:46:03 INFO mapred.JobClient:  map 0% reduce 0%
14/06/26 10:46:16 INFO mapred.JobClient:  map 100% reduce 0%
14/06/26 10:46:26 INFO mapred.JobClient:  map 100% reduce 33%
14/06/26 10:46:29 INFO mapred.JobClient:  map 100% reduce 100%
14/06/26 10:46:33 INFO mapred.JobClient: Job complete: job_201406251739_0002
14/06/26 10:46:33 INFO mapred.JobClient: Counters: 29
14/06/26 10:46:33 INFO mapred.JobClient:   Job Counters 
14/06/26 10:46:33 INFO mapred.JobClient:     Launched reduce tasks=1
14/06/26 10:46:33 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=15193
14/06/26 10:46:33 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/06/26 10:46:33 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/06/26 10:46:33 INFO mapred.JobClient:     Launched map tasks=1
14/06/26 10:46:33 INFO mapred.JobClient:     Data-local map tasks=1
14/06/26 10:46:33 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=10845
14/06/26 10:46:33 INFO mapred.JobClient:   File Output Format Counters 
14/06/26 10:46:33 INFO mapred.JobClient:     Bytes Written=626
14/06/26 10:46:33 INFO mapred.JobClient:   FileSystemCounters
14/06/26 10:46:33 INFO mapred.JobClient:     FILE_BYTES_READ=1272
14/06/26 10:46:33 INFO mapred.JobClient:     HDFS_BYTES_READ=738
14/06/26 10:46:33 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=51545
14/06/26 10:46:33 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=626
14/06/26 10:46:33 INFO mapred.JobClient:   File Input Format Counters 
14/06/26 10:46:33 INFO mapred.JobClient:     Bytes Read=630
14/06/26 10:46:33 INFO mapred.JobClient:   Map-Reduce Framework
14/06/26 10:46:33 INFO mapred.JobClient:     Map output materialized bytes=1272
14/06/26 10:46:33 INFO mapred.JobClient:     Map input records=1
14/06/26 10:46:33 INFO mapred.JobClient:     Reduce shuffle bytes=1272
14/06/26 10:46:33 INFO mapred.JobClient:     Spilled Records=212
14/06/26 10:46:33 INFO mapred.JobClient:     Map output bytes=1054
14/06/26 10:46:33 INFO mapred.JobClient:     CPU time spent (ms)=2650
14/06/26 10:46:33 INFO mapred.JobClient:     Total committed heap usage (bytes)=301006848
14/06/26 10:46:33 INFO mapred.JobClient:     Combine input records=0
14/06/26 10:46:33 INFO mapred.JobClient:     SPLIT_RAW_BYTES=108
14/06/26 10:46:33 INFO mapred.JobClient:     Reduce input records=106
14/06/26 10:46:33 INFO mapred.JobClient:     Reduce input groups=71
14/06/26 10:46:33 INFO mapred.JobClient:     Combine output records=0
14/06/26 10:46:33 INFO mapred.JobClient:     Physical memory (bytes) snapshot=289206272
14/06/26 10:46:33 INFO mapred.JobClient:     Reduce output records=71
14/06/26 10:46:33 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=1508102144
14/06/26 10:46:33 INFO mapred.JobClient:     Map output records=106
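After the job completes, the word counts can be read back from HDFS; since the job ran with a single reduce task, they land in one part file:

        hadoop fs -cat /user/output/part-r-00000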


Reposted from: https://my.oschina.net/huotui/blog/284320
