MapReduce job template

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class Template extends Configured implements Tool {

	/** Mapper skeleton; fill in the per-record logic. */
	public static class M extends
			Mapper<LongWritable, Text, LongWritable, Text> {
		@Override
		protected void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// e.g. context.write(key, value);
		}
	}

	/** Reducer skeleton; also reused as the combiner below, so its input
	 *  and output key/value types must match the map output types. */
	public static class R extends
			Reducer<LongWritable, Text, LongWritable, Text> {
		@Override
		protected void reduce(LongWritable key, Iterable<Text> values,
				Context context) throws IOException, InterruptedException {
			// e.g. aggregate over values, then context.write(key, result);
		}
	}

	/** Partitioner; must be typed on the map output key/value classes
	 *  (LongWritable/Text here, not Text/LongWritable). */
	public static class P extends Partitioner<LongWritable, Text> {
		@Override
		public int getPartition(LongWritable key, Text value, int parts) {
			int hash = key.hashCode();
			// Mask the sign bit so the partition index is never negative.
			return (hash & Integer.MAX_VALUE) % parts;
		}
	}

	/** Grouping comparator: decides which keys share one reduce() call.
	 *  Delegates to the default LongWritable ordering; replace it for
	 *  secondary-sort style grouping. (Returning 0 unconditionally would
	 *  lump every key into a single group.) */
	public static class G implements RawComparator<LongWritable> {
		private static final WritableComparator CMP = WritableComparator
				.get(LongWritable.class);

		@Override
		public int compare(LongWritable o1, LongWritable o2) {
			return o1.compareTo(o2);
		}

		@Override
		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
			return CMP.compare(b1, s1, l1, b2, s2, l2);
		}
	}

	/** Sort comparator: defines the order in which keys reach the reducer.
	 *  Delegates to the default LongWritable ordering; replace as needed. */
	public static class C implements RawComparator<LongWritable> {
		private static final WritableComparator CMP = WritableComparator
				.get(LongWritable.class);

		@Override
		public int compare(LongWritable o1, LongWritable o2) {
			return o1.compareTo(o2);
		}

		@Override
		public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
			return CMP.compare(b1, s1, l1, b2, s2, l2);
		}
	}

	@Override
	public int run(String[] args) throws Exception {
		// new Job(conf, name) is the 0.20/1.x API; newer releases prefer
		// Job.getInstance(conf, name).
		Job job = new Job(getConf(), "Template this is!");
		job.setJarByClass(Template.class);

		job.setMapperClass(M.class);
		// Reusing the reducer as the combiner is safe here only because
		// its input and output key/value types are identical.
		job.setCombinerClass(R.class);
		job.setReducerClass(R.class);

		job.setPartitionerClass(P.class);
		job.setGroupingComparatorClass(G.class);
		job.setSortComparatorClass(C.class);

		// args[0] may be a comma-separated list of input paths.
		FileInputFormat.addInputPaths(job, args[0]);

		// job.setInputFormatClass(LzoTextInputFormat.class);
		// LzoTextInputFormat.addInputPaths(job, args[0]);

		// Reduce output types; they double as the map output types here,
		// so no separate setMapOutput{Key,Value}Class calls are needed.
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Text.class);
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// job.setOutputFormatClass(TextOutputFormat.class);
		// TextOutputFormat.setOutputPath(job, new Path(args[1]));
		// TextOutputFormat.setCompressOutput(job, true);
		// TextOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

		// job.setOutputFormatClass(SequenceFileOutputFormat.class);
		// SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));
		// SequenceFileOutputFormat.setCompressOutput(job, true);
		// SequenceFileOutputFormat.setOutputCompressorClass(job,
		// GzipCodec.class);
		// SequenceFileOutputFormat.setOutputCompressionType(job,
		// CompressionType.BLOCK);

		boolean successful = job.waitForCompletion(true);

		System.out.println(job.getJobID()
				+ (successful ? " succeeded" : " failed"));

		return successful ? 0 : 1;
	}

	public static void main(String[] args) throws Exception {
		// ToolRunner parses the generic Hadoop options (-D, -conf, -fs, ...)
		// before handing the remaining args to run().
		System.exit(ToolRunner.run(new Configuration(), new Template(), args));
	}
	}

}
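
Once packaged into a jar, a typical invocation looks like this (the jar name and the input/output paths are placeholders):

hadoop jar template.jar Template input output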

On local testing

There are a few approaches to debugging your Hadoop MapReduce jobs:

1. Use MRUnit (http://www.cloudera.com/hadoop-mrunit) to write unit tests for your map and reduce logic. The tests run inside Eclipse, so it is easy to debug the specific logic.
2. Set the jobtracker to "local" (mapred.job.tracker is the config variable), which runs the map and reduce tasks in the same JVM as the job submission. You can trigger this from Eclipse, set breakpoints, and so on; see http://wiki.apache.org/hadoop/HowToDebugMapReducePrograms and the sketch after this list.
3. The trickiest approach is to attach a debugger to the task JVMs that Hadoop spawns. This is not as simple as attaching a debugger to the TaskTracker, because the TaskTracker itself forks a subprocess (called Child) to run your tasks. Use mapred.child.java.opts to add -Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8020 to the child tasks; then attach Eclipse (or any other Java debugger) to port 8020.
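
For approach 2, here is a minimal sketch of a local runner (the class name LocalDebugRunner is just an illustration, and the property names assume Hadoop 0.20/1.x; newer releases use mapreduce.framework.name instead of mapred.job.tracker):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

// Runs the Template job entirely inside the current JVM, so breakpoints
// set in M.map() and R.reduce() are hit when launched from Eclipse.
public class LocalDebugRunner {
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		conf.set("mapred.job.tracker", "local"); // tasks run in-process
		conf.set("fs.default.name", "file:///"); // use the local filesystem
		// For approach 3 instead, make each child JVM wait for a debugger:
		// conf.set("mapred.child.java.opts",
		//		"-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8020");
		System.exit(ToolRunner.run(conf, new Template(), args));
	}
}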

The reason you're not seeing the Hadoop daemons with jps is that they're probably running as the user hadoop; if you use sudo jps, you'll probably see those processes.

http://wiki.apache.org/hadoop/HowToDebugMapReducePrograms

The following JVM parameters can be added as appropriate:

-Dfile.encoding=UTF-8 -Duser.language=zh -Xms1024m -Xmx1024m -XX:PermSize=64M -XX:MaxPermSize=128m -XX:MaxNewSize=256m -Djava.awt.headless=true
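
These flags belong on the client JVM (for example, in an Eclipse run configuration). To forward a subset to the spawned task JVMs instead, one option is mapred.child.java.opts, as above; a minimal sketch, again assuming the old-style property name:

Configuration conf = new Configuration();
// Forward only what the tasks need; the heap sizes are examples, not advice.
conf.set("mapred.child.java.opts",
		"-Dfile.encoding=UTF-8 -Xms1024m -Xmx1024m -Djava.awt.headless=true");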
