前几天遇到了大数据量上传的问题：上传速度一直很慢。看到网上有文章实现了带路由的 MapReduce（MR）索引程序，于是我也写了一个，程序如下：
</pre><p><pre name="code" class="java">public class MapReduceWithRoute extends Configured implements Tool {
/**
 * Command-line entry point: runs this tool through {@code ToolRunner} and
 * prints the elapsed wall-clock time in milliseconds.
 *
 * <p>When the tool reports a non-zero status, the JVM exits with that status
 * after the timing line has been printed, so a calling shell or scheduler can
 * detect the failure. A zero status lets {@code main} return normally.
 *
 * @param args command-line arguments, forwarded unchanged to {@code ToolRunner.run}
 * @throws Exception if {@code ToolRunner.run} throws
 */
public static void main(String[] args) throws Exception {
    long start = System.currentTimeMillis();
    System.out.println("hello");
    // Keep the status returned by the tool instead of silently dropping it.
    int exitCode = ToolRunner.run(new MapReduceWithRoute(), args);
    long end = System.currentTimeMillis();
    System.out.println("time is:" + ((end - start)));
    if (exitCode != 0) {
        // Propagate failure as the process exit status; success falls through as before.
        System.exit(exitCode);
    }
}
public int run(String args[]) throws Exception {
Job job;
try {
Configuration conf = getConf();
if (conf == null)
conf = new Configuration();
int numReducerTasks = 3;// default
try {
numReducerTasks = Integer.parseInt(args[2]);
} catch (Exception e) {
System.out.println("Exception occurred getting reducers " + e.getMessage());
e.printStackTrace();
}
// conf.set("dfs.replication", "2");
// conf.set("mapred.map.tasks.speculative.execution", "false");
// conf.set("mapred.reduce.tasks.speculative.execution", "false");
// conf.set("mapreduce.job.ubertask.enable", "true");
job = new Job(conf, "mapreducetest");
job.setJarByClass(MapReduceWithRoute.class);
job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(solrBean.class);
job.setNumReduceTasks(numReducerTasks);
// job.setPartit