MapReduce v2 在 Java 代码中远程提交作业到 YARN 的配置项,最后编辑于 2021-12-22
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
// Client-side settings for submitting a MapReduce v2 job to a remote YARN cluster.
// `conf` and `job` are created outside this snippet (presumably a Hadoop
// Configuration and the Job built from it -- confirm against the caller).
// The comment above each call gives the literal property key behind the constant.

// ---- HBase / ZooKeeper ----
// hbase.zookeeper.quorum
// List host names only and put the port in its own property: some HBase
// releases append the client port to every quorum entry, so an inline ":2181"
// on a single host may end up as an invalid address.
conf.set(HConstants.ZOOKEEPER_QUORUM, "zkserver01,zkserver02,zkserver03");
// hbase.zookeeper.property.clientPort
conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
// zookeeper.session.timeout
conf.set(HConstants.ZK_SESSION_TIMEOUT, "60000");

// ---- YARN ResourceManager endpoints ----
// yarn.resourcemanager.scheduler.address
conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, "rmserver:8030");
// yarn.resourcemanager.resource-tracker.address
conf.set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, "rmserver:8031");
// yarn.resourcemanager.address
conf.set(YarnConfiguration.RM_ADDRESS, "rmserver:8032"); // see step 3
// yarn.resourcemanager.admin.address
conf.set(YarnConfiguration.RM_ADMIN_ADDRESS, "rmserver:8033");
// yarn.resourcemanager.webapp.address
conf.set(YarnConfiguration.RM_WEBAPP_ADDRESS, "rmserver:8088");
// yarn.resourcemanager.webapp.https.address
conf.set(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, "rmserver:8090");

// ---- Execution framework and default file system ----
// mapreduce.framework.name
conf.set(MRConfig.FRAMEWORK_NAME, "yarn");
// fs.defaultFS
conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://hadoopserver:8020"); // see step 2

// ---- MapReduce JobHistory Server endpoints ----
// MapReduce JobHistory Server address
// mapreduce.jobhistory.address
conf.set(JHAdminConfig.MR_HISTORY_ADDRESS, "rmhisserver:10020");
// JobHistory Server admin address
// mapreduce.jobhistory.admin.address
conf.set(JHAdminConfig.JHS_ADMIN_ADDRESS, "rmhisserver:10033");
// MapReduce JobHistory Server web UI address
// mapreduce.jobhistory.webapp.address
conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, "rmhisserver:19888");
// mapreduce.jobhistory.webapp.https.address
conf.set(JHAdminConfig.MR_HISTORY_WEBAPP_HTTPS_ADDRESS, "rmhisserver:19890");

// ---- Job staging and task memory ----
// yarn.app.mapreduce.am.staging-dir
conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/user");
// mapreduce.map.java.opts
// 1638 MB heap is roughly 80% of the 2048 MB configured for the map task below.
conf.set(MRJobConfig.MAP_JAVA_OPTS, " -Xmx1638m");
// mapreduce.map.memory.mb
conf.set(MRJobConfig.MAP_MEMORY_MB, "2048");
// mapreduce.reduce.java.opts
// Same heap-to-task-memory ratio as the map side.
conf.set(MRJobConfig.REDUCE_JAVA_OPTS, " -Xmx1638m");
// mapreduce.reduce.memory.mb
conf.set(MRJobConfig.REDUCE_MEMORY_MB, "2048");

// ---- Shuffle / sort tuning ----
// mapreduce.job.reduce.slowstart.completedmaps
conf.set(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, "0.8");
// mapreduce.task.io.sort.factor
conf.set(MRJobConfig.IO_SORT_FACTOR, "64");
// mapreduce.task.io.sort.mb
conf.set(MRJobConfig.IO_SORT_MB, "256");
// mapreduce.map.output.compress
conf.set(MRJobConfig.MAP_OUTPUT_COMPRESS, "true");
// mapreduce.map.output.compress.codec
conf.set(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, "org.apache.hadoop.io.compress.SnappyCodec");
// mapreduce.reduce.shuffle.parallelcopies
conf.set(MRJobConfig.SHUFFLE_PARALLEL_COPIES, "10");

// ---- Application classpath ----
// yarn.application.classpath
// The $VARIABLE references are passed through as literal text in the property value.
conf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
    "$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,"
        + "$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,"
        + "$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*");

// Fixed number of reduce tasks for this job.
job.setNumReduceTasks(100);
参考资料
1. Map-side Join Vs. Join | Edureka Blog . https://www.edureka.co/blog/map-side-join-vs-join/
2. MapReduce Example | Reduce Side Join MapReduce Example | Edureka. https://www.edureka.co/blog/mapreduce-example-reduce-side-join/
3. Hadoop MapReduce Join & Counter with Example. https://www.guru99.com/introduction-to-counters-joins-in-map-reduce.html
4. 多个MapReduce之间的嵌套_yanzhelee-CSDN博客. https://blog.csdn.net/u010521842/article/details/75042771
5. MapReduce和HBase集成(Apache版本和CDH版本) - 陈小哥cw - 博客园 . https://www.cnblogs.com/chenxiaoge/p/13335436.html
6. java - HBase multiple table scans for the job - Stack Overflow. https://stackoverflow.com/questions/14077766/hbase-multiple-table-scans-for-the-job
7. Yarn的简单介绍_涛博-CSDN博客_yarn. https://blog.csdn.net/qq_33624952/article/details/79341034
8. 如何使用hadoop命令向CDH集群提交MapReduce作业 - 云+社区 - 腾讯云 . https://cloud.tencent.com/developer/article/1078452
9. 如何使用java命令从非集群节点向CDH集群提交MapReduce作业 - 云+社区 - 腾讯云 . https://cloud.tencent.com/developer/article/1078444
10. ResourceManager相关配置参数_似水流年-CSDN博客 . https://blog.csdn.net/xiaoshunzi111/article/details/50617357
11. java - Running MapReduce remotely - Stack Overflow. https://stackoverflow.com/questions/29268845/running-mapreduce-remotely
12. mapreduce - HBase: How to delete rows using Map-Reduce - Stack Overflow . https://stackoverflow.com/questions/6358872/hbase-how-to-delete-rows-using-map-reduce
13. mapreduce - Update and Delete Map/Reduce in HBase - Stack Overflow. https://stackoverflow.com/questions/22435013/update-and-delete-map-reduce-in-hbase
14. hadoop - Efficient way to delete multiple rows in HBase - Stack Overflow. https://stackoverflow.com/questions/4618980/efficient-way-to-delete-multiple-rows-in-hbase
15. hbase数据删除的辛酸_程序的简单生活-CSDN博客_hbase删除历史数据 . https://blog.csdn.net/hellowordlichao/article/details/27176151
16. MapReduce - Quick Guide . https://www.tutorialspoint.com/map_reduce/map_reduce_quick_guide.htm
17. Apache Pig Integration | Apache Phoenix . https://phoenix.apache.org/pig_integration.html
18. 11. Determine YARN and MapReduce Memory Configuration Settings - Hortonworks Data Platform. https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.0.6.0/bk_installing_manually_book/content/rpm-chap1-11.html
19. Configuring Memory for MapReduce Running on YARN - DZone Big Data . https://dzone.com/articles/configuring-memory-for-mapreduce-running-on-yarn
20. Hive mapreduce的map与reduce个数由什么决定?_lihuazaizheli的博客-CSDN博客 . https://blog.csdn.net/lihuazaizheli/article/details/107580462
21. HDFS中的文件写入到Mysql,通过DBConfiguration,DBOutputFormat_刘光华的专栏-CSDN博客. https://blog.csdn.net/zhoudetiankong/article/details/17027917
22. [YARN] Yarn下Mapreduce的内存参数理解 - SegmentFault 思否. https://segmentfault.com/a/1190000003777237
23. 17-mapreduce.pdf. https://web.stanford.edu/class/archive/cs/cs110/cs110.1202/static/lectures/17-mapreduce.pdf
24. HBase常用Java API - 知乎. https://zhuanlan.zhihu.com/p/138768143
25. Apache Hadoop 3.3.1 – MapReduce Tutorial. http://hadoop.apache.org/docs/r3.3.1/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html
26. java - Access args[0] value in MapReduce - Stack Overflow. https://stackoverflow.com/questions/21700133/access-args0-value-in-mapreduce
27. Zstandard - Real-time data compression algorithm. http://facebook.github.io/zstd/
28. Choosing the number of reducers · paulhoule/infovore Wiki · GitHub. https://github.com/paulhoule/infovore/wiki/Choosing-the-number-of-reducers
29. MapReduce任务参数调优(转) - 悟寰轩-叶秋 - 博客园. https://www.cnblogs.com/sunxucool/p/4459006.html
30. mapreduce - How to optimize shuffling/sorting phase in a hadoop job - Stack Overflow. https://stackoverflow.com/questions/34186583/how-to-optimize-shuffling-sorting-phase-in-a-hadoop-job
31. Hadoop学习笔记—8.Combiner与自定义Combiner - EdisonZhou - 博客园. https://www.cnblogs.com/edisonchou/p/4297786.html