一、使用前提
1.配置windows环境变量Hadoop
2.下载winutils.exe和hadoop.dll,hadoop.lib等windows的hadoop依赖文件放在本机hadoop\bin目录下
二、方案一(使用Configuration配置相关属性)
创建一个Configuration对象时,其构造方法默认只加载core-default.xml和core-site.xml;mapred-site.xml、yarn-site.xml等则是在构建Job(内部的JobConf)时才会被加载
如果没有这些配置文件,我们可以通过手动设置
public class WcRunner {
    /**
     * Submits the word-count MapReduce job to a remote YARN cluster, setting
     * every cluster property in code (no *-site.xml files on the classpath).
     *
     * @param args unused
     * @throws Exception if job configuration or submission fails
     */
    public static void main(String[] args) throws Exception {
        // Act as the remote "hadoop" user so HDFS permission checks pass.
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        Configuration conf = new Configuration();
        // NameNode address. NOTE(review): the input/output paths below use
        // 192.168.2.100 instead of "slave1" — confirm both refer to the same host.
        conf.set("fs.defaultFS", "hdfs://slave1:9000");
        // Allow submitting from Windows to a Linux cluster.
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // Run on YARN rather than the local job runner.
        conf.set("mapreduce.framework.name", "yarn");
        conf.set("yarn.resourcemanager.hostname", "slave1");
        // Jar that ships the Mapper/Reducer classes to the cluster.
        conf.set("mapreduce.job.jar", "HadoopDemo.jar");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WcRunner.class);
        // Mapper and Reducer implementations.
        job.setMapperClass(WcMapper.class);
        job.setReducerClass(WcReducer.class);
        // Reducer output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Mapper output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Input data location and output directory (output must not already exist).
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.2.100:9000/user/hadoop/srcdata"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.2.100:9000/user/hadoop/output"));
        // Submit and block until the job finishes. Propagate success/failure via
        // the process exit code — the original ignored the boolean result, so a
        // failed job still exited with status 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
三、方案二(将相关配置文件放在src路径下)
将core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml从服务器拷贝到win后,放到classpath路径下
Configuration conf = new Configuration();在加载的时候,会自动加载这些xml
mapred-site.xml中记得添加如下属性:
<!--允许跨平台提交-->
<property>
<name>mapreduce.app-submission.cross-platform</name>
<value>true</value>
</property>
public class WcRunner {
    /**
     * Submits the word-count MapReduce job to a remote YARN cluster, relying on
     * core-site.xml / hdfs-site.xml / mapred-site.xml / yarn-site.xml found on
     * the classpath for all cluster connection settings.
     *
     * @param args unused
     * @throws Exception if job configuration or submission fails
     */
    public static void main(String[] args) throws Exception {
        // Act as the remote "hadoop" user so HDFS permission checks pass.
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        // Cluster settings are picked up automatically from the *-site.xml
        // files on the classpath; only the job jar is set explicitly.
        Configuration conf = new Configuration();
        conf.set("mapreduce.job.jar", "HadoopDemo.jar");

        Job job = Job.getInstance(conf);
        job.setJarByClass(WcRunner.class);
        // Mapper and Reducer implementations.
        job.setMapperClass(WcMapper.class);
        job.setReducerClass(WcReducer.class);
        // Reducer output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Mapper output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Input data location and output directory (output must not already exist).
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.2.100:9000/user/hadoop/srcdata"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.2.100:9000/user/hadoop/output"));
        // Submit and block until the job finishes. Propagate success/failure via
        // the process exit code — the original ignored the boolean result, so a
        // failed job still exited with status 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}