Debugging Hadoop on Windows
1. Setting up the Hadoop environment on Windows
- [winutils-master download link](拌面一碗大 (thsjl.com))
- Configure the Windows environment
  - Download the winutils-master dependency package
  - Example install path: F:\soft-env\05-hadoop-3.1.0\bin
  - Unzip winutils-master.zip and locate the 3.1.0 version
  - Copy all of its files into the F:\soft-env\05-hadoop-3.1.0\bin directory, overwriting the existing ones
  - Copy winutils.exe and hadoop.dll into C:\Windows\System32
- Environment variables
  - Add Hadoop to the environment variables:
    - HADOOP_HOME --> F:\soft-env\05-hadoop-3.1.0
    - HADOOP_USER_NAME --> longlong | root (see core-site.xml)
    - Path --> %HADOOP_HOME%\bin;%HADOOP_HOME%\sbin;
- Restart the computer
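After the restart, it is worth confirming that the variables are actually visible to the JVM. A minimal sketch (the class name CheckHadoopEnv is made up for illustration; the install path is the one used above):

```java
import java.io.File;

// Prints HADOOP_HOME and checks that winutils.exe is where Hadoop expects it.
public class CheckHadoopEnv {
    public static void main(String[] args) {
        String hadoopHome = System.getenv("HADOOP_HOME");
        if (hadoopHome == null) {
            System.err.println("HADOOP_HOME is not set - set it and restart");
            return;
        }
        File winutils = new File(hadoopHome, "bin\\winutils.exe");
        System.out.println("HADOOP_HOME = " + hadoopHome);
        System.out.println("winutils.exe present: " + winutils.exists());
    }
}
```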
2. Running MR programs in IDEA
2.1 Running a Job locally
- pom.xml

```xml
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Hadoop version -->
    <hadoop.version>3.1.2</hadoop.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-slf4j-impl</artifactId>
        <version>2.12.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-auth</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.6.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
```
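With the assembly plugin configured as above, packaging the job into a runnable fat jar (used again in section 2.4) is one command; by the plugin's default naming, the output lands in target/ with a jar-with-dependencies suffix:

```sh
mvn clean package
# produces target/<artifactId>-<version>-jar-with-dependencies.jar
```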
- WordCountMapper.java

```java
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text outKey = new Text();
    private IntWritable outValue = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Get the current line of input
        String line = value.toString();
        // 2. Split it into words
        String[] words = line.split(" ");
        // 3. Emit each word with a count of 1
        for (String word : words) {
            outKey.set(word);
            context.write(outKey, outValue);
        }
    }
}
```
- WordCountReducer.java

```java
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable outValue = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        // 1. Sum the counts for this word
        for (IntWritable value : values) {
            total += value.get();
        }
        outValue.set(total);
        // 2. Write out the word and its total
        context.write(key, outValue);
    }
}
```
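Because this reduce logic just sums (it is associative and commutative), it can optionally double as a map-side combiner to shrink the shuffle. This is one extra line in the driver, not part of the original code:

```java
// Optional: also run the reducer as a map-side combiner.
job.setCombinerClass(WordCountReducer.class);
```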
- WordCountDriver.java

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static void main(String[] args) throws IOException {
        // 1. Load the configuration and create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "WordCount");

        // 2. Set the main class
        job.setJarByClass(WordCountDriver.class);

        // 3. Configure the map side
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 4. Configure the reduce side
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        // 5. Set the input and output paths (the output directory must not exist yet)
        FileInputFormat.setInputPaths(job, new Path("D:\\BigDataProjects\\data\\wordcount\\input\\word.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\BigDataProjects\\data\\wordcount\\out"));

        // 6. Run the job and wait for it to finish
        try {
            job.waitForCompletion(true);
        } catch (IOException | InterruptedException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
```
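As a quick sanity check, suppose word.txt contains the two lines below:

```text
hello world
hello hadoop
```

With one reducer, the job writes part-r-00000 into the out directory with the keys sorted and tab-separated:

```text
hadoop	1
hello	2
world	1
```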
2.2 If the job won't run
- Error: NativeIO...
  - Check the environment variables
  - Restart the computer
- If that doesn't solve it:
- Create a local NativeIO.java
  - (the code is too long to paste here....)
- Resolution steps:
  - In IDEA, search the whole project for the class NativeIO.class
  - Create the package org.apache.hadoop.io.nativeio in your own source tree
  - Create NativeIO.java in that package
  - Copy the contents of NativeIO.class into NativeIO.java
  - Modify the access method:
```java
public static boolean access(String path, AccessRight desiredAccess)
        throws IOException {
    return true;
}
```
- Run it again: solved. This works because classes compiled from your own source tree are loaded ahead of the identical class inside the hadoop-common jar, so the stubbed access method shadows the original and the native Windows check is skipped.
2.3 Submitting a Job from IDEA to a Huawei Cloud cluster
- Copy core-site.xml and hdfs-site.xml from the cluster into the resources directory (a minimal core-site.xml sketch follows)
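If you don't have the cluster's files at hand, a minimal core-site.xml along these lines is enough for this walkthrough; the fs.defaultFS address is assumed to match the node1:8020 endpoint used in the driver below, and the values on a real cluster may differ:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
    <!-- Assumed NameNode address; must match your cluster (node1:8020 here). -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node1:8020</value>
    </property>
</configuration>
```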
- WordCountDriver.java

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static void main(String[] args) throws IOException {
        // 1. Load the configuration
        Configuration conf = new Configuration();
        // Run with the local job runner against the remote HDFS
        // ("yarn" would submit the job to the cluster instead)
        conf.set("mapreduce.framework.name", "local");
        // Allow submitting MapReduce jobs across platforms (Windows -> Linux)
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // Connect to datanodes by hostname instead of their internal IPs
        conf.set("dfs.client.use.datanode.hostname", "true");

        Job job = Job.getInstance(conf, "WordCount");

        // 2. Set the main class
        job.setJarByClass(WordCountDriver.class);

        // 3. Configure the map side
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 4. Configure the reduce side
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        // 5. Set the HDFS input and output paths
        FileInputFormat.setInputPaths(job, new Path("hdfs://node1:8020/wordcount/input/word.txt"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://node1:8020/wordcount/out"));

        // 6. Run the job and wait for it to finish
        try {
            job.waitForCompletion(true);
        } catch (IOException | InterruptedException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
```
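If the HADOOP_USER_NAME environment variable from section 1 is not picked up and HDFS permission errors appear, the user can also be set in code. A sketch, placed before Job.getInstance (Hadoop's login code also reads this system property; use the user name from section 1):

```java
// Alternative to the HADOOP_USER_NAME environment variable;
// must run before the job and its FileSystem are created.
System.setProperty("HADOOP_USER_NAME", "root");
```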
- Note:

```java
// Servers that are addressed by hostname need this setting,
// so an external client does not try to reach internal network addresses:
conf.set("dfs.client.use.datanode.hostname", "true");
```
2.4 Uploading the jar and running it on the cluster
- WordCountDriver.java

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static void main(String[] args) throws IOException {
        // 1. Load the configuration
        Configuration conf = new Configuration();
        // Run with the local job runner ("yarn" would submit to the cluster)
        conf.set("mapreduce.framework.name", "local");
        // Allow submitting MapReduce jobs across platforms (Windows -> Linux)
        conf.set("mapreduce.app-submission.cross-platform", "true");
        // Connect to datanodes by hostname instead of their internal IPs
        conf.set("dfs.client.use.datanode.hostname", "true");

        Job job = Job.getInstance(conf, "WordCount");

        // 2. Set the main class
        job.setJarByClass(WordCountDriver.class);

        // 3. Configure the map side
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 4. Configure the reduce side
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);

        // 5. Take the input and output paths from the command line
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 6. Run the job and wait for it to finish
        try {
            job.waitForCompletion(true);
        } catch (IOException | InterruptedException | ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
}
```
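After packaging (section 2.1), upload the jar-with-dependencies jar to a cluster node and launch it with `hadoop jar`. The jar name below is a placeholder for whatever your build produces (use the fully qualified class name if the driver sits in a package); the HDFS paths are the ones used throughout this section:

```sh
# jar name and driver class path are assumptions - substitute your own
hadoop jar wordcount-1.0-jar-with-dependencies.jar WordCountDriver \
    /wordcount/input/word.txt /wordcount/out
```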