Windows下调试Hadoop

本文详细介绍了如何在Windows环境下配置Hadoop,包括设置环境变量、拷贝winutils.exe和hadoop.dll到系统目录,以及解决IDEA中运行MapReduce程序遇到的NativeIO错误。通过创建并编辑NativeIO.java文件,成功本地运行WordCount示例。此外,还展示了如何将Job提交到华为云运行,并提供了相关配置参数。
摘要由CSDN通过智能技术生成

Windows下调试Hadoop

一、Hadoop在Windows下的环境设置
  • winutils-master 下载链接(原文链接「拌面一碗大 (thsjl.com)」已失效;可在 GitHub 的 cdarlint/winutils 仓库获取对应版本)

  • window系统的环境变量配置

    • 下载网络上的依赖包winutils-master
      • 例如:F:\soft-env\05-hadoop-3.1.0\bin
    • 解压winutils-master.zip,找到3.1.0版本
      • 将所有的文件覆盖到F:\soft-env\05-hadoop-3.1.0\bin目录
    • 将winutils.exe和hadoop.dll文件拷贝到 C:\Windows\System32目录下
  • 环境变量

    • 将Hadoop添加到环境变量
      • HADOOP_HOME–>F:\soft-env\05-hadoop-3.1.0
      • HADOOP_USER_NAME–>longlong | root (core-site.xml)
      • Path --> %HADOOP_HOME%\bin;%HADOOP_HOME%\sbin;
  • 重启电脑

二、 idea下运行MR程序
2.1 本地运行Job
  • pom.xml 文件

  • <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <!-- Hadoop版本控制 -->
        <hadoop.version>3.1.2</hadoop.version>
      </properties>
    
      <dependencies>
        <dependency>
          <groupId>org.apache.logging.log4j</groupId>
          <artifactId>log4j-slf4j-impl</artifactId>
          <version>2.12.0</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-auth</artifactId>
          <version>${hadoop.version}</version>
        </dependency>
      </dependencies>
      <build>
        <plugins>
          <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.6.1</version>
            <configuration>
              <source>1.8</source>
              <target>1.8</target>
            </configuration>
          </plugin>
          <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
              <descriptorRefs>
                <descriptorRef>jar-with-dependencies</descriptorRef>
              </descriptorRefs>
            </configuration>
            <executions>
              <execution>
                <id>make-assembly</id>
                <phase>package</phase>
                <goals>
                  <goal>single</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    
  • WordCountMapper.java

  • public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    
        private Text outKey = new Text();
        private IntWritable outValue = new IntWritable(1);
    
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            // 1. 获取当前行数据
            String line = value.toString();
            // 2. 切分
            String[] words = line.split(" ");
            // 3. 转换写出
            for (String word : words) {
                outKey.set(word);
                context.write(outKey, outValue);
            }
        }
    }
    
  • WordCountReducer.java

  • public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    
        private IntWritable outValue = new IntWritable();
    
        // private int total = 0;
    
        @Override
        protected void reduce(Text key, Iterable<IntWritable> words, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int total = 0;
            // 1. 求和
            for (IntWritable word : words) {
                total += word.get();
            }
            outValue.set(total);
            // 2. 输出
            context.write(key, outValue);
        }
    
    }
    
  • WordCountDriver.java

  • public class WordCountDriver {
        public static void main(String[] args) throws IOException {
    
            // 1. 加载配置文件
            Configuration conf = new Configuration();
    
            Job job = null;
            try {
                job = Job.getInstance(conf, "WordCount");
            } catch (IOException e) {
                e.printStackTrace();
            }
    
            // 2. 设置主类
            job.setJarByClass(WordCountDriver.class);
    
            // 3. 设置Map端输出
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
    
            // 4. 设置Reducer端输出
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            job.setNumReduceTasks(1);
    
            // 5. 设置文件输入输出路径
    
            FileInputFormat.setInputPaths(job, new Path("D:\\BigDataProjects\\data\\wordcount\\input\\word.txt"));
            FileOutputFormat.setOutputPath(job, new Path("D:\\BigDataProjects\\data\\wordcount\\out"));
    
           // 6. 关闭程序
            try {
                job.waitForCompletion(true);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
        }
    
    }
    
2.2 无法运行
  • Error:NativeIO…

    • 检查环境变量
    • 重启电脑
  • 无法解决

    • 在本地创建NativeIO.java

    • 代码太长。。。。
      
    • 解决过程

      • idea开启全项目查找类 NativeIO.class

      • 创建包 org.apache.hadoop.io.nativeio

      • 创建NativeIO.java

      • 将NativeIO.class内容拷贝到NativeIO.java中

      • 修改access方法

      •  public static boolean access(String path, AccessRight desiredAccess)
                    throws IOException {
                // Patched to always report the path as accessible: this
                // bypasses NativeIO's Windows ACL check, which fails when
                // hadoop.dll/winutils.exe don't match the Hadoop version.
                return true;
            }
        
      • 再次运行,完美解决。

2.3 idea提交Job到华为云运行
  • 拷贝core-site.xml,hdfs-site.xml到resources下

  • WordCountDriver.java

  • public class WordCountDriver {
        public static void main(String[] args) throws IOException {
    
            // 1. 加载配置文件
            Configuration conf = new Configuration();
    
            // 指定MapReduce运行在Yarn上
            conf.set("mapreduce.framework.name","local");
            // 指定mapreduce可以在远程集群运行
            conf.set("mapreduce.app-submission.cross-platform","true");
            // datanode使用主机名
            conf.set("dfs.client.use.datanode.hostname", "true");
    
            Job job = null;
            try {
                job = Job.getInstance(conf, "WordCount");
            } catch (IOException e) {
                e.printStackTrace();
            }
    
            // 2. 设置主类
            job.setJarByClass(WordCountDriver.class);
    
            // 3. 设置Map端输出
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
    
            // 4. 设置Reducer端输出
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            job.setNumReduceTasks(1);
    
            // 5. 设置文件输入输出路径
    
            FileInputFormat.setInputPaths(job, new Path("hdfs://node1:8020/wordcount/input/word.txt"));
            FileOutputFormat.setOutputPath(job, new Path("hdfs://node1:8020/wordcount/out"));
                    // 6. 关闭程序
            try {
                job.waitForCompletion(true);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
        }
    
    }
    
  • 注意:

  • ## 若服务器(DataNode)配置了主机名,需要增加此配置,
    ## 避免外网客户端直连 DataNode 的内网 IP 导致读写失败
    conf.set("dfs.client.use.datanode.hostname", "true");
    
2.4 Jar包上传运行
  • WordCountDriver.java

  • public class WordCountDriver {
        public static void main(String[] args) throws IOException {
    
            // 1. 加载配置文件
            Configuration conf = new Configuration();
    
            // 指定MapReduce运行在Yarn上
            conf.set("mapreduce.framework.name","local");
            // 指定mapreduce可以在远程集群运行
            conf.set("mapreduce.app-submission.cross-platform","true");
            // datanode使用主机名
            conf.set("dfs.client.use.datanode.hostname", "true");
    
            Job job = null;
            try {
                job = Job.getInstance(conf, "WordCount");
            } catch (IOException e) {
                e.printStackTrace();
            }
    
            // 2. 设置主类
            job.setJarByClass(WordCountDriver.class);
    
            // 3. 设置Map端输出
            job.setMapperClass(WordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
    
            // 4. 设置Reducer端输出
            job.setReducerClass(WordCountReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            job.setNumReduceTasks(1);
    
            // 5. 设置文件输入输出路径
    
            FileInputFormat.setInputPaths(job, args[0]);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
            // 6. 关闭程序
            try {
                job.waitForCompletion(true);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
            }
        }
    
    }
    
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值