Macbook Intellij idea与Eclipse远程调试Hadoop应用程序

最新推荐文章于 2024-07-05 17:00:12 发布

影夜life

最新推荐文章于 2024-07-05 17:00:12 发布

阅读量4.8k

点赞数

分类专栏： hadoop 大数据 mac 文章标签： hadoop intellij idea

本文链接：https://blog.csdn.net/u013980127/article/details/52118528

版权

hadoop 同时被 3 个专栏收录

20 篇文章 2 订阅

订阅专栏

大数据

4 篇文章 1 订阅

订阅专栏

mac

4 篇文章 0 订阅

订阅专栏

Intellij idea

准备工作

在虚拟机安装hadoop集群
开发机配置
（1）IntelliJ IDEA版本15.0.4
（2）jdk版本1.7.0_71
（3）Mac OS X 10.11.6
（4）hadoop安装（hadoop-2.5.2.tar.gz解压）
/Users/zhangws/opt/hadoop-2.5.2
配置环境变量

HADOOP_HOME=/Users/zhangws/opt/hadoop-2.5.2
HADOOP_BIN_PATH=$HADOOP_HOME/bin
HADOOP_PREFIX=/Users/zhangws/opt/hadoop-2.5.2

另外，PATH变量在最后追加 :$HADOOP_HOME/bin（Mac下使用冒号分隔，例如 export PATH=$PATH:$HADOOP_HOME/bin）

创建工程

这里写图片描述

pom.xml文件内容

<!-- Maven build descriptor for the hadoop-demo sample project (packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.zw</groupId>
  <artifactId>hadoop-demo</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>hadoop-demo</name>
  <url>http://maven.apache.org</url>

  <properties>
      <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      <!-- Single place to set the Hadoop version; referenced by every Hadoop dependency below. -->
      <hadoop.version>2.5.2</hadoop.version>
  </properties>

  <dependencies>
    <!-- Hadoop artifacts, all pinned to ${hadoop.version}. -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
      <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>${hadoop.version}</version>
      </dependency>

      <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-client</artifactId>
          <version>${hadoop.version}</version>
      </dependency>

    <!-- JUnit is only on the test classpath. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>

core-site.xml配置（resources目录）

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <!-- Default file system URI: the HDFS address of the Hadoop cluster running in the virtual machine. -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
</configuration>

注：value指定hadoop的地址（虚拟机）

设置运行参数

这里写图片描述

Working directory是本地hadoop的home路径；
Program arguments的内容如下：

这里写图片描述

hdfs://master:9000/weekone/words/input/test.txt
hdfs://master:9000/weekone/words/output
分别为输入参数和输出参数。

注：
如果input/test.txt文件没有，请先手动上传；
/output/ 必须是不存在的，否则程序运行到最后，发现目标目录存在，也会报错；

按照上面步骤就可以在适当的位置打断点，调试了。

示例

package com.zw.mr.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.io.InputStream;

/**
 * 这是统计单词个数的例子
 *
 * Created by zhangws on 16/7/31.
 */
public class WordsCount {
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] strings = value.toString().split(" ");
            for (String s : strings) {
                //将文本行放入key
                context.write(new Text(s), new IntWritable(1));
            }
        }
    }

    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {

            int count = 0;
            for (IntWritable v : values) {
                count += v.get();
            }
            //输出key
            context.write(key, new IntWritable(count));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: wordcount <in> [<in>...] <out>");
            System.exit(2);
        }

        //先删除output目录
        rmr(conf, otherArgs[otherArgs.length - 1]);

        Job job = Job.getInstance(conf, "WordsCount");
        job.setJarByClass(WordsCount.class);

        job.setMapperClass(MyMapper.class);
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        if (job.waitForCompletion(true)) {
            cat(conf, otherArgs[1] + "/part-r-00000");
            System.out.println("success");
        } else {
            System.out.println("fail");
        }
    }

    /**
     * 删除指定目录
     *
     * @param conf
     * @param dirPath
     *
     * @throws IOException
     */
    private static void rmr(Configuration conf, String dirPath) throws IOException {
        boolean delResult = false;
//        FileSystem fs = FileSystem.get(conf);
        Path targetPath = new Path(dirPath);
        FileSystem fs = targetPath.getFileSystem(conf);
        if (fs.exists(targetPath)) {
            delResult = fs.delete(targetPath, true);
            if (delResult) {
                System.out.println(targetPath + " has been deleted sucessfullly.");
            } else {
                System.out.println(targetPath + " deletion failed.");
            }
        }
        return delResult;
    }

    /**
     * 输出指定文件内容
     *
     * @param conf     HDFS配置
     * @param filePath 文件路径
     *
     * @return 文件内容
     *
     * @throws IOException
     */
    public static void cat(Configuration conf, String filePath) throws IOException {

//        FileSystem fileSystem = FileSystem.get(conf);
        InputStream in = null;
        Path file = new Path(filePath);
        FileSystem fileSystem = file.getFileSystem(conf);
        try {
            in = fileSystem.open(file);
            IOUtils.copyBytes(in, System.out, 4096, true);
        } finally {
            if (in != null) {
                IOUtils.closeStream(in);
            }
        }
    }
}

日志文件

这里写图片描述

# Root logger: level INFO, sent to the "stdout" appender defined below.
log4j.rootLogger=INFO, stdout

# Per-package overrides, currently disabled.
#log4j.logger.org.springframework=INFO
#log4j.logger.org.apache.activemq=INFO
#log4j.logger.org.apache.activemq.spring=WARN
#log4j.logger.org.apache.activemq.store.journal=INFO
#log4j.logger.org.activeio.journal=INFO

# Console appender; columns are: time | level | thread | logger | class line | message.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} | %-5.5p | %-16.16t | %-32.32c{1} | %-32.32C %4L | %m%n

权限设置

由于客户端与服务器的权限问题，对输入目录等需要赋予授权
hdfs dfs -chmod 777 test/

或者hdfs-site.xml里添加

<property>
    <!-- Disables HDFS permission checking (development clusters only).
         Hadoop 2.x names this key dfs.permissions.enabled; the older
         dfs.permissions key is deprecated. -->
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>

运行结果

这里写图片描述

可能会遇到下面问题（本人是用自编译的hadoop-2.6.4遇到的）
java.io.IOException: No FileSystem for scheme: hdfs

这里写图片描述

Eclipse

准备工作

在虚拟机安装hadoop集群
开发机配置
（1）Eclipse Version: Mars.2 Release (4.5.2)
（2）jdk版本1.7.0_71
（3）Mac OS X 10.11.6
（4）hadoop安装（hadoop-2.5.2.tar.gz解压）
/Users/zhangws/opt/hadoop-2.5.2
环境变量同上
安装插件
https://github.com/winghc/hadoop2x-eclipse-plugin
下载hadoop-eclipse-plugin-2.6.0.jar，放入eclipse的plugins目录，启动eclipse