作为开发人员，我们可以暂时忽略集群等部署环境，首要关注开发环境。本文介绍一种可在IDE上运行/调试MapReduce程序的方法，方便程序员尽快开始大数据MapReduce编程。
maven依赖
按规范新建 Maven 项目，下面是我的 pom.xml：
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Project coordinates: POM model version 4.0.0, artifact org.lanqiao:bigData:1.0, packaged as a jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>org.lanqiao</groupId>
<artifactId>bigData</artifactId>
<version>1.0</version>
<packaging>jar</packaging>
<properties>
  <!-- Logging: slf4j-api.version is consumed by the slf4j-api dependency below.
       NOTE(review): logback.version is not referenced in the visible part of this
       POM; presumably a logback dependency further down uses it (confirm). -->
  <slf4j-api.version>1.7.25</slf4j-api.version>
  <logback.version>1.2.3</logback.version>
  <!-- NOTE(review): java.version is not referenced in the visible part of this POM;
       presumably read by compiler plugin configuration elsewhere (confirm). -->
  <java.version>1.8</java.version>
  <!-- Hadoop: hadoop.version is shared by the hadoop-client and hadoop-minicluster
       dependencies. NOTE(review): hadoop-core.version is not referenced by any
       dependency visible here; confirm it is still needed. -->
  <hadoop-core.version>1.2.1</hadoop-core.version>
  <hadoop.version>2.6.5</hadoop.version>
  <!-- Testing: version used by the junit dependency. -->
  <junit.version>4.12</junit.version>
</properties>
<dependencies>
<!-- Logging API (SLF4J); the version is taken from the slf4j-api.version property. -->
<dependency>
  <groupId>org.slf4j</groupId>
  <artifactId>slf4j-api</artifactId>
  <version>${slf4j-api.version}</version>
</dependency>
<!-- Main Hadoop client artifact; the version is taken from the hadoop.version property. -->
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-client</artifactId>
  <version>${hadoop.version}</version>
</dependency>
<!-- Unit-test dependencies start here. JUnit: test scope, version from junit.version. -->
<dependency>
  <groupId>junit</groupId>
  <artifactId>junit</artifactId>
  <version>${junit.version}</version>
  <scope>test</scope>
</dependency>
<!-- AssertJ core assertions, test scope only. The version is pinned inline here
     rather than through an entry in the properties section. -->
<dependency>
  <groupId>org.assertj</groupId>
  <artifactId>assertj-core</artifactId>
  <version>3.6.2</version>
  <scope>test</scope>
</dependency>
<!-- Apache MRUnit, test scope only. The hadoop2 classifier selects the MRUnit
     artifact variant published for Hadoop 2, matching hadoop.version (2.6.5). -->
<dependency>
  <groupId>org.apache.mrunit</groupId>
  <artifactId>mrunit</artifactId>
  <version>1.1.0</version>
  <classifier>hadoop2</classifier>
  <scope>test</scope>
</dependency>
<!-- Hadoop test artifact for running mini clusters -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>