1、编译
github地址:https://github.com/apache/incubator-hudi
cd incubator-hudi-hoodie-0.4.7
mvn clean install -DskipITs -DskipTests -Dhadoop.version=2.6.0-cdh5.13.0 -Dhive.version=1.1.0-cdh5.13.0
2、快速开始
1、新建项目
新建maven项目,并加入scala框架,然后依次加入spark、hudi依赖
<properties>
<scala.version>2.11</scala.version>
<spark.version>2.4.0</spark.version>
<parquet.version>1.10.1</parquet.version>
<parquet-format-structures.version>1.10.1-palantir.3-2-gda7f810</parquet-format-structures.version>
<hudi.version>0.4.7</hudi.version>
</properties>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
<artifactId>parquet-column</artifactId>
<groupId>org.apache.parquet</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-hive_${scala.version}</artifactId>
<version>${
spark.version}</version