本地spark读取阿里云oss
1:pom文件导入依赖
<!-- Aliyun OSS SDK: base client library for OSS access -->
<dependency>
<groupId>com.aliyun.oss</groupId>
<artifactId>aliyun-sdk-oss</artifactId>
<version>3.15.1</version>
</dependency>
<!-- emr-core: provides the Hadoop FileSystem implementation for the oss:// scheme
     (com.aliyun.fs.oss.nat.NativeOssFileSystem, referenced in the Spark conf below) -->
<dependency>
<groupId>com.aliyun.emr</groupId>
<artifactId>emr-core</artifactId>
<version>1.6.0</version>
<!-- exclude the transitive httpclient so the explicitly pinned version below wins -->
<exclusions>
<exclusion>
<artifactId>httpclient</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- pin httpcore/httpclient explicitly to avoid version clashes between
     the OSS SDK and emr-core transitive dependencies -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.4.1</version>
<!-- use the explicit httpcore 4.4 declared above, not httpclient's transitive one -->
<exclusions>
<exclusion>
<artifactId>httpcore</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
</exclusions>
</dependency>
2:读取代码示例
// Read Parquet data from Aliyun OSS with a locally running Spark job.
// NOTE(review): `accessKeyId` / `accessKeySecret` are assumed to be defined elsewhere
// (e.g. loaded from config or environment) — never hard-code credentials in source.
val conf: SparkConf = new SparkConf().setAppName("dataArchiveOss")
conf.set("spark.driver.maxResultSize", "2g")
// Route the oss:// scheme through the EMR NativeOssFileSystem (provided by emr-core).
conf.set("spark.hadoop.fs.oss.impl", "com.aliyun.fs.oss.nat.NativeOssFileSystem")
// Required when running against OSS from outside an EMR cluster.
conf.set("spark.hadoop.mapreduce.job.run-local", "true")
conf.set("spark.hadoop.fs.oss.accessKeyId", accessKeyId)
conf.set("spark.hadoop.fs.oss.accessKeySecret", accessKeySecret)
val spark = SparkSession
  .builder()
  .config(conf)
  .master("local[*]") // local mode for development/testing
  .getOrCreate()
import spark.implicits._
// URI authority is "<bucket>.<endpoint>" — fixed typo in placeholder: "enpoint" -> "endpoint".
spark
  .read
  .parquet("oss://bucket.endpoint/读取路径")
  .show(20, truncate = false) // named arg clarifies the boolean literal