<!-- ORC MapReduce bindings: provides the OrcInputFormat / OrcStruct classes
     used by the Spark read example below. -->
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-mapreduce</artifactId>
<version>1.1.0</version>
</dependency>
// Read ORC files into a JavaPairRDD via the orc-mapreduce input format
// declared in the Maven dependency above.
SparkConf sparkConf = new SparkConf();
sparkConf.setAppName("spark-orc");
// Pin the HDFS FileSystem implementation so hdfs:// paths resolve even when
// the Hadoop service-loader files are lost during jar assembly.
sparkConf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
JavaSparkContext sc = new JavaSparkContext(sparkConf);
// Fixes vs. original:
// 1) "rcInputFormat.class" was a typo for OrcInputFormat.class.
// 2) org.apache.orc.mapreduce.OrcInputFormat implements the *new* MapReduce
//    API (org.apache.hadoop.mapreduce.InputFormat), so it must be loaded with
//    newAPIHadoopFile — the old-API hadoopFile would not accept it.
// 3) Its key type is NullWritable (row position is not exposed as a key),
//    so the pair type is <NullWritable, OrcStruct>, not <LongWritable, ...>.
JavaPairRDD<NullWritable, OrcStruct> rdd = sc.newAPIHadoopFile(
        orcPath,
        OrcInputFormat.class,
        NullWritable.class,
        OrcStruct.class,
        sc.hadoopConfiguration());
Alternatively, a SparkSession can be used to read ORC files:
// Read the same ORC path through the DataFrame API.
// Renamed the val from "sqlContext": SparkSession.builder() returns a
// SparkSession, not a SQLContext, so the old name was misleading.
val spark = SparkSession.builder().config(sparkConf).getOrCreate()
// Capture the resulting DataFrame instead of discarding it, so it can
// actually be used (the original dropped the return value on the floor).
val orcDf = spark.read.orc(orcPath)