IDEA 运行spark程序:
package sparkcore import org.apache.spark.{SparkConf, SparkContext} object TextFileTest { def main(args: Array[String]): Unit = { //创建 sparkContext System.setProperty("hadoop.home.dir","C:\\hadoop\\hadoop-3.1.3\\hadoop-3.1.3") val conf: SparkConf = new SparkConf().setAppName("sparkStream").setMaster("local[*]") val sc = new SparkContext(conf) //读取文件并过滤空行 val lines = sc.textFile("file:///C:/KuGou/com_program_learn/02java/IdeaProjects/JavaSpark/src/data/input/rng_comment.txt").filter(_.trim != "") lines.foreach( x => { System.out.println(x.toString) } ) } }
报以下错误:
ERROR GPLNativeCodeLoader: Could not load native gpl library
java.lang.UnsatisfiedLinkError: no gplcompression in java.library.path
at java.lang.ClassLoader.loadLibrary(ClassLoader.java:1738)
at java.lang.Runtime.loadLibrary0(Runtime.java:823)
at java.lang.System.loadLibrary(System.java:1028)
at com.hadoop.compression.lzo.GPLNativeCodeLoader.<clinit>(GPLNativeCodeLoader.java:32)
at com.hadoop.compression.lzo.LzoCodec.<clinit>(LzoCodec.java:71)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at org.apache.hadoop.conf.Configuration.getClassByNameOrNull(Configuration.java:1659)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:1624)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:128)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:175)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:106)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:75)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:133)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:155)
at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:187)
at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:181)
at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:93)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.FilteredRDD.compute(FilteredRDD.scala:34)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.rdd.FilteredRDD.compute(FilteredRDD.scala:34)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
at org.apache.spark.scheduler.Task.run(Task.scala:51)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
at java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
at java.lang.Thread.run(Thread.java:662)
14/08/06 20:32:11 ERROR LzoCodec: Cannot load native-lzo without native-hadoop
需要增加windows下hadoop对lzo的支持
<dependency> <groupId>com.hadoop.compression</groupId> <artifactId>com.hadoop.compression</artifactId> <version>1.0</version> <scope>system</scope> <systemPath>${project.basedir}/src/main/resources/lib/hadoop-lzo-0.4.20.jar</systemPath> </dependency>
将jar包放入:类路径下resources/lib
lzo2.dll文件和gplcompression.dll文件,放到system32就行了
链接:https://pan.baidu.com/s/1_pI-7RDm66AYPhxIhb4p5w
提取码:aitc