SparkSQL Official Java Example: ERROR CodeGenerator: failed to compile


Table of Contents

Problem Code

Locating the Problem

Not Done Yet?

Locating the Problem

The Correct Code

Summary


Problem Code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;


import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.TypedColumn;



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.spark.sql.functions.col;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * SparkSQL Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregation operation
     * Type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable{
        private String name;
        private long salary;

        public Employee(String name, long salary) {
            this.name = name;
            this.salary = salary;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class  Average implements Serializable{
        private long sum;
        private long count;

        public Average(long sum, long count) {
            this.sum = sum;
            this.count = count;
        }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            return new Average(0L,0L);
        }
        // Combine two values to produce a new value. For performance, the function may modify `buffer`
        // and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}

Problem:

While working through the Type-Safe User-Defined Aggregate Functions example from the official Spark SQL documentation, I ran into the following error:

19/11/15 14:26:36 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Employee(java.lang.String, long)"
 .......(omitted)
19/11/15 14:26:36 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_6 = InitializeJavaBean_0(i);
/* 025 */     if (false) {
/* 026 */       mutableRow.setNullAt(0);
/* 027 */     } else {
/* 028 */
/* 029 */       mutableRow.update(0, value_6);
/* 030 */     }
/* 031 */
/* 032 */     return mutableRow;
/* 033 */   }
/* 034 */
/* 035 */
/* 036 */   private sparkSQL.apachedemo.SparkSessionJavaTest$Employee InitializeJavaBean_0(InternalRow i) {
/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();
/* 038 */     sparkSQL.apachedemo.SparkSessionJavaTest$Employee javaBean_0 = value_1;
/* 039 */     if (!false) {
/* 040 */
/* 041 */
/* 042 */       boolean isNull_3 = i.isNullAt(0);
/* 043 */       UTF8String value_3 = isNull_3 ? null : (i.getUTF8String(0));
/* 044 */       boolean isNull_2 = true;
/* 045 */       java.lang.String value_2 = null;
/* 046 */       if (!isNull_3) {
/* 047 */
/* 048 */         isNull_2 = false;
/* 049 */         if (!isNull_2) {
/* 050 */
/* 051 */           Object funcResult_0 = null;
/* 052 */           funcResult_0 = value_3.toString();
/* 053 */
/* 054 */           if (funcResult_0 != null) {
/* 055 */             value_2 = (java.lang.String) funcResult_0;
/* 056 */           } else {
/* 057 */             isNull_2 = true;
/* 058 */           }
/* 059 */
/* 060 */
/* 061 */         }
/* 062 */       }
/* 063 */       javaBean_0.setName(value_2);
/* 064 */
/* 065 */
/* 066 */       boolean isNull_5 = i.isNullAt(1);
/* 067 */       long value_5 = isNull_5 ? -1L : (i.getLong(1));
/* 068 */
/* 069 */       if (isNull_5) {
/* 070 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 071 */       }
/* 072 */       javaBean_0.setSalary(value_5);
/* 073 */
/* 074 */     }
/* 075 */
/* 076 */     return value_1;
/* 077 */   }
/* 078 */
/* 079 */ }

19/11/15 14:26:37 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 37, Column 85: No applicable constructor/method found for zero actual parameters; candidates are: 

...(omitted)

Locating the Problem

File 'generated.java', Line 37, Column 85: failed to compile

/* 037 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Employee value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Employee();

As you can see, at runtime the generated code calls a no-argument constructor of the Employee class, while the class in my code only defines a parameterized constructor.

public Employee(String name, long salary) {
    this.name = name;
    this.salary = salary;
}

Comment out this constructor so the class falls back to the default no-argument constructor, and this error goes away.
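Commenting out the constructor is the route taken in this post. As a minimal alternative sketch (not what the listings below use), you can keep the convenience constructor and add an explicit public no-argument constructor next to it; the generated code shown above instantiates the bean with new Employee() and then calls the setters, so an accessible no-argument constructor is all it needs:

public static class Employee implements Serializable {
    private String name;
    private long salary;

    // Explicit no-argument constructor; the generated SafeProjection code
    // creates the bean with it and then populates the fields via the setters
    public Employee() {
    }

    // The convenience constructor can then be kept as well
    public Employee(String name, long salary) {
        this.name = name;
        this.salary = salary;
    }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public long getSalary() { return salary; }
    public void setSalary(long salary) { this.salary = salary; }
}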

Not Done Yet?

However, the Average class also defines a parameterized constructor. Will it run into the same problem? Checking it is also a good way to verify whether the analysis above is correct.

(Aha, the original exception is gone, but a new one is thrown.)

19/11/15 14:50:06 ERROR CodeGenerator: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"
org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

 .......(omitted)

19/11/15 14:50:06 INFO CodeGenerator: 
/* 001 */ public java.lang.Object generate(Object[] references) {
/* 002 */   return new SpecificSafeProjection(references);
/* 003 */ }
/* 004 */
/* 005 */ class SpecificSafeProjection extends org.apache.spark.sql.catalyst.expressions.codegen.BaseProjection {
/* 006 */
/* 007 */   private Object[] references;
/* 008 */   private InternalRow mutableRow;
/* 009 */
/* 010 */
/* 011 */   public SpecificSafeProjection(Object[] references) {
/* 012 */     this.references = references;
/* 013 */     mutableRow = (InternalRow) references[references.length - 1];
/* 014 */
/* 015 */   }
/* 016 */
/* 017 */   public void initialize(int partitionIndex) {
/* 018 */
/* 019 */   }
/* 020 */
/* 021 */   public java.lang.Object apply(java.lang.Object _i) {
/* 022 */     InternalRow i = (InternalRow) _i;
/* 023 */
/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();
/* 025 */     sparkSQL.apachedemo.SparkSessionJavaTest$Average javaBean_0 = value_1;
/* 026 */     if (!false) {
/* 027 */
/* 028 */
/* 029 */       long value_3 = i.getLong(0);
/* 030 */
/* 031 */       if (false) {
/* 032 */         throw new NullPointerException(((java.lang.String) references[0] /* errMsg */));
/* 033 */       }
/* 034 */       javaBean_0.setCount(value_3);
/* 035 */
/* 036 */
/* 037 */       long value_5 = i.getLong(1);
/* 038 */
/* 039 */       if (false) {
/* 040 */         throw new NullPointerException(((java.lang.String) references[1] /* errMsg */));
/* 041 */       }
/* 042 */       javaBean_0.setSum(value_5);
/* 043 */
/* 044 */     }
/* 045 */     if (false) {
/* 046 */       mutableRow.setNullAt(0);
/* 047 */     } else {
/* 048 */
/* 049 */       mutableRow.update(0, value_1);
/* 050 */     }
/* 051 */
/* 052 */     return mutableRow;
/* 053 */   }
/* 054 */
/* 055 */
/* 056 */ }

19/11/15 14:50:06 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3)
java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 24, Column 84: No applicable constructor/method found for zero actual parameters; candidates are: "sparkSQL.apachedemo.SparkSessionJavaTest$Average(long, long)"

...(omitted)

Locating the Problem

File 'generated.java', Line 24, Column 84: failed to compile

/* 024 */     final sparkSQL.apachedemo.SparkSessionJavaTest$Average value_1 = false ? null : new sparkSQL.apachedemo.SparkSessionJavaTest$Average();

Indeed: the generated code again expects a no-argument constructor, but the class only declares a parameterized one, which is why the exception is thrown.

Comment out the parameterized constructor and use the default one:

public Average(long sum, long count) {
    this.sum = sum;
    this.count = count;
}

and change the zero function in the aggregator accordingly:

        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }

With that, the problem is fully resolved.
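As with Employee, here is a sketch of the alternative fix: keep the Average(long, long) constructor and add an explicit no-argument constructor beside it, which also satisfies the bean encoder and lets zero() remain a one-liner.

public static class Average implements Serializable {
    private long sum;
    private long count;

    // Explicit no-argument constructor so Encoders.bean(Average.class) can instantiate it
    public Average() {
    }

    // Keeping the parameterized constructor lets zero() stay as:
    //     public Average zero() { return new Average(0L, 0L); }
    public Average(long sum, long count) {
        this.sum = sum;
        this.count = count;
    }

    public long getSum() { return sum; }
    public void setSum(long sum) { this.sum = sum; }
    public long getCount() { return count; }
    public void setCount(long count) { this.count = count; }
}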

The Correct Code

SparkSessionJavaTest.java

package sparkSQL.apachedemo;


import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.*;
import org.apache.spark.sql.expressions.Aggregator;
import org.apache.spark.sql.expressions.MutableAggregationBuffer;
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.TypedColumn;



import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.spark.sql.functions.col;

/**
 * @Classname SparkSessionJavaTest
 * @Date 2019/11/14 7:00 PM
 * @Author hadoop
 * @Description:
 * SparkSQL Java version
 */

public class SparkSessionJavaTest {
    public static void main(String[] args){
        Logger.getLogger("org").setLevel(Level.INFO);
        SparkConf conf = new SparkConf()
                .setAppName("SparkSessionJavaTest")
                .setMaster("local[2]");
        SparkSession spark = SparkSession
                .builder()
                .config(conf)
//                .enableHiveSupport()
                .getOrCreate();
        String filePath = "file:/usr/local/spark/examples/src/main/resources/";

        typeSafeUserDefinedAggregateFunction(spark,filePath);
        spark.stop();
    }

    /**
     * Aggregation operation
     * Type-safe user-defined aggregate function
     * @param spark
     * @param filePath
     */
    private static void typeSafeUserDefinedAggregateFunction(SparkSession spark,String filePath){
        Encoder<Employee> employeeEncoder = Encoders.bean(Employee.class);
        String path = filePath+ "employees.json";
        Dataset<Employee> ds = spark.read().json(path).as(employeeEncoder);
        ds.show();

        MyAverage2 myAverage = new MyAverage2();
        // Convert the function to a `TypedColumn` and give it a name
        TypedColumn<Employee, Double> averageSalary = myAverage.toColumn().name("average_salary");
        Dataset<Double> result = ds.select(averageSalary);
        result.show();

    }

    /**
     * Employee inner class
     */
    public static class Employee implements Serializable{
        private String name;
        private long salary;

        // public Employee(String name, long salary) {
        //     this.name = name;
        //     this.salary = salary;
        // }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }

        public long getSalary() {
            return salary;
        }

        public void setSalary(long salary) {
            this.salary = salary;
        }
    }


    /**
     * Average inner class
     */
    public static class  Average implements Serializable{
        private long sum;
        private long count;

        // public Average(long sum, long count) {
        //     this.sum = sum;
        //     this.count = count;
        // }

        public long getSum() {
            return sum;
        }

        public void setSum(long sum) {
            this.sum = sum;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }
    }

    public static class MyAverage2 extends Aggregator<Employee,Average,Double> {
        // A zero value for this aggregation. Should satisfy the property that any b + zero = b
        public Average zero(){
            Average average = new Average();
            average.setSum(0L);
            average.setCount(0L);
            return average;
        }
        // Combine two values to produce a new value. For performance, the function may modify `buffer`
        // and return it instead of constructing a new object
        public Average reduce(Average buffer,Employee employee){
            long newSum = buffer.getSum() + employee.getSalary();
            long newCount = buffer.getCount() + 1;
            buffer.setSum(newSum);
            buffer.setCount(newCount);
            return buffer;
        }
        // Merge two intermediate values
        public Average merge(Average b1,Average b2){
            long mergeSum = b1.getSum() + b2.getSum();
            long mergeCount = b1.getCount() + b2.getCount();
            b1.setSum(mergeSum);
            b1.setCount(mergeCount);
            return b1;
        }
        //Transform the output of the reduction
        public Double finish(Average reduction){
            return ((double)reduction.getSum()) / reduction.getCount();

        }
        //Specifies the Encoder for the intermediate value type
        public Encoder<Average> bufferEncoder(){
            return Encoders.bean(Average.class);
        }
        //Specifies the Encoder for the final output value type
        public Encoder<Double> outputEncoder(){
            return Encoders.DOUBLE();
        }

    }
}
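With the fixed classes, a quick Spark-free sanity check of the aggregator logic is possible, for example from main; this is just an illustrative snippet reusing the class names from the listing above:

// Sanity check of MyAverage2 without Spark: (3000 + 5000) / 2 should be 4000.0
MyAverage2 agg = new MyAverage2();

Employee e1 = new Employee();
e1.setName("a");
e1.setSalary(3000L);
Employee e2 = new Employee();
e2.setName("b");
e2.setSalary(5000L);

Average left = agg.reduce(agg.zero(), e1);
Average right = agg.reduce(agg.zero(), e2);
Average merged = agg.merge(left, right);
System.out.println(agg.finish(merged));   // 4000.0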

Summary

This was the first time I ran into this kind of problem, and it took me a while to track it down. Why the long detour? Because the log level in my code was set with Logger.getLogger("org").setLevel(Level.ERROR), so the logs never revealed where the problem actually was. After changing it to Logger.getLogger("org").setLevel(Level.INFO), the full log output became visible and the problem was easy to spot. Something to keep in mind going forward.
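The difference comes down to this one line: the compile failure itself is logged at ERROR, but the dump of the generated source that pinpoints the offending constructor call (the "INFO CodeGenerator:" block above) only shows up at INFO or below.

// Level.ERROR hides the generated-code dump; Level.INFO makes the full
// CodeGenerator output visible and the offending line easy to find.
Logger.getLogger("org").setLevel(Level.INFO);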
