Spark on YARN: from the web interface to submitting and running on the cluster (with a few nasty problems along the way)

The controller endpoint

@RequestMapping(URLPrefix.COMPARE.WORKMODEL_RUN_ALL)
@ResponseBody
public Result runAll(Integer id, Integer clusterType, Integer infom) {
    Configuration configuration = new Configuration();

    // infom != 0 means the job is to be submitted to the cluster, so the
    // Hadoop/Spark paths are loaded from the stored system parameters.
    if (infom != 0) {
        SystemParams systemParams = systemParamRepository.findAll().get(0);
        String systemParamsJson = systemParams.getSystemParamsJson();
        JSONArray jsonArray = JSONArray.parseArray(systemParamsJson);

        // e.g. /home/lyg/IdeaProjects/dataexa-compare/cp-base/target/classes/
        SystemPath systemPath = new SystemPath();
        String classPath = systemPath.getClassPath();
        String[] split = classPath.split("cp-starter");
        String newClassPath = split[0] + "cp-base/target/";

        configuration.setHadoopHome(JSONObject.parseObject(jsonArray.get(0).toString()).get("paramValue").toString());
        configuration.setSparkHome(JSONObject.parseObject(jsonArray.get(1).toString()).get("paramValue").toString());
        configuration.setYarnConfDir(JSONObject.parseObject(jsonArray.get(2).toString()).get("paramValue").toString());
        configuration.setAppResource("hdfs://192.168.1.80:8020/dataexa/compare/cp-base-1.0-SNAPSHOT-compare-core.jar");
        configuration.setMainClass("com.dataexa.cp.base.datasource.DataSourceReader");
        configuration.setDataDir(JSONObject.parseObject(jsonArray.get(3).toString()).get("paramValue").toString());
    }
    /* Example of the same configuration hard-coded for a local run:
    configuration.setHadoopHome("/opt/software/hadoop-2.6.0-cdh5.10.0");
    configuration.setSparkHome("/opt/software/spark-2.2.0");
    configuration.setYarnConfDir("/home/lyg/software/yarn-conf");
    configuration.setAppResource("/home/lyg/IdeaProjects/dataexa-compare/cp-base/target/cp-base-1.0-SNAPSHOT-compare-core.jar");
    configuration.setMainClass("com.dataexa.cp.base.datasource.DataSourceReader"); */

    WorkModel one = workModelService.findOne(id);
    JSONObject jsonObject = JSON.parseObject(one.getDisplay_content());
    DataTableDTO dataTableDTO = workModelService.run(jsonObject, sparkSession, infom, configuration, clusterType, id);
    return RespResult.success(dataTableDTO);
}

The run method in the service implementation class

@Service
public class WorkModelServiceImpl extends BaseSimpleService<WorkModel,Integer> implements WorkModelService {

    @Autowired
    DataSourceService dataSourceService;

    @Override
    public DataTableDTO run(JSONObject jsonObject, SparkSession sparkSession, Integer infom, Configuration configuration, Integer clusterType,Integer id) {
        JSONArray edges = jsonObject.getJSONArray("linkDataArray");
        JSONArray vertices = jsonObject.getJSONArray("nodeDataArray");
        AMWGraph amwGraph = new AMWGraph(vertices.size());
        JSONObject[] vertice = new JSONObject[vertices.size()];
        Map<String,Integer> map = new HashMap<>();
        for (int i = 0; i < vertices.size(); i++) {
            vertice[i] = vertices.getJSONObject(i);
            map.put(vertices.getJSONObject(i).getString("key"),i);
        }
        amwGraph.setVertices(vertice);
        for (int i = 0; i < edges.size(); i++) {
            amwGraph.addEdge(map.get(edges.getJSONObject(i).getString("to")),map.get(edges.getJSONObject(i).getString("from")));
        }
        amwGraph.broadFirstSearch();
        List<String> descOperator = amwGraph.operator;
        Map<String, Integer> datasource = amwGraph.datasource;
        Map<String, String> tables = amwGraph.tables;
        Map<String, DataSource> dataSet = new HashMap<>();
        for (String s : datasource.keySet()) {
            dataSet.put(s, dataSourceService.findOne(datasource.get(s)));
        }
        if (infom == 0) { // run locally
            DataSourceReader dataSourceReader = new DataSourceReader(sparkSession, new DataSourceDTO());
            return dataSourceReader.getResult(descOperator, dataSet, tables,0,0,"");
        } else { // run on the cluster
            DataSourceTemp dataSourceTemp = new DataSourceTemp();
            dataSourceTemp.setDescOperator(descOperator);
            dataSourceTemp.setDataSet(dataSet);
            dataSourceTemp.setTables(tables);
            dataSourceTemp.setId(id);
            String str = JSON.toJSONString(dataSourceTemp);
            switch (clusterType){
                case 1: // run on a CDH cluster
                    try {
                        Launcher.launch(configuration,"cluster",str,configuration.getDataDir());
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    break;
                case 2: // run locally or on a single-node setup
                    try {
                        Launcher.launch(configuration, "cluster", str, configuration.getDataDir()); // hadoop
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    break;
            }
            return new DataTableDTO();
        }
    }
}

The Launcher class

public class Launcher {

    public Launcher() {
    }

    public static boolean launch(Configuration configuration, String... args) throws IOException {
        Map<String, String> env = new HashMap<>();
        env.put("hadoop.home.dir",configuration.getHadoopHome());
        env.put("HADOOP_HOME", configuration.getHadoopHome());
        env.put("spark.home.dir", configuration.getSparkHome());
        env.put("SPARK_HOME", configuration.getSparkHome());
        env.put("HADOOP_CONF_DIR", configuration.getHadoopHome());
        if (!StringUtils.isBlank(configuration.getYarnConfDir())) {
            env.put("YARN_CONF_DIR", configuration.getYarnConfDir());
        }
        env.put("HADOOP_USER_NAME", "yarn");

        SparkConfig sparkConfig = new SparkConfig();
        sparkConfig.setAppName("DataExa-Compare");
        sparkConfig.setVerbose(true);

        sparkConfig.setMaster("yarn");
        sparkConfig.setDeployMode("cluster"); // client

        sparkConfig.setAppResource(configuration.getAppResource());
        sparkConfig.setMainClass(configuration.getMainClass());
        try {
            SparkLauncher launcher = new SparkLauncher(env);
         /*   if (!Strings.isNullOrEmpty(configuration.getJars())) {
                launcher = launcher.addJar(configuration.getJars());
            }*/

            launcher = launcher.setAppName(sparkConfig.getAppName()).setAppResource(sparkConfig.getAppResource())
                    .setMainClass(sparkConfig.getMainClass())
                    .setMaster(sparkConfig.getMaster())
                    .setVerbose(sparkConfig.getVerbose());
            if (sparkConfig.getDeployMode() != null) {
                launcher = launcher.setDeployMode(sparkConfig.getDeployMode());
            }
            launcher.addAppArgs(DataCompressUtils.serArgs(args));
//            launcher.addAppArgs(DataCompressUtils.serArgs(args));
//            launcher.addFile(Launcher.class.getClassLoader().getResource("hbase-site.xml").getFile());
//            SystemPath systemPath = new SystemPath();
//            String classPath = systemPath.getClassPath();
//            String classes = classPath.split("classes")[0] + "lib/cp-base-1.0-SNAPSHOT.jar/spark-defaults.conf";
//            String string = Launcher.class.getClassLoader().getResource("spark-defaults.conf").toString().substring(9);
//            launcher.setPropertiesFile(classes);
//            launcher.setPropertiesFile(Launcher.class.getClassLoader().getResource("spark-defaults.conf").getFile());
//            launcher.setPropertiesFile("/home/lyg/IdeaProjects/dataexa-compare/cp-base/src/main/resources/spark-defaults.conf");
//            launcher.setPropertiesFile("/home/lyg/IdeaProjects/dataexa-compare/cp-base/target/classes/spark-defaults.conf");
            launcher.setPropertiesFile("/home/lyg/IdeaProjects/dataexa-compare/cp-starter/target/cp-base-1.0-SNAPSHOT.jar/spark-defaults.conf");
            Process process = launcher.launch();
            ProcessStreamHandler handler = (line) -> {
                System.out.println(line);
            };
            ProcessUtil processUtil = new ProcessUtil(handler);
            processUtil.unblock(process);

            return process.exitValue() == 0;
        } catch (Exception e) {
            throw e;
        }
    }
}
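
DataCompressUtils itself is not shown in this post; judging by the names, serArgs/desArgs compress and encode the (potentially long) JSON payload so it survives being passed as plain application arguments and can be decoded again in the driver's main method. A minimal, purely hypothetical sketch of such a pair, assuming gzip plus Base64 (the class body below is illustrative, not the project's actual implementation):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class DataCompressUtilsSketch {

    // Gzip each argument and Base64-encode it so it is safe on the command line.
    public static String[] serArgs(String[] args) throws IOException {
        String[] out = new String[args.length];
        for (int i = 0; i < args.length; i++) {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            try (GZIPOutputStream gzip = new GZIPOutputStream(bytes)) {
                gzip.write(args[i].getBytes(StandardCharsets.UTF_8));
            }
            out[i] = Base64.getEncoder().encodeToString(bytes.toByteArray());
        }
        return out;
    }

    // Reverse of serArgs: Base64-decode and gunzip each argument.
    public static String[] desArgs(String[] args) throws IOException {
        String[] out = new String[args.length];
        for (int i = 0; i < args.length; i++) {
            byte[] decoded = Base64.getDecoder().decode(args[i]);
            try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(decoded));
                 ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
                byte[] chunk = new byte[4096];
                int n;
                while ((n = gzip.read(chunk)) != -1) {
                    buffer.write(chunk, 0, n);
                }
                out[i] = new String(buffer.toByteArray(), StandardCharsets.UTF_8);
            }
        }
        return out;
    }
}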

The main class (and supporting methods) executed inside the jar

case class DataSourceReader(sparkSession: SparkSession, datasource: DataSourceDTO) {


  def readCsv(rowNum: Int): DataTableDTO = {
    var dataTableDTO = new DataTableDTO
    try {
      val df = sparkSession.read
        .option ( "header", datasource.getCsvBase.getHasHeader )
        .option ( "delimiter", datasource.getCsvBase.getDelimiter )
        .csv ( datasource.getCsvBase.getPath )

      dataTableDTO.setColumnNames ( scala.collection.JavaConversions.seqAsJavaList ( df.columns ) )
      println ( "标题:" + df.columns.mkString ( "," ) )

      var allRow = new util.ArrayList[util.List[AnyRef]]()
      df.take ( rowNum ).foreach ( row => {
        var rowData = new util.ArrayList[AnyRef]()
        df.columns.foreach ( column => {
          rowData.add ( row.getAs ( column ) )
        } )
        allRow.add ( rowData )
      } )
      dataTableDTO.setColumnValues ( allRow )

    } catch {
      case e: Exception => println ( e )
    }
    dataTableDTO
  }

  def getResult(descOperator: util.List[String], dataset: util.Map[String, DataSource], tableNameMap: util.Map[String, String], storage: Int, name: Int, dataDir: String): DataTableDTO = {
    import scala.collection.JavaConverters._
    val operator: List[String] = descOperator.asScala.toList.reverse
    val tableName = tableNameMap.asScala
    val datasets = dataset.asScala
    val dataTableDTO = new DataTableDTO
    var dataFrame: Map[String, DataFrame] = Map ()
    for (elem <- tableName.keySet) {
      if (tableName ( elem ) != null && tableName.contains ( elem )) {
        val dataSource: DataSource = datasets ( elem )
        dataSource.getTypeId match {
          case "jdbc" => {
            val dataBase = JSON.parseObject ( dataSource.getDbJson, classOf [DataBase] )
            dataBase.setDbType("jdbc")
            dataBase.setTableName ( tableName ( elem ) )
            dataFrame += (elem -> DataBaseToDF ( sparkSession ).convert ( dataBase ))
          }
          case "odbc" => {
            val dataBase = JSON.parseObject ( dataSource.getDbJson, classOf [DataBase] )
            dataBase.setDbType("odbc")
            dataBase.setTableName ( tableName ( elem ) )
            dataFrame += (elem -> DataBaseToDF ( sparkSession ).convert ( dataBase ))
          }
          case "hdfs" => {
            val hdfsBase = JSON.parseObject ( dataSource.getDbJson, classOf [HdfsBase] )

            val path = hdfsBase.getPath
            if (!hdfsBase.getFileType && hdfsBase.getSingleHeader == "2")
              hdfsBase.setPath ( s"$path/${tableName ( elem )}" )
            //if (path.substring ( path.lastIndexOf ( "/" ) ).equals ( tableName ( elem ) )) hdfsBase.setPath ( path + "/" + tableName ( elem ) )
            dataFrame += (elem -> HdfsToDF ( sparkSession ).convert ( hdfsBase ))
          }
          case "csv" => {
            val csvBase = JSON.parseObject ( dataSource.getDbJson, classOf [CsvBase] )
            val path = csvBase.getPath
            if (!csvBase.getFileType && csvBase.getSingleHeader == "2")
              csvBase.setPath ( s"$path/${tableName ( elem )}" )
            //if (path.substring ( path.lastIndexOf ( "/" ) ).equals ( tableName ( elem ) )) csvBase.setPath ( path + "/" + tableName ( elem ) )
            dataFrame += (elem -> CsvToDF ( sparkSession ).convert ( csvBase ))
          }
          case "hive" => {
            val hiveBase = JSON.parseObject ( dataSource.getDbJson, classOf [HiveBase] )
            hiveBase.setTableName ( tableName ( elem ) )
            dataFrame += (elem -> HiveToDF ( sparkSession ).convert ( hiveBase ))
          }
          case "hbase" => {
            val hbaseBase = JSON.parseObject ( dataSource.getDbJson, classOf [HbaseBase] )
            hbaseBase.setTableName ( tableName ( elem ) )
            val hBaseUtil = new HBaseUtil()
            val columnName: util.List[String] = hBaseUtil.getColumnName(hbaseBase.getZKHost,hbaseBase.getZKPort,hbaseBase.getTableName)
            hbaseBase.setColumns(columnName)
            dataFrame += (elem -> HbaseToDF ( sparkSession ).convert ( hbaseBase ))
          }
        }
      } else {
        dataFrame += (elem -> null)
      }
    }
    if (storage == 0) {
      val result = WorkOperator ( dataFrame, sparkSession ).controller ( operator )
      dataTableDTO.setColumnNames ( scala.collection.JavaConversions.seqAsJavaList ( result.columns ) )
      var allRow = new util.ArrayList[util.List[AnyRef]]()
      result.take ( 10 ).foreach ( row => {
        var rowData = new util.ArrayList[AnyRef]()
        result.columns.foreach ( column => {
          rowData.add ( row.getAs ( column ) )
        } )
        allRow.add ( rowData )
      } )
      dataTableDTO.setColumnValues ( allRow )
      dataTableDTO
    } else {
      val result: DataFrame = WorkOperator ( dataFrame, sparkSession ).controller ( operator )
      // save the resulting DataFrame to HDFS
      result.write.mode ( SaveMode.Overwrite ).save ( dataDir + name )
      new DataTableDTO
    }
  }
}

object DataSourceReader {

  def main(args: Array[String]): Unit = {
    val params: Array[String] = DataCompressUtils.desArgs ( args )
    val datasource = new DataSourceDTO
    /* val datasource = new DataSourceDTO();
     datasource.setCnName("csvtest")
     val csvBase = new CsvBase
     csvBase.setPath("hdfs://dataexa-cdh-80:8022/dataexa/sati/客运出行数据.csv");
     datasource.setCsvBase(csvBase)*/

    val dataSourceReader = params ( 0 ) match {
      case "cluster" => DataSourceReader ( CompareContext.sparkSession, datasource )
      case _ => DataSourceReader ( CompareContext.localSparkSession, datasource )
    }
    val str = params ( 1 )
    val dataDir = params ( 2 )
    val dataSourceTemp = JSON.parseObject ( str, classOf [DataSourceTemp] )
    //    val dataset:util.Map[String,DataSource] = util.Arrays.
    //    val tableNameMap:util.Map[String,String] = util.Arrays.asList(params(3))
    dataSourceReader.getResult ( dataSourceTemp.getDescOperator, dataSourceTemp.getDataSet, dataSourceTemp.getTables, 1, dataSourceTemp.getId, dataDir )

    //    println(dataTableDTO.toString)
    //    dataSourceReader.readCsv(5)
    println ( "运行成功" )

    /*

        val  columnMap = df.columns.map{ case c => (c,s"cp_$c")}.toMap

        var df2 = ColumnOperator(df).columnMapping(columnMap)
        df2.show(3)
    */
  }

}

Problem 1: the Spark properties file is rejected as invalid
Exception in thread "main" java.lang.IllegalArgumentException: Invalid properties file '/home/lyg/IdeaProjects/dataexa-compare/cp-starter/target/cp-base-1.0-SNAPSHOT.jar/spark-defaults.conf'.
Cause analysis: once the application is deployed to the server, a configuration file sitting inside the jar apparently cannot be read directly as a regular file, which is why this error keeps being thrown; pointing setPropertiesFile at a file inside the jar during local testing produces the same error. (The failure to read this configuration was eventually resolved by supplying the settings through a system-level configuration file instead.)
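
The fix used here was to move the Spark settings out of the jar and into a system-level configuration file on disk. For reference, a minimal sketch of an alternative workaround (not the approach used in this post; the class and method names below are illustrative) is to copy the bundled spark-defaults.conf out of the classpath into a temporary file and hand SparkLauncher that real path; individual settings can also be passed directly with launcher.setConf("spark.executor.memory", "2g") and similar calls.

import org.apache.spark.launcher.SparkLauncher;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;

public class PropertiesFileWorkaround {

    // Copy spark-defaults.conf from the classpath (which may be inside the packaged
    // jar) to a temporary file on disk, so that SparkLauncher.setPropertiesFile()
    // receives a path it can actually open as a regular file.
    public static void applyBundledDefaults(SparkLauncher launcher) throws IOException {
        try (InputStream in = PropertiesFileWorkaround.class.getClassLoader()
                .getResourceAsStream("spark-defaults.conf")) {
            if (in == null) {
                throw new IOException("spark-defaults.conf not found on the classpath");
            }
            Path tmp = Files.createTempFile("spark-defaults-", ".conf");
            Files.copy(in, tmp, StandardCopyOption.REPLACE_EXISTING);
            launcher.setPropertiesFile(tmp.toAbsolutePath().toString());
        }
    }
}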
Problem 2: to be added in a later update.
