The controller endpoint
@RequestMapping(URLPrefix.COMPARE.WORKMODEL_RUN_ALL)
@ResponseBody
public Result runAll(Integer id, Integer clusterType, Integer infom) {
    // Integer infom = 0;
    Configuration configuration = new Configuration();
    if (infom != 0) {
        SystemParams systemParams = systemParamRepository.findAll().get(0);
        String systemParamsJson = systemParams.getSystemParamsJson();
        JSONArray jsonArray = JSONArray.parseArray(systemParamsJson);
        // /home/lyg/IdeaProjects/dataexa-compare/cp-base/target/classes/
        SystemPath systemPath = new SystemPath();
        String classPath = systemPath.getClassPath();
        String[] split = classPath.split("cp-starter");
        String newClassPath = split[0] + "cp-base/target/";
        // String newClassPath = classPath.substring(0, classPath.length() - 8);
        configuration.setHadoopHome(JSONObject.parseObject(jsonArray.get(0).toString()).get("paramValue").toString());
        configuration.setSparkHome(JSONObject.parseObject(jsonArray.get(1).toString()).get("paramValue").toString());
        configuration.setYarnConfDir(JSONObject.parseObject(jsonArray.get(2).toString()).get("paramValue").toString());
        configuration.setAppResource("hdfs://192.168.1.80:8020/dataexa/compare/cp-base-1.0-SNAPSHOT-compare-core.jar");
        configuration.setMainClass("com.dataexa.cp.base.datasource.DataSourceReader");
        configuration.setDataDir(JSONObject.parseObject(jsonArray.get(3).toString()).get("paramValue").toString());
    }
    /*configuration.setHadoopHome("/opt/software/hadoop-2.6.0-cdh5.10.0");
    configuration.setSparkHome("/opt/software/spark-2.2.0");
    configuration.setYarnConfDir("/home/lyg/software/yarn-conf");
    configuration.setAppResource("/home/lyg/IdeaProjects/dataexa-compare/cp-base/target/cp-base-1.0-SNAPSHOT-compare-core.jar");
    configuration.setMainClass("com.dataexa.cp.base.datasource.DataSourceReader");*/
    // Cluster cluster = new Cluster();
    // cluster.setClusterType(1);
    WorkModel one = workModelService.findOne(id);
    JSONObject jsonObject = JSON.parseObject(one.getDisplay_content());
    DataTableDTO dataTableDTO = workModelService.run(jsonObject, sparkSession, infom, configuration, clusterType, id);
    return RespResult.success(dataTableDTO);
}
The run method in the service implementation class
@Service
public class WorkModelServiceImpl extends BaseSimpleService<WorkModel, Integer> implements WorkModelService {

    @Autowired
    DataSourceService dataSourceService;

    @Override
    public DataTableDTO run(JSONObject jsonObject, SparkSession sparkSession, Integer infom, Configuration configuration, Integer clusterType, Integer id) {
        // Build an adjacency graph from the nodes and edges of the workflow diagram.
        JSONArray edges = jsonObject.getJSONArray("linkDataArray");
        JSONArray vertices = jsonObject.getJSONArray("nodeDataArray");
        AMWGraph amwGraph = new AMWGraph(vertices.size());
        JSONObject[] vertice = new JSONObject[vertices.size()];
        Map<String, Integer> map = new HashMap<>();
        for (int i = 0; i < vertices.size(); i++) {
            vertice[i] = vertices.getJSONObject(i);
            map.put(vertices.getJSONObject(i).getString("key"), i);
        }
        amwGraph.setVertices(vertice);
        for (int i = 0; i < edges.size(); i++) {
            amwGraph.addEdge(map.get(edges.getJSONObject(i).getString("to")), map.get(edges.getJSONObject(i).getString("from")));
        }
        amwGraph.broadFirstSearch();
        List<String> descOperator = amwGraph.operator;
        Map<String, Integer> datasource = amwGraph.datasource;
        Map<String, String> tables = amwGraph.tables;
        // Load the DataSource entity referenced by each data-source node.
        Map<String, DataSource> dataSet = new HashMap<>();
        for (String s : datasource.keySet()) {
            dataSet.put(s, dataSourceService.findOne(datasource.get(s)));
        }
        if (infom == 0) { // run locally
            DataSourceReader dataSourceReader = new DataSourceReader(sparkSession, new DataSourceDTO());
            return dataSourceReader.getResult(descOperator, dataSet, tables, 0, 0, "");
        } else { // run on the cluster
            DataSourceTemp dataSourceTemp = new DataSourceTemp();
            dataSourceTemp.setDescOperator(descOperator);
            dataSourceTemp.setDataSet(dataSet);
            dataSourceTemp.setTables(tables);
            dataSourceTemp.setId(id);
            String str = JSON.toJSONString(dataSourceTemp);
            switch (clusterType) {
                case 1: // run on a CDH cluster
                    try {
                        Launcher.launch(configuration, "cluster", str, configuration.getDataDir());
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    break;
                case 2: // run locally or on a single node
                    try {
                        Launcher.launch(configuration, "cluster", str, configuration.getDataDir()); // hadoop
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    break;
            }
            return new DataTableDTO();
        }
    }
}
The Launcher class
public class Launcher {

    public Launcher() {
    }

    public static boolean launch(Configuration configuration, String... args) throws IOException {
        Map<String, String> env = new HashMap<>();
        env.put("hadoop.home.dir", configuration.getHadoopHome());
        env.put("HADOOP_HOME", configuration.getHadoopHome());
        env.put("spark.home.dir", configuration.getSparkHome());
        env.put("SPARK_HOME", configuration.getSparkHome());
        env.put("HADOOP_CONF_DIR", configuration.getHadoopHome());
        if (!StringUtils.isBlank(configuration.getYarnConfDir())) {
            env.put("YARN_CONF_DIR", configuration.getYarnConfDir());
        }
        env.put("HADOOP_USER_NAME", "yarn");

        SparkConfig sparkConfig = new SparkConfig();
        sparkConfig.setAppName("DataExa-Compare");
        sparkConfig.setVerbose(true);
        sparkConfig.setMaster("yarn");
        sparkConfig.setDeployMode("cluster"); // client
        sparkConfig.setAppResource(configuration.getAppResource());
        sparkConfig.setMainClass(configuration.getMainClass());

        try {
            SparkLauncher launcher = new SparkLauncher(env);
            /* if (!Strings.isNullOrEmpty(configuration.getJars())) {
                launcher = launcher.addJar(configuration.getJars());
            } */
            launcher = launcher.setAppName(sparkConfig.getAppName())
                    .setAppResource(sparkConfig.getAppResource())
                    .setMainClass(sparkConfig.getMainClass())
                    .setMaster(sparkConfig.getMaster())
                    .setVerbose(sparkConfig.getVerbose());
            if (sparkConfig.getDeployMode() != null) {
                launcher = launcher.setDeployMode(sparkConfig.getDeployMode());
            }
            launcher.addAppArgs(DataCompressUtils.serArgs(args));
            // launcher.addAppArgs(DataCompressUtils.serArgs(args));
            // launcher.addFile(Launcher.class.getClassLoader().getResource("hbase-site.xml").getFile());
            // SystemPath systemPath = new SystemPath();
            // String classPath = systemPath.getClassPath();
            // String classes = classPath.split("classes")[0] + "lib/cp-base-1.0-SNAPSHOT.jar/spark-defaults.conf";
            // String string = Launcher.class.getClassLoader().getResource("spark-defaults.conf").toString().substring(9);
            // launcher.setPropertiesFile(classes);
            // launcher.setPropertiesFile(Launcher.class.getClassLoader().getResource("spark-defaults.conf").getFile());
            // launcher.setPropertiesFile("/home/lyg/IdeaProjects/dataexa-compare/cp-base/src/main/resources/spark-defaults.conf");
            // launcher.setPropertiesFile("/home/lyg/IdeaProjects/dataexa-compare/cp-base/target/classes/spark-defaults.conf");
            launcher.setPropertiesFile("/home/lyg/IdeaProjects/dataexa-compare/cp-starter/target/cp-base-1.0-SNAPSHOT.jar/spark-defaults.conf");
            Process process = launcher.launch();
            ProcessStreamHandler handler = (line) -> {
                System.out.println(line);
            };
            ProcessUtil processUtil = new ProcessUtil(handler);
            processUtil.unblock(process);
            if (process.exitValue() != 0) {
                return false;
            }
            return true;
        } catch (Exception e) {
            throw e;
        }
    }
}
The main class executed inside the jar, plus its other methods
case class DataSourceReader(sparkSession: SparkSession, datasource: DataSourceDTO) {

  def readCsv(rowNum: Int): DataTableDTO = {
    var dataTableDTO = new DataTableDTO
    try {
      val df = sparkSession.read
        .option("header", datasource.getCsvBase.getHasHeader)
        .option("delimiter", datasource.getCsvBase.getDelimiter)
        .csv(datasource.getCsvBase.getPath)
      dataTableDTO.setColumnNames(scala.collection.JavaConversions.seqAsJavaList(df.columns))
      println("Header: " + df.columns.mkString(","))
      var allRow = new util.ArrayList[util.List[AnyRef]]()
      df.take(rowNum).foreach(row => {
        var rowData = new util.ArrayList[AnyRef]()
        df.columns.foreach(column => {
          rowData.add(row.getAs(column))
        })
        allRow.add(rowData)
      })
      dataTableDTO.setColumnValues(allRow)
    } catch {
      case e: Exception => println(e)
    }
    dataTableDTO
  }

  def getResult(descOperator: util.List[String], dataset: util.Map[String, DataSource], tableNameMap: util.Map[String, String], storage: Int, name: Int, dataDir: String): DataTableDTO = {
    import scala.collection.JavaConverters._
    val operator: List[String] = descOperator.asScala.toList.reverse
    val tableName = tableNameMap.asScala
    val datasets = dataset.asScala
    val dataTableDTO = new DataTableDTO
    var dataFrame: Map[String, DataFrame] = Map()
    for (elem <- tableName.keySet) {
      if (tableName(elem) != null && tableName.contains(elem)) {
        val dataSource: DataSource = datasets(elem)
        dataSource.getTypeId match {
          case "jdbc" => {
            val dataBase = JSON.parseObject(dataSource.getDbJson, classOf[DataBase])
            dataBase.setDbType("jdbc")
            dataBase.setTableName(tableName(elem))
            dataFrame += (elem -> DataBaseToDF(sparkSession).convert(dataBase))
          }
          case "odbc" => {
            val dataBase = JSON.parseObject(dataSource.getDbJson, classOf[DataBase])
            dataBase.setDbType("odbc")
            dataBase.setTableName(tableName(elem))
            dataFrame += (elem -> DataBaseToDF(sparkSession).convert(dataBase))
          }
          case "hdfs" => {
            val hdfsBase = JSON.parseObject(dataSource.getDbJson, classOf[HdfsBase])
            val path = hdfsBase.getPath
            if (!hdfsBase.getFileType && hdfsBase.getSingleHeader == "2")
              hdfsBase.setPath(s"$path/${tableName(elem)}")
            // if (path.substring(path.lastIndexOf("/")).equals(tableName(elem))) hdfsBase.setPath(path + "/" + tableName(elem))
            dataFrame += (elem -> HdfsToDF(sparkSession).convert(hdfsBase))
          }
          case "csv" => {
            val csvBase = JSON.parseObject(dataSource.getDbJson, classOf[CsvBase])
            csvBase.getFileType
            val path = csvBase.getPath
            if (!csvBase.getFileType && csvBase.getSingleHeader == "2")
              csvBase.setPath(s"$path/${tableName(elem)}")
            // if (path.substring(path.lastIndexOf("/")).equals(tableName(elem))) csvBase.setPath(path + "/" + tableName(elem))
            dataFrame += (elem -> CsvToDF(sparkSession).convert(csvBase))
          }
          case "hive" => {
            val hiveBase = JSON.parseObject(dataSource.getDbJson, classOf[HiveBase])
            hiveBase.setTableName(tableName(elem))
            dataFrame += (elem -> HiveToDF(sparkSession).convert(hiveBase))
          }
          case "hbase" => {
            val hbaseBase = JSON.parseObject(dataSource.getDbJson, classOf[HbaseBase])
            hbaseBase.setTableName(tableName(elem))
            val hBaseUtil = new HBaseUtil()
            val columnName: util.List[String] = hBaseUtil.getColumnName(hbaseBase.getZKHost, hbaseBase.getZKPort, hbaseBase.getTableName)
            hbaseBase.setColumns(columnName)
            dataFrame += (elem -> HbaseToDF(sparkSession).convert(hbaseBase))
          }
        }
      } else {
        dataFrame += (elem -> null)
      }
    }
    if (storage == 0) {
      val result = WorkOperator(dataFrame, sparkSession).controller(operator)
      dataTableDTO.setColumnNames(scala.collection.JavaConversions.seqAsJavaList(result.columns))
      var allRow = new util.ArrayList[util.List[AnyRef]]()
      result.take(10).foreach(row => {
        var rowData = new util.ArrayList[AnyRef]()
        result.columns.foreach(column => {
          rowData.add(row.getAs(column))
        })
        allRow.add(rowData)
      })
      dataTableDTO.setColumnValues(allRow)
      dataTableDTO
    } else {
      val result: DataFrame = WorkOperator(dataFrame, sparkSession).controller(operator)
      // save the resulting DataFrame to HDFS
      result.write.mode(SaveMode.Overwrite).save(dataDir + name)
      new DataTableDTO
    }
  }
}
object DataSourceReader {

  def main(args: Array[String]): Unit = {
    val params: Array[String] = DataCompressUtils.desArgs(args)
    val datasource = new DataSourceDTO
    /* val datasource = new DataSourceDTO();
    datasource.setCnName("csvtest")
    val csvBase = new CsvBase
    csvBase.setPath("hdfs://dataexa-cdh-80:8022/dataexa/sati/客运出行数据.csv");
    datasource.setCsvBase(csvBase) */
    val dataSourceReader = params(0) match {
      case "cluster" => DataSourceReader(CompareContext.sparkSession, datasource)
      case _ => DataSourceReader(CompareContext.localSparkSession, datasource)
    }
    val str = params(1)
    val dataDir = params(2)
    val dataSourceTemp = JSON.parseObject(str, classOf[DataSourceTemp])
    // val dataset: util.Map[String, DataSource] = util.Arrays.
    // val tableNameMap: util.Map[String, String] = util.Arrays.asList(params(3))
    dataSourceReader.getResult(dataSourceTemp.getDescOperator, dataSourceTemp.getDataSet, dataSourceTemp.getTables, 1, dataSourceTemp.getId, dataDir)
    // println(dataTableDTO.toString)
    // dataSourceReader.readCsv(5)
    println("Run completed successfully")
    /*
    val columnMap = df.columns.map { case c => (c, s"cp_$c") }.toMap
    var df2 = ColumnOperator(df).columnMapping(columnMap)
    df2.show(3)
    */
  }
}
Problem 1: the Spark properties file is rejected as invalid
Exception in thread "main" java.lang.IllegalArgumentException: Invalid properties file '/home/lyg/IdeaProjects/dataexa-compare/cp-starter/target/cp-base-1.0-SNAPSHOT.jar/spark-defaults.conf'.
Cause analysis: once the application is deployed to the server, a properties file packaged inside the jar cannot be read through a path of the form "<jar>/spark-defaults.conf", which is why this error keeps appearing; pointing the launcher at a file inside the jar during local testing fails with the same error. (The failure to read this configuration was eventually worked around by supplying the settings through a system-level configuration file instead.)
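For reference, a minimal Java sketch of that kind of workaround: the properties are read from a plain file on the driver machine's filesystem, or passed one by one through SparkLauncher.setConf when no such file exists. The path /etc/spark/compare/spark-defaults.conf and the property values are placeholder assumptions for illustration, not this project's actual configuration.

import java.io.File;

import org.apache.spark.launcher.SparkLauncher;

public class LauncherConfigSketch {

    // Configures Spark properties without referencing a file inside the jar.
    public static SparkLauncher applyProperties(SparkLauncher launcher) {
        // Hypothetical spark-defaults.conf extracted to the local filesystem.
        File defaults = new File("/etc/spark/compare/spark-defaults.conf");
        if (defaults.isFile()) {
            // setPropertiesFile expects a readable file on disk; a
            // "<something>.jar/spark-defaults.conf" path is rejected as invalid.
            launcher.setPropertiesFile(defaults.getAbsolutePath());
        } else {
            // Fallback: set the individual "spark.*" properties directly.
            launcher.setConf(SparkLauncher.DRIVER_MEMORY, "2g");
            launcher.setConf(SparkLauncher.EXECUTOR_MEMORY, "2g");
            launcher.setConf("spark.yarn.maxAppAttempts", "1");
        }
        return launcher;
    }
}

Alternatively, if the file is installed as $SPARK_HOME/conf/spark-defaults.conf on the machine that runs the launcher, spark-submit loads it by default and no setPropertiesFile call is needed at all, which is presumably what the system-configuration-file fix above amounts to.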
Problem 2: to be added later.