import java.util.List;
import java.util.Properties;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

// Build the Spark configuration for a standalone-cluster connection (Spark 1.x API).
SparkConf conf = new SparkConf()
        .setAppName("SparkExtractData")
        .setMaster(sparkConfig.getMasterUrl())
        .set("spark.executor.memory", sparkConfig.getExecutorMemory())
        .set("spark.driver.host", sparkConfig.getDriverHost()) // driver hostname, reachable from the executors
        .set("spark.driver.port", sparkConfig.getDriverPort()) // driver service port
        .set("spark.cores.max", sparkConfig.getMaxCores())
        .set("spark.executor.cores", sparkConfig.getExecutorCores())
        .set("spark.sql.parquet.writeLegacyFormat", "true"); // write legacy-format Parquet so Hive/Impala can read it
// SparkSession spark = SparkSession.builder().config(conf).getOrCreate(); // Spark 2.x entry point
// This code targets Spark 1.x, so create a SQLContext from a JavaSparkContext instead.
JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
SparkContext sparkContext = JavaSparkContext.toSparkContext(javaSparkContext);
SQLContext spark = SQLContext.getOrCreate(sparkContext);
// JDBC connection properties for the Oracle source.
Properties conp = new Properties();
conp.setProperty("user", druidConfig.getUsername());
conp.setProperty("password", druidConfig.getPassword());
conp.setProperty("driver", "oracle.jdbc.driver.OracleDriver");
// Table names and target directories arrive as comma-separated lists and pair up by index;
// bounding the loop by the shorter array avoids an ArrayIndexOutOfBoundsException when they differ.
String[] table = tablename.split(",");
String[] path = filepath.split(",");
for (int i = 0; i < Math.min(table.length, path.length); i++) {
    if (StringUtil.isNotEmpty(table[i]) && StringUtil.isNotEmpty(path[i])) {
        // Read the whole table over JDBC and keep only the configured columns.
        DataFrame jdbcDF = spark.read().jdbc(druidConfig.getDbUrl(), table[i].toUpperCase(), conp);
        List<Column> colList = getColumns(table[i]);
        Column[] cols = colList.toArray(new Column[colList.size()]);
        DataFrame m = jdbcDF.select(cols);
        m.show();
        m.printSchema();
        System.out.println("Starting to write data...");
        try {
            m.write().mode("overwrite").parquet(sparkConfig.getHdfs() + sparkConfig.getHiveDb() + "/" + path[i]);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
String ct = "58377173";
String sql_str = "(select a.*, ROWNUM rownum__rn from TD_AD_DATA_CHARGE_DETAIL a) b";
DataFrame m = spark.read().format("jdbc").option("driver", "oracle.jdbc.driver.OracleDriver").option("url", druidConfig.getDbUrl())
.option("user", druidConfig.getUsername()).option("password", druidConfig.getPassword()).option("dbtable", sql_str)
.option("fetchsize", "100000").option("partitionColumn", "rownum__rn").option("lowerBound", "0").option("upperBound", ct)
.option("numPartitions", "10").load().drop("rownum__rn");
m.show();
m.printSchema();
m.repartition(100).write().mode("overwrite").parquet(sparkConfig.getHdfs() + sparkConfig.getHiveDb() + "/charge");
sparkContext.stop();
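The per-table export above calls a getColumns(...) helper that is not shown. A minimal sketch of what it might look like, assuming the column list for each table lives in your own metadata store (metaService and listColumnNames are hypothetical placeholders, not Spark or Oracle APIs):

import java.util.ArrayList;
import static org.apache.spark.sql.functions.col;

// Hypothetical sketch: build the Column list for a table from externally stored metadata.
private List<Column> getColumns(String tableName) {
    List<Column> cols = new ArrayList<Column>();
    // metaService.listColumnNames(...) is an assumed helper returning the column names
    // configured for this table; substitute however your project stores that metadata.
    for (String name : metaService.listColumnNames(tableName)) {
        cols.add(col(name.toUpperCase())); // Oracle folds unquoted identifiers to upper case
    }
    return cols;
}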
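One caveat on the partitioned read: upperBound is hardcoded to 58377173, so rows added after that count was taken all fall into the last partition and skew it. A sketch of deriving the bound at runtime with a plain JDBC count query instead, reusing the same druidConfig URL and credentials (the enclosing method would need to declare or handle SQLException):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Sketch: take the row count once so the partition range matches the actual data.
String ct;
try (Connection c = DriverManager.getConnection(
             druidConfig.getDbUrl(), druidConfig.getUsername(), druidConfig.getPassword());
     Statement st = c.createStatement();
     ResultSet rs = st.executeQuery("select count(*) from TD_AD_DATA_CHARGE_DETAIL")) {
    rs.next();
    ct = String.valueOf(rs.getLong(1));
}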