1、基础环境
Spark on YARN 环境已经搭建，通过 `start-thriftserver.sh --master yarn` 启动 SparkSubmit 进程（即 Spark Thrift Server）即可。
数据库使用 DM（达梦）数据库。
2、使用 JDBC 访问 Spark，基于 Java API 实现
2.1 创建 DataSource、Connection、Statement 的代码。这部分属于通用代码，使用 Druid 连接池来创建 JDBC 连接。
/**
* 读取druid.properties,并生成datasource
*/
private static Statement init() throws Exception {
Properties properties = new Properties();
InputStream resourceAsStream =
DataHubTask.class.getClassLoader().getResourceAsStream("conf/druid.properties");
try {
properties.load(resourceAsStream);
} catch (IOException e) {
LOG.error("The resource loaded failed, please check the path {}.", SPARK_THRIFT_SERVER_PATH);
throw new RuntimeException(e);
}
dataSource = DruidDataSourceFactory.createDataSource(properties);
Connection connection = dataSource.getConnection();