Flink 是一款实时计算工具。由于项目中部分场景需要实时统计结果并展示给用户,于是引入了这个工具。一开始采用直接购买云服务的方式,但后来由于某些因素停用了云服务,转而改为自行搭建。
/**
 * CLI-style entry point: reads the target database connection, a base binlog
 * server id and a SQL statement from {@code args}, resolves the sink table
 * (first table reported by the SQL parser) and the source tables (the rest),
 * then delegates to the typed {@code startFlinkJob} overload.
 *
 * <p>Expected arguments, in order: {@code ip, username, pwd, baseServerId, sql}.
 *
 * @param args exactly the 5 positional arguments described above
 * @throws IllegalArgumentException if fewer than 5 arguments are supplied
 * @throws IllegalStateException    if the SQL cannot be parsed or the catalog
 *                                  registration fails
 */
public static void startFlinkJob(String[] args) {
    // The previous version fell back to EnvConstant.DEV when args was empty but
    // then unconditionally read args[3]/args[4], so every call without 5 args
    // crashed with NPE/ArrayIndexOutOfBoundsException anyway — fail fast instead.
    if (args == null || args.length < 5) {
        throw new IllegalArgumentException(
                "Expected 5 arguments <ip> <username> <pwd> <baseServerId> <sql> but got "
                        + (args == null ? 0 : args.length));
    }
    String ip = args[0];
    String username = args[1];
    String pwd = args[2];
    Integer baseServerId = Integer.valueOf(args[3]);
    String sql = args[4];
    try {
        Statement parse = CCJSqlParserUtil.parse(sql);
        List<String> tableList = new TablesNamesFinder().getTableList(parse);
        // Parser convention relied on here: table 0 is the INSERT target (sink),
        // the remaining entries are the SELECT sources. Names are "schema.table".
        String[] sinkParts = StringUtils.split(tableList.get(0), ".");
        Table sink = new Table(sinkParts[0], sinkParts[1]);
        List<Table> sourceList = new ArrayList<>();
        for (int i = 1; i < tableList.size(); i++) {
            String[] sourceParts = StringUtils.split(tableList.get(i), ".");
            sourceList.add(new Table(sourceParts[0], sourceParts[1]));
        }
        startFlinkJob(new DatabaseId(ip, username, pwd), baseServerId, sql, sink,
                sourceList.toArray(new Table[0]));
    } catch (JSQLParserException | DatabaseAlreadyExistException | TableNotExistException e) {
        // Swallowing these (the old printStackTrace) left the job silently
        // unstarted; surface the failure to the caller with its cause intact.
        throw new IllegalStateException("Failed to start Flink job for sql: " + sql, e);
    }
}
/**
 * Registers a MySQL catalog (for reading real table schemas) plus an in-memory
 * target catalog, creates each source as a MySQL-CDC table and the sink as a
 * JDBC table inside that target catalog, then submits {@code sql} as the job.
 *
 * @param databaseId   MySQL connection coordinates (host, user, password)
 * @param baseServerId starting binlog server-id; advanced by {@code parallelism}
 *                     per source so each source table gets a distinct id range
 * @param sql          the INSERT ... SELECT statement to execute as the job
 * @param sink         target table ("schema" + "table name")
 * @param sources      source tables to expose via CDC
 * @throws DatabaseAlreadyExistException if a database cannot be created in the catalog
 * @throws TableNotExistException        if a table's schema cannot be looked up
 */
public static void startFlinkJob(DatabaseId databaseId , Integer baseServerId , String sql , Table sink , Table... sources) throws DatabaseAlreadyExistException, TableNotExistException {
// MySqlCatalog is pointed at information_schema so real column definitions can
// be copied into the generated CREATE TABLE statements below.
MySqlCatalog mySqlCatalog = new MySqlCatalog(mysqlCatalogName, "information_schema?useUnicode=true&characterEncoding=UTF-8&serverTimezone=Asia/Shanghai&useSSL=false", databaseId.getUsername(), databaseId.getPwd(), MessageFormatter.arrayFormat("jdbc:mysql://{}:3306", new Object[]{databaseId.getIp()}).getMessage());
FlinkUtil.addCatalog(mysqlCatalogName,mySqlCatalog);
Catalog devStream = new GenericInMemoryCatalog(targetCatalogName);
FlinkUtil.addCatalog(targetCatalogName, devStream);
tableEnvironment.useCatalog(targetCatalogName);
// Re-fetch: addCatalog is a no-op when the name already exists, so the catalog
// actually registered may differ from the instance created above.
devStream = tableEnvironment.getCatalog(targetCatalogName).get();
for (Table source : sources) {
// ignoreIfExists=true: creating an already-present database is harmless.
devStream.createDatabase(source.getSchema() , new CatalogDatabaseImpl(new HashMap<>(),null),true);
// putIfAbsent doubles as a de-dup check: a non-null return means this table
// was already registered by an earlier job, so skip re-creating it.
if (Objects.nonNull(registerTable.putIfAbsent(source.getIdentity() , new ObjectPath(source.getSchema(), source.getTableName())))) {
continue;
}
// Build "CREATE TABLE schema.table (<columns copied from MySQL>) WITH (<cdc options>)".
// NOTE(review): relies on getUnresolvedSchema().toString() producing valid DDL — verify on Flink upgrades.
String s = "create table " + source.getSchema() + "." + source.getTableName() + tableEnvironment.getCatalog(mysqlCatalogName).get().getTable(new ObjectPath(source.getSchema(), source.getTableName())).getUnresolvedSchema().toString() + getCDCWith(source.getTableName(),source.getSchema(),databaseId ,baseServerId);
// Advance past the server-id range just consumed by this source.
baseServerId+=parallelism;
tableEnvironment.executeSql(s);
}
String s = "";
devStream.createDatabase(sink.getSchema() , new CatalogDatabaseImpl(new HashMap<>(),null),true);
// The sink is only created once; it uses the plain JDBC connector (no CDC).
if (!devStream.tableExists(new ObjectPath(sink.getSchema(), sink.getTableName()))) {
s = "create table " + sink.getSchema() + "." + sink.getTableName() + tableEnvironment.getCatalog(mysqlCatalogName).get().getTable(new ObjectPath(sink.getSchema(), sink.getTableName())).getUnresolvedSchema().toString() + getJdbcWith(sink.getTableName(),sink.getSchema(),databaseId);
tableEnvironment.executeSql(s);
}
// Submit the actual streaming job; executeSql is async for INSERT statements.
tableEnvironment.executeSql(sql);
//Thread.currentThread().join();
}
/**
 * Renders the {@code WITH (...)} options clause for a JDBC sink table
 * pointing at the given MySQL database/table.
 *
 * @param tableName    target table name
 * @param databaseName target database (schema) name
 * @param databaseId   connection coordinates (host, user, password)
 * @return the connector options clause to append to a CREATE TABLE statement
 */
private static String getJdbcWith(String tableName , String databaseName , DatabaseId databaseId) {
    String template = "WITH (\n" +
            " 'connector' = 'jdbc',\n" +
            " 'table-name' = '%s'," +
            " 'url' = 'jdbc:mysql://%s:3306/%s?useUnicode=true&characterEncoding=utf-8&useSSL=false&serverTimezone=UTC'," +
            " 'driver' = 'com.mysql.cj.jdbc.Driver'," +
            " 'username' = '%s'," +
            " 'password' = '%s'" +
            ")";
    return String.format(template, tableName, databaseId.getIp(), databaseName,
            databaseId.getUsername(), databaseId.getPwd());
}
/**
 * Renders the {@code WITH (...)} options clause for a Kafka source table
 * consuming canal-json change events for one database/table.
 *
 * <p>NOTE(review): {@code databaseId} is accepted but unused — topic and
 * broker address are hard-coded; kept for signature symmetry with the other
 * {@code get*With} helpers.
 *
 * @param tableName    table whose change events to include (also used in group id)
 * @param databaseName database whose change events to include
 * @param databaseId   unused
 * @return the connector options clause to append to a CREATE TABLE statement
 */
private static String getKafkaWith(String tableName , String databaseName , DatabaseId databaseId) {
    String topic = "canal_dev";
    String brokers = "127.0.0.1:9092";
    String groupId = "flink_consumer_" + tableName;
    String template = " WITH (\n" +
            " 'connector' = '%s',\n" +
            " 'topic' = '%s',\n" +
            " 'properties.bootstrap.servers' = '%s',\n" +
            " 'properties.group.id' = '%s',\n" +
            " 'scan.startup.mode' = 'latest-offset'," +
            " 'value.format' = 'canal-json'," +
            " 'value.canal-json.timestamp-format.standard'='SQL'," +
            " 'value.canal-json.database.include' = '%s'," +
            " 'value.canal-json.table.include' = '%s'" +
            ")";
    return String.format(template, "kafka", topic, brokers, groupId, databaseName, tableName);
}
/**
 * Renders the {@code WITH (...)} options clause for a mysql-cdc source table.
 *
 * @param tableName    source table name
 * @param databaseName source database (schema) name
 * @param databaseId   connection coordinates (host, user, password)
 * @param baseServerId base offset for the binlog server-id range; the actual
 *                     value/range is derived via {@code getServerId}
 * @return the connector options clause to append to a CREATE TABLE statement
 */
private static String getCDCWith(String tableName , String databaseName , DatabaseId databaseId , Integer baseServerId) {
    // One distinct server-id per parallel subtask, hence the range.
    String serverIds = getServerId(baseServerId, parallelism);
    String template = "WITH (\n" +
            "'connector' = '%s',\n" +
            "'hostname' = '%s',\n" +
            "'port' = '3306',\n" +
            "'username' = '%s',\n" +
            "'password' = '%s',\n" +
            " 'server-time-zone' = 'Asia/Shanghai'," +
            "'table-name' = '%s',\n" +
            "'database-name' = '%s'," +
            "'server-id' = '%s'" +
            ")";
    return String.format(template, "mysql-cdc", databaseId.getIp(), databaseId.getUsername(),
            databaseId.getPwd(), tableName, databaseName, serverIds);
}
/**
 * Renders the {@code WITH (...)} options clause for the "print" connector
 * (a debug sink that writes rows to stdout).
 *
 * @return the connector options clause to append to a CREATE TABLE statement
 */
private static String getPrintWith() {
    return String.join("\n",
            " WITH (",
            " 'connector' = 'print'",
            ")");
}
/**
 * Registers {@code catalog} under {@code catalogName} unless a catalog with
 * that name is already present; an existing catalog is never replaced.
 *
 * <p>Uses a check / lock / re-check on the shared {@code tableEnvironment}
 * so two concurrent callers cannot both register the same name.
 * NOTE(review): this only serializes callers of this method — code that
 * registers catalogs on tableEnvironment directly is not covered.
 *
 * @param catalogName name to register the catalog under
 * @param catalog     catalog instance to register
 */
public static void addCatalog(String catalogName , Catalog catalog) {
if (!tableEnvironment.getCatalog(catalogName).isPresent()) {
synchronized (tableEnvironment) {
// Re-check inside the lock: another thread may have registered it
// between the first check and lock acquisition.
if (!tableEnvironment.getCatalog(catalogName).isPresent()) {
tableEnvironment.registerCatalog(catalogName , catalog);
}
}
}
}
/**
 * Builds and configures the (static) streaming execution/table environment:
 * memory sizing, checkpointing, state backend, parallelism, and the project's
 * custom SQL functions.
 *
 * <p>NOTE(review): assigns the static fields {@code executionEnvironment} and
 * {@code tableEnvironment} as a side effect; presumably called once at
 * startup — verify callers do not invoke this repeatedly.
 *
 * @return the configured {@link StreamTableEnvironment}
 */
private static StreamTableEnvironment getEnvironment() {
EnvironmentSettings build = EnvironmentSettings.newInstance().inStreamingMode().build();
executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
tableEnvironment = StreamTableEnvironment.create(executionEnvironment, build);
// Memory layout for the task/job managers. NOTE(review): TaskManagerOptions set
// through TableConfig typically only take effect for local/embedded execution,
// not on an already-running remote cluster — confirm against the deployment mode.
tableEnvironment.getConfig().set(TaskManagerOptions.NETWORK_MEMORY_MIN , new MemorySize(1024*1024*5000L));
tableEnvironment.getConfig().set(TaskManagerOptions.NETWORK_MEMORY_MAX , new MemorySize(1024*1024*5000L));
tableEnvironment.getConfig().set(TaskManagerOptions.NETWORK_MEMORY_FRACTION , 0.9f);
tableEnvironment.getConfig().set(TaskManagerOptions.TOTAL_PROCESS_MEMORY,new MemorySize(1024*1024*1024*4L));
tableEnvironment.getConfig().set(TaskManagerOptions.TASK_HEAP_MEMORY,new MemorySize(1024*1024*1024*2L));
tableEnvironment.getConfig().set(TaskManagerOptions.FRAMEWORK_HEAP_MEMORY,new MemorySize(1024*1024*1024*2L));
tableEnvironment.getConfig().set(TaskManagerOptions.FRAMEWORK_OFF_HEAP_MEMORY,new MemorySize(1024*1024*1024*2L));
tableEnvironment.getConfig().set(TaskManagerOptions.MANAGED_MEMORY_SIZE,new MemorySize(1024*1024*256L));
tableEnvironment.getConfig().set(TaskManagerOptions.NUM_TASK_SLOTS,4);
tableEnvironment.getConfig().set(JobManagerOptions.TOTAL_PROCESS_MEMORY,new MemorySize(1024*1024*1024*1L));
// Checkpoint once every 24h to local disk with the heap state backend.
executionEnvironment.enableCheckpointing(24 * 60 * 60 * 1000);
executionEnvironment.getCheckpointConfig().setCheckpointStorage("file:///opt/flink/checkpoint");
executionEnvironment.setStateBackend(new HashMapStateBackend());
// Keep idle state for a year; jobs here aggregate over long-lived keys.
tableEnvironment.getConfig().setIdleStateRetention(Duration.of(365, ChronoUnit.DAYS));
//executionEnvironment.setStateBackend(new EmbeddedRocksDBStateBackend(true));
//executionEnvironment.setStateBackend(new EmbeddedRocksDBStateBackend(true));
//executionEnvironment.getCheckpointConfig().setCheckpointInterval(60*60*1000);
//executionEnvironment.getCheckpointConfig().disableCheckpointing();
//tableEnvironment.getConfig().set(TaskManagerOptions.TOTAL_FLINK_MEMORY , new MemorySize(1024*1024*2024));
executionEnvironment.setParallelism(parallelism);
executionEnvironment.getConfig().setParallelism(parallelism);
// Register custom SQL functions covering capabilities Flink lacks out of the box.
tableEnvironment.createTemporarySystemFunction("split_str", SplitFunction.class);
tableEnvironment.createTemporarySystemFunction("jsonarray_parse", JSONArrayParseFunction.class);
tableEnvironment.createTemporarySystemFunction("polygon_parse", PolygonParseFunction.class);
tableEnvironment.createTemporarySystemFunction("int_range_parse", IntegerRangeFunction.class);
return tableEnvironment;
}
/**
 * Builds the MySQL CDC {@code server-id} option value for a source table.
 *
 * <p>For a single subtask returns one id ({@code baseId + 1}); for
 * {@code num} subtasks returns an inclusive range
 * {@code "(baseId+1)-(baseId+num)"} so each parallel binlog reader gets a
 * distinct server id.
 *
 * @param baseId base offset; ids start at {@code baseId + 1}
 * @param num    number of ids needed (typically the job parallelism); must be >= 1
 * @return a single id, or an inclusive {@code "first-last"} range, as a string
 */
public static String getServerId(Integer baseId , Integer num) {
    // Replaced MessageFormatter with a bare "{}" template — it only stringified
    // the argument; plain string building produces identical output.
    // NOTE(review): both params unbox, so a null argument throws NPE.
    int first = baseId + 1;
    if (num == 1) {
        return String.valueOf(first);
    }
    // Inclusive range: first .. first + num - 1.
    return first + "-" + (first + num - 1);
}