QueryIceberg code backup

This code example shows how to use Apache Iceberg as the storage layer in a Flink streaming environment and run SQL queries against it. The program first registers several user-defined functions, then reads its configuration (the SQL statements, task ID, and so on) from command-line arguments. It creates a Flink streaming environment, registers the UDFs, and sets up an Iceberg catalog. It then executes an SQL insert followed by a query and posts the result to a configured service. If the result is saved successfully, the temporary table is dropped.
package autox.xdata.iceberg;

import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.apache.commons.lang.text.StrSubstitutor;
import org.apache.commons.lang3.time.StopWatch;
// RuntimeExecutionMode, RestartStrategies and Time are only referenced by the commented-out settings in main().
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import javaProto.autox.xdata.RunTaskStatus;
import autox.xdata.http.HttpRequest;

import org.apache.iceberg.aws.s3.S3FileIO;
import org.apache.iceberg.jdbc.JdbcCatalog;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

public class QueryIceberg {
    public static final Logger logger = LoggerFactory.getLogger("autox.xdata.iceberg.queryIcebergJob");

    // POST the result JSON for this task to the result-collecting service; returns true on HTTP success.
    public static boolean save_result(String service, String url_path, JSONObject res) {
        HttpRequest httpRequest = new HttpRequest();
        return httpRequest.postJsonData(service + url_path, res.toJSONString());
    }

    // Register the project's custom UDFs so they can be referenced by name in the submitted SQL.
    public static void register_functions(StreamTableEnvironment tableEnv) {
        tableEnv.createTemporarySystemFunction("DayOrNight", QueryUDF.DayOrNight.class);
        tableEnv.createTemporarySystemFunction("DisengageTimes", QueryUDF.DisengageTimes.class);
        tableEnv.createTemporarySystemFunction("TagsContain", QueryUDF.TagsContain.class);
        tableEnv.createTemporarySystemFunction("EventsTags", QueryUDF.EventsTags.class);
    }

    public static void main(String[] args) throws Exception {
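        // Overall flow: read parameters -> create the Flink env and Iceberg catalog -> run the insert and the
        // xray join query -> post the result back to the service and clean up the per-task temp table.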
        StopWatch sw = new StopWatch();
        sw.start(); // start immediately so the "prepare use_time" logged below is actually measured

        // parameters
        ParameterTool parameter = ParameterTool.fromArgs(args);
        String taskId = parameter.getRequired("taskId");
        String jobName = parameter.get("jobName", "jobName_icebergQueryJob_" + taskId);
        logger.info(String.format("Parameter: %s, jobName: %s", parameter.getConfiguration(), jobName));

        String base64_not_xray_sql = parameter.getRequired("notXraySql");
        String base64_xray_where_sql = parameter.getRequired("xrayWhereSql");
        String catalogName = parameter.get("catalogName", "xdata");
        Double scenario_merge_interval_ms = Double.valueOf(parameter.get("scenarioMergeIntervalMs", "1000")); // 1 s
        String service = parameter.get("service", "http://10.10.82.153:5000");
        String url_path = parameter.get("url_path", "/scenario_search/result/save");
        String conf_path = parameter.getRequired("config");
        Integer await_min = parameter.getInt("await_min", 30);

        // conf
        Map<String, String> conf = ParameterTool.fromPropertiesFile(conf_path).getConfiguration().toMap();
        int parallelism = Integer.parseInt(conf.getOrDefault("flink.query.parallelism", "20"));

        // init
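        // Template for the final xray query: join the per-task temp result table (${tmp}.${res}) with
        // xraydb.xrayMeta to attach the host name, xray URL and timing info to every matched record.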
        String origin_xray_columns = "select _xray_id as record, " +
                "record_metadata.hostname as hostName, " +
                "concat(xray_server_info.domain, '/id?', _xray_id) as xrayUrl, " +
                "${tmp}.${res}.startTimeMs / 1000 as startTime, " +
                "(${tmp}.${res}.endTimeMs - ${tmp}.${res}.startTimeMs) as duration, " +
                "(${tmp}.${res}.startTimeMs / 1000 - record_metadata.time_info.start_timestamp) as relativeTime " +
                "from ${tmp}.${res} inner join xraydb.xrayMeta on xraydb.xrayMeta._xray_id = ${tmp}.${res}.xrayId ";
        String origin_data_tmp_table_name = "default_catalog.default_database.tmp";
        String sink_namespace = "tmp";
        String table_name = "query_iceberg_res_" + taskId;

        // Fill the ${res}/${tmp} placeholders of the query template with this task's table names.
        HashMap<String, String> valuesMap = new HashMap<>() {{
            put("res", table_name);
            put("tmp", sink_namespace);
        }};
        String xray_columns = new StrSubstitutor(valuesMap).replace(origin_xray_columns);

        // Decode the Base64-encoded SQL fragments from the command line and assemble the insert, query and cleanup statements.
        ParseIcebergData job = new ParseIcebergData();
        String not_xray_sql = String.format("insert into %s %s", origin_data_tmp_table_name, job.parse_base64_sql(base64_not_xray_sql));
        String xray_sql = xray_columns + job.parse_base64_sql(base64_xray_where_sql);
        String drop_table_sql = String.format("drop table if exists %s.%s", sink_namespace, table_name);

        logger.info(String.format("jobName: %s, prepare use_time: %dms \n", jobName, sw.getTime()));
        sw.reset();
        sw.start();

        // response msg
        String error_msg = "";

        Integer taskStatus = RunTaskStatus.FAILED_VALUE;
        long execute_sql_used_time_ms = 0;
        JSONArray record_list = new JSONArray();
        TableResult execute_res = null;
        StreamTableEnvironment tableEnv = null;

        JSONObject res = new JSONObject();
        res.put("taskId", taskId);
        try {
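            // Build the Flink streaming environment; parallelism comes from the job config, while the restart
            // strategy and batch mode stay at their defaults (the commented-out lines show the alternatives).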
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//            env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
//                    3, // number of restart attempts
//                    Time.of(10, TimeUnit.SECONDS) // delay
//            ));
            env.setMaxParallelism(parallelism * 3);
//            env.setRuntimeMode(RuntimeExecutionMode.BATCH);
            env.setParallelism(parallelism);
            logger.info("env.getConfiguration:" + env.getConfiguration());
            logger.info("env.getConfig():" + env.getConfig());

            tableEnv = StreamTableEnvironment.create(env);
            register_functions(tableEnv);

            tableEnv.executeSql(String.format("CREATE CATALOG %s WITH (" +
                    "'type'='iceberg'," +
                    "'catalog-impl'='%s'," +
                    "'warehouse'='%s'," +
                    "'io-impl'='%s'," +
                    "'uri'='%s'," +
                    "'jdbc.user'='%s'," +
                    "'jdbc.password'='%s'," +
                    "'s3.endpoint'='%s'," +
                    "'s3.access-key-id'='%s'," +
                    "'s3.secret-access-key'='%s'," +
                    "'s3.region'='default')",
                    catalogName, JdbcCatalog.class.getName(),
                    conf.get("s3.file_location"), S3FileIO.class.getName(),
                    conf.get("jdbc.uri"), conf.get("jdbc.user"), conf.get("jdbc.password"),
                    conf.get("s3.endpoint"), conf.get("s3.access-key-id"), conf.get("s3.secret-access-key"))
            );
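            // Make the Iceberg catalog the session default so the namespace.table names below resolve against it.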
            tableEnv.useCatalog(catalogName);

            // Remove any leftover result table from a previous run of the same taskId.
            tableEnv.executeSql(drop_table_sql).await(await_min, TimeUnit.SECONDS);

            tableEnv.executeSql(String.format("CREATE TABLE %s (name STRING, timestamp_ms DOUBLE) WITH (" +
                    "'connector' = 'xDataScenarioIcebergTableSink'," +
                    "'merge-interval-ms' = '%s'," +
                    "'sink-catalog-id' = '%s'," +
                    "'sink-namespace' = '%s'," +
                    "'sink-table-name' = '%s'" +
                    ")",
                    origin_data_tmp_table_name, scenario_merge_interval_ms, catalogName, sink_namespace, table_name)
            );
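            // First stage: run the decoded "non-xray" query, inserting matching rows into the temp sink table
            // (and, via the connector, into the Iceberg temp table).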
            logger.info("not_xray_sql: " + not_xray_sql);
            tableEnv.executeSql(not_xray_sql).await(await_min, TimeUnit.MINUTES);

            logger.info("xray_sql: " + xray_sql);
            execute_res = tableEnv.executeSql(xray_sql);
            record_list = job.parse_iceberg_result(execute_res);
            taskStatus = RunTaskStatus.FINISHED_VALUE;

            execute_sql_used_time_ms = sw.getTime();

            logger.info(String.format("execute finish! jobName: %s, executeSql use_time: %dms \n", jobName, execute_sql_used_time_ms));
        } catch (Exception e) {
            logger.error("query iceberg job failed", e);
            error_msg = e.getLocalizedMessage();
        }

        res.put("taskStatus", taskStatus);
        res.put("record", record_list);
        res.put("used_time_ms", execute_sql_used_time_ms);
        res.put("error_msg", error_msg);

        logger.info(String.format("finish! size:%s : jobName: %s, res: %s", record_list.size(), jobName, res));
        boolean is_save = save_result(service, url_path, res);
        if (is_save && tableEnv != null) {
            logger.info("save_result successfully! start execute sql: " + drop_table_sql);
            tableEnv.executeSql(drop_table_sql).await(await_min, TimeUnit.SECONDS);
        }
    }
}
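
The job expects notXraySql and xrayWhereSql to arrive Base64-encoded and hands them to ParseIcebergData.parse_base64_sql (not shown here). Assuming that method simply Base64-decodes a UTF-8 SQL string, a caller could prepare the two arguments roughly as sketched below; the SQL text and the EncodeQuerySqlArgs class are made up for illustration, and the decoded notXraySql must produce the (name, timestamp_ms) columns expected by the temp sink table.

import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class EncodeQuerySqlArgs {
    public static void main(String[] args) {
        // Hypothetical caller-side helper for QueryIceberg's --notXraySql / --xrayWhereSql arguments
        // (assumes parse_base64_sql is a plain Base64 decode of UTF-8 SQL text).
        String notXraySql = "select name, timestamp_ms from some_source_table where name = 'disengage'";
        String xrayWhereSql = "where tags is not null";

        String encodedNotXray = Base64.getEncoder()
                .encodeToString(notXraySql.getBytes(StandardCharsets.UTF_8));
        String encodedXrayWhere = Base64.getEncoder()
                .encodeToString(xrayWhereSql.getBytes(StandardCharsets.UTF_8));

        // Pass these values on the job's command line together with --taskId and --config.
        System.out.println("--notXraySql " + encodedNotXray);
        System.out.println("--xrayWhereSql " + encodedXrayWhere);
    }
}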
