Doris:读取Doris数据的N种方法

目录

1.MySQL Client

2.JDBC

3. 查询计划

4.Spark Doris Connector

5.Flink Doris Connector

6.Arrow Flight SQL


1.MySQL Client

        Doris 采用 MySQL 协议,高度兼容 MySQL 语法,支持标准 SQL,用户可以通过各类客户端工具来访问 Doris。登录到doris服务器后,可使用 select语句查询数据。

mysql -uroot -P9030 -h127.0.0.1

        为了防止用户的一个查询可能因为消耗内存过大。查询进行了内存控制,一个查询任务,在单个 BE 节点上默认使用不超过 2GB 内存。用户在使用时,如果发现报 Memory limit exceeded 错误,一般是超过内存限制了。遇到内存超限时,用户应该尽量通过优化自己的 sql 语句来解决。如果确切发现2GB内存不能满足,可以手动设置内存参数。

    select 查询如果使用limit分页查询,则需要指定order by 字段,否则同一个sql返回的数据可能不一样。

2.JDBC

        由于Doris 采用 MySQL 协议,同样也支持通过JDBC方式读取数据。

package com.yichenkeji.demo.test;


import lombok.extern.slf4j.Slf4j;

import java.sql.*;
import java.util.Properties;

@Slf4j
public class DorisJDBCDemo {

    public static void main(String[] args) throws SQLException {
        String jdbc_driver = "com.mysql.cj.jdbc.Driver";
        String jdbc_url = "jdbc:mysql://192.168.179.131:9030/demo?rewriteBatchedStatements=true";
        String username = "root";
        String password = "";

        Connection conn = getConnection(username,password,jdbc_url,jdbc_driver);
        log.info("{}",conn);
        String sql = "select * from dim_area limit 10";
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery(sql);
        while (rs.next()){
            log.info("id={},name={}",rs.getFloat("id"),rs.getString("name"));
        }

        closeConnection(conn);
    }

    /**
     * 获取连接
     * @param username
     * @param password
     * @param jdbcUrl
     * @param driver
     * @return
     */

    public static Connection getConnection(String username,String password,String jdbcUrl,String driver) {

        Properties prop = new Properties();
        prop.put("user", username);
        prop.put("password", password);
        try {
            Class.forName(driver);

            log.info("jdbcUrl:{}",jdbcUrl);
            return DriverManager.getConnection(jdbcUrl, prop);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * 关闭连接
     * @param conn
     */
    public static void closeConnection(Connection conn) {
        if(conn != null){
            try {
                if(!conn.isClosed()){
                    conn.close();
                }
            } catch (SQLException e) {
                log.error("SQLException:{}", e.getMessage());
            }
        }
    }

}

3. 查询计划

        由于jdbc查询暂时不支持流式读取,如果读取的数据量过大,一次性读取全部数据需要很大的资源,所有可以使用查询计划API接口,给定一个 SQL,获取该 SQL 对应的查询计划。通过返回的数据分区信息,分批读取数据。

引用jar包

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.yichenkeji</groupId>
    <artifactId>doris-demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <thrift-service.version>1.0.0</thrift-service.version>
        <lombok.version>1.18.26</lombok.version>
        <httpclient.version>4.5.14</httpclient.version>
        <libthrift.version>0.16.0</libthrift.version>
        <fastjson2.version>2.0.32</fastjson2.version>
        <arrow.version>13.0.0</arrow.version>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>${lombok.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>${httpclient.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.thrift</groupId>
            <artifactId>libthrift</artifactId>
            <version>${libthrift.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.doris</groupId>
            <artifactId>thrift-service</artifactId>
            <version>${thrift-service.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.alibaba.fastjson2/fastjson2 -->
        <dependency>
            <groupId>com.alibaba.fastjson2</groupId>
            <artifactId>fastjson2</artifactId>
            <version>${fastjson2.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-flight -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-flight</artifactId>
            <version>${arrow.version}</version>
            <type>pom</type>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.arrow/flight-sql -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>flight-sql</artifactId>
            <version>${arrow.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-netty</artifactId>
            <version>${arrow.version}</version>
            <scope>runtime</scope>
        </dependency>
    </dependencies>
</project>

编码实现:

package com.yichenkeji.doris;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.*;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
import org.apache.arrow.vector.types.Types;
import org.apache.commons.codec.binary.Base64;
import org.apache.doris.sdk.thrift.*;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.thrift.TConfiguration;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;

public class QueryPlanMain {
    private static String queryPlanUrlFormat = "%s/api/%s/%s/_query_plan";


    public static void main(String[] args) throws Exception {
        String dorisUrl = "http://127.0.0.1:8030";
        String username = "root";
        String password = "1234564";
        String database = "article";
        String table = "dim_user";
//        String[] columns = {"id","name","merger_name","area_code","level"};
        String[] columns = {"*"};
        String sql ="select "+String.join(",",columns)+" from "+database+"."+table
//                +" where id<= 10 "
//                + " order by id " //不支持order by
//                + " limit 10" //不支持limit
                ;
        JSONObject queryPlanResult = getQueryPlan(dorisUrl,username,password,database,table,sql);
        System.out.println("查询计划执行结果:"+queryPlanResult);
        //根据查询计划结果,查询数据
        if (queryPlanResult != null){
            readData(queryPlanResult,username,password,database,table);
        }
    }

    private static JSONObject getQueryPlan(String dorisUrl, String username,String password,String database, String table, String sql) throws Exception {
        try (CloseableHttpClient client = HttpClients.custom().build()) {

            String queryPlanUrl = String.format(queryPlanUrlFormat, dorisUrl, database, table);


            HttpPost post = new HttpPost(queryPlanUrl);
            System.out.println("执行查询计划url: "+queryPlanUrl);
            post.setHeader(HttpHeaders.EXPECT, "100-continue");
            post.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(username, password));

            // The param is specific SQL, and the query plan is returned
            Map<String, String> params = new HashMap<>();
            params.put("sql", sql);
            System.out.println("执行查询计划参数: "+ params);
            StringEntity entity = new StringEntity(JSON.toJSONString(params));
            post.setEntity(entity);

            try (CloseableHttpResponse response = client.execute(post)) {

                if (response.getEntity() != null) {
                    //解析查询计划
                    JSONObject queryPlanJSONObject = JSONObject.parseObject(EntityUtils.toString(response.getEntity()));
                    System.out.println("执行查询计划response: " + queryPlanJSONObject);
                    JSONObject dataJSONObject = queryPlanJSONObject.getJSONObject("data");
                    //查询计划异常返回exception信息
                    if (dataJSONObject.containsKey("exception")) {
                        throw new RuntimeException(dataJSONObject.getString("exception"));
                    }
                    return dataJSONObject;
                }
            }
        }
        return null;
    }

    private static String basicAuthHeader(String username, String password) {
        final String info = username + ":" + password;
        byte[] encoded = Base64.encodeBase64(info.getBytes(StandardCharsets.UTF_8));
        return "Basic " + new String(encoded);
    }
    private static void readData(JSONObject data,String username,String password,String database,String table) throws Exception {

        String queryPlan = data.getString("opaqued_query_plan");
        JSONObject partitions = data.getJSONObject("partitions");
        long readTotal = 0L;
        long totalTime = 0L;
        for (Map.Entry<String, Object> tablet : partitions.entrySet()) {
            System.out.println("tablet信息:"+ tablet);
            long startTime = System.currentTimeMillis();
            Long tabletId = Long.parseLong(tablet.getKey());
            JSONObject value = JSONObject.parseObject(JSON.toJSONString(tablet.getValue()));
            //get first backend
            String routingsBackend = value.getJSONArray("routings").getString(0);
            String backendHost = routingsBackend.split(":")[0];
            String backendPort = routingsBackend.split(":")[1];

            //connect backend
            TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
            TTransport transport = new TSocket(new TConfiguration(), backendHost, Integer.parseInt(backendPort));
            TProtocol protocol = factory.getProtocol(transport);
            TDorisExternalService.Client client = new TDorisExternalService.Client(protocol);
            if (!transport.isOpen()) {
                transport.open();
            }

            //build params
            TScanOpenParams params = new TScanOpenParams();
            params.cluster = "default_cluster";
            params.database = database;
            params.table = table;
            params.tablet_ids = Collections.singletonList(tabletId);
            params.opaqued_query_plan = queryPlan;
            // max row number of one read batch
            params.setBatchSize(1000);
            params.setQueryTimeout(3600);
            params.setMemLimit(2147483648L);
            params.setUser(username);
            params.setPasswd(password);

            //open scanner
            TScanOpenResult tScanOpenResult = client.openScanner(params);
            if (!TStatusCode.OK.equals(tScanOpenResult.getStatus().getStatusCode())) {
                throw new RuntimeException(String.format("The status of open scanner result from %s is '%s', error message is: %s.",
                        routingsBackend, tScanOpenResult.getStatus().getStatusCode(), tScanOpenResult.getStatus().getErrorMsgs()));
            }

            TScanNextBatchParams nextBatchParams = new TScanNextBatchParams();
            nextBatchParams.setContextId(tScanOpenResult.getContextId());
            boolean eos = false;
            //read data
            int offset = 0;
            long nums = 0L;
            while (!eos) {
                nextBatchParams.setOffset(offset);
                TScanBatchResult next = client.getNext(nextBatchParams);
                if (!TStatusCode.OK.equals(next.getStatus().getStatusCode())) {
                    throw new RuntimeException(String.format("The status of get next result from %s is '%s', error message is: %s.",
                            routingsBackend, next.getStatus().getStatusCode(), next.getStatus().getErrorMsgs()));
                }
                eos = next.isEos();
                if (!eos) {
                    int i = convertArrow(next);
                    offset += i;
                    nums += i;
                }
            }
            readTotal += nums;
            long cost_time =(System.currentTimeMillis() - startTime);
            totalTime +=cost_time;
            System.out.println("tabletId["+tabletId+"]任务结束,总数据量:"+nums+",总花费:"+cost_time+"ms");
            //close
            TScanCloseParams closeParams = new TScanCloseParams();
            closeParams.setContextId(tScanOpenResult.getContextId());
            client.closeScanner(closeParams);
            if (transport.isOpen()) {
                transport.close();
            }
        }
        System.out.println("总任务结束,总数据量:"+readTotal+",总花费:"+totalTime+"ms");

    }


    private static int convertArrow(TScanBatchResult nextResult) throws Exception {
        long startTime = System.currentTimeMillis();
        int offset = 0;
        RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
        ArrowStreamReader arrowStreamReader = new ArrowStreamReader(new ByteArrayInputStream(nextResult.getRows()), rootAllocator);
        VectorSchemaRoot root = arrowStreamReader.getVectorSchemaRoot();
        while (arrowStreamReader.loadNextBatch()) {
            List<FieldVector> fieldVectors = root.getFieldVectors();
            //total data rows
            int rowCountInOneBatch = root.getRowCount();

            //按行获取数据
//            for (int row = 0; row < rowCountInOneBatch; row++) {
//                List<Object> rowData = new ArrayList<>();
//                for (FieldVector fieldVector : fieldVectors) {
//                    Types.MinorType minorType = fieldVector.getMinorType();
//                    Object v = convertValue(row, minorType, fieldVector);
//                    rowData.add(v);
//                }
                System.out.println(rowData);
//            }
            //按列获取数据
            List<Object> rowData = new ArrayList<>();
            for (FieldVector fieldVector : fieldVectors) {
                Types.MinorType minorType = fieldVector.getMinorType();
                List<Object> filedData = new ArrayList<>();
                for (int row = 0; row < rowCountInOneBatch; row++) {
                    Object v = convertValue(row, minorType, fieldVector);
                    filedData.add(v);

                }
//                System.out.println(filedData);
            }
            offset += root.getRowCount();
        }

        //处理完之后要关闭,否则容易内存溢出
        arrowStreamReader.close();

        return offset;
    }

    private static Object convertValue(int rowIndex,
                                       Types.MinorType minorType,
                                       FieldVector fieldVector) {
        Object fieldValue;
        switch (minorType) {
            case BIT:
                BitVector bitVector = (BitVector) fieldVector;
                fieldValue = bitVector.isNull(rowIndex) ? null : bitVector.get(rowIndex) != 0;
                break;
            case TINYINT:
                TinyIntVector tinyIntVector = (TinyIntVector) fieldVector;
                fieldValue = tinyIntVector.isNull(rowIndex) ? null : tinyIntVector.get(rowIndex);
                break;
            case SMALLINT:
                SmallIntVector smallIntVector = (SmallIntVector) fieldVector;
                fieldValue = smallIntVector.isNull(rowIndex) ? null : smallIntVector.get(rowIndex);
                break;
            case INT:
                IntVector intVector = (IntVector) fieldVector;
                fieldValue = intVector.isNull(rowIndex) ? null : intVector.get(rowIndex);
                break;
            case BIGINT:
                BigIntVector bigIntVector = (BigIntVector) fieldVector;
                fieldValue = bigIntVector.isNull(rowIndex) ? null : bigIntVector.get(rowIndex);
                break;
            case FLOAT4:
                Float4Vector float4Vector = (Float4Vector) fieldVector;
                fieldValue = float4Vector.isNull(rowIndex) ? null : float4Vector.get(rowIndex);
                break;
            case FLOAT8:
                Float8Vector float8Vector = (Float8Vector) fieldVector;
                fieldValue = float8Vector.isNull(rowIndex) ? null : float8Vector.get(rowIndex);
                break;
            case VARBINARY:
                VarBinaryVector varBinaryVector = (VarBinaryVector) fieldVector;
                fieldValue = varBinaryVector.isNull(rowIndex) ? null : varBinaryVector.get(rowIndex);
                break;
            case DECIMAL:
                DecimalVector decimalVector = (DecimalVector) fieldVector;
                fieldValue = decimalVector.getObject(rowIndex).stripTrailingZeros();
                break;
            case VARCHAR:
                VarCharVector date = (VarCharVector) fieldVector;
                fieldValue = new String(date.get(rowIndex));
                break;
            case LIST:
                ListVector listVector = (ListVector) fieldVector;
                fieldValue = listVector.isNull(rowIndex) ? null : listVector.getObject(rowIndex);
                break;
            default:
                fieldValue = fieldVector.isNull(rowIndex) ? null : fieldVector.getObject(rowIndex);
        }
        return fieldValue;
    }
}

4.Spark Doris Connector

        Spark Doris Connector 可以支持通过 Spark 读取 Doris 中存储的数据,也支持通过Spark写入数据到Doris。

val dorisSparkDF = spark.read.format("doris")
  .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME")
  .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT")
  .option("user", "$YOUR_DORIS_USERNAME")
  .option("password", "$YOUR_DORIS_PASSWORD")
  .load()

dorisSparkDF.show(5)

5.Flink Doris Connector

        Flink Doris Connector 可以支持通过 Flink 操作(读取、插入、修改、删除) Doris 中存储的数据。本文档介绍Flink如何通过Datastream和SQL操作Doris。

DorisOptions.Builder builder = DorisOptions.builder()
        .setFenodes("FE_IP:HTTP_PORT")
        .setTableIdentifier("db.table")
        .setUsername("root")
        .setPassword("password");

DorisSource<List<?>> dorisSource = DorisSourceBuilder.<List<?>>builder()
        .setDorisOptions(builder.build())
        .setDorisReadOptions(DorisReadOptions.builder().build())
        .setDeserializer(new SimpleListDeserializationSchema())
        .build();

env.fromSource(dorisSource, WatermarkStrategy.noWatermarks(), "doris source").print();

6.Arrow Flight SQL

        Arrow Flight SQL 是一种使用 Arrow 内存格式和 Flight RPC 框架与 SQL 数据库交互的协议。在 Apache Doris 2.1 版本中,提供基于 Arrow Flight SQL 构建了高速数据传输链路,支持主流语言通过 SQL 从 Doris 高速读取大规模数据,极大提升了其他系统与 Apache Doris 间数据传输效率。

1)maven 引入flight-sql-jdbc-driver包:

​
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>com.yichenkeji</groupId>
        <artifactId>yichen-demo</artifactId>
        <version>1.0</version>
    </parent>

    <artifactId>yichen-demo-doris</artifactId>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <lombok.version>1.18.26</lombok.version>
        <arrow.version>15.0.2</arrow.version>
    </properties>

    <dependencies>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>${lombok.version}</version>
            <scope>provided</scope>
        </dependency>


        <!-- https://mvnrepository.com/artifact/org.apache.arrow/flight-sql-jdbc-driver -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>flight-sql-jdbc-driver</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-jdbc -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-jdbc</artifactId>
            <version>${arrow.version}</version>
        </dependency>

    </dependencies>
</project>

​

2)JDBC编码实现

package com.yichenkeji.demo.doris;

import java.sql.*;
import java.util.Properties;

public class ArrowFlightSqlJdbc {

    public static void main(String[] args) throws SQLException {
        String jdbc_url = "jdbc:arrow-flight-sql://192.168.179.134:9040";
        Properties prop = new Properties();
        prop.put("user", "root");
        prop.put("password", "");
        //需要配置该参数,否则报错
        prop.put("useEncryption", false);
        try(Connection conn = DriverManager.getConnection(jdbc_url,prop);
            Statement stmt = conn.createStatement();
        ) {
            System.out.println("conn: "+conn);
            String sql = "select id,name from demo.dim_user";
            ResultSet rs = stmt.executeQuery(sql);
            System.out.println("rs: "+rs);
            while (rs.next()){
                System.out.println("id="+rs.getLong("id") +",name="+rs.getString("name"));
            }
        }catch (Exception e){
            e.printStackTrace();
        }

    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

m0_37559973

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值