目录
1.MySQL Client
Doris 采用 MySQL 协议,高度兼容 MySQL 语法,支持标准 SQL,用户可以通过各类客户端工具来访问 Doris。登录到doris服务器后,可使用 select语句查询数据。
mysql -uroot -P9030 -h127.0.0.1
为了防止用户的一个查询可能因为消耗内存过大。查询进行了内存控制,一个查询任务,在单个 BE 节点上默认使用不超过 2GB 内存。用户在使用时,如果发现报 Memory limit exceeded 错误,一般是超过内存限制了。遇到内存超限时,用户应该尽量通过优化自己的 sql 语句来解决。如果确切发现2GB内存不能满足,可以手动设置内存参数。
select 查询如果使用limit分页查询,则需要指定order by 字段,否则同一个sql返回的数据可能不一样。
2.JDBC
由于Doris 采用 MySQL 协议,同样也支持通过JDBC方式读取数据。
package com.yichenkeji.demo.test;
import lombok.extern.slf4j.Slf4j;
import java.sql.*;
import java.util.Properties;
@Slf4j
public class DorisJDBCDemo {
public static void main(String[] args) throws SQLException {
String jdbc_driver = "com.mysql.cj.jdbc.Driver";
String jdbc_url = "jdbc:mysql://192.168.179.131:9030/demo?rewriteBatchedStatements=true";
String username = "root";
String password = "";
Connection conn = getConnection(username,password,jdbc_url,jdbc_driver);
log.info("{}",conn);
String sql = "select * from dim_area limit 10";
Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(sql);
while (rs.next()){
log.info("id={},name={}",rs.getFloat("id"),rs.getString("name"));
}
closeConnection(conn);
}
/**
* 获取连接
* @param username
* @param password
* @param jdbcUrl
* @param driver
* @return
*/
public static Connection getConnection(String username,String password,String jdbcUrl,String driver) {
Properties prop = new Properties();
prop.put("user", username);
prop.put("password", password);
try {
Class.forName(driver);
log.info("jdbcUrl:{}",jdbcUrl);
return DriverManager.getConnection(jdbcUrl, prop);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* 关闭连接
* @param conn
*/
public static void closeConnection(Connection conn) {
if(conn != null){
try {
if(!conn.isClosed()){
conn.close();
}
} catch (SQLException e) {
log.error("SQLException:{}", e.getMessage());
}
}
}
}
3. 查询计划
由于jdbc查询暂时不支持流式读取,如果读取的数据量过大,一次性读取全部数据需要很大的资源,所有可以使用查询计划API接口,给定一个 SQL,获取该 SQL 对应的查询计划。通过返回的数据分区信息,分批读取数据。
引用jar包
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.yichenkeji</groupId>
<artifactId>doris-demo</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<thrift-service.version>1.0.0</thrift-service.version>
<lombok.version>1.18.26</lombok.version>
<httpclient.version>4.5.14</httpclient.version>
<libthrift.version>0.16.0</libthrift.version>
<fastjson2.version>2.0.32</fastjson2.version>
<arrow.version>13.0.0</arrow.version>
</properties>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>${httpclient.version}</version>
</dependency>
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${libthrift.version}</version>
</dependency>
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>thrift-service</artifactId>
<version>${thrift-service.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba.fastjson2/fastjson2 -->
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>${fastjson2.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-vector</artifactId>
<version>${arrow.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-flight -->
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-flight</artifactId>
<version>${arrow.version}</version>
<type>pom</type>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.arrow/flight-sql -->
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-sql</artifactId>
<version>${arrow.version}</version>
</dependency>
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-memory-netty</artifactId>
<version>${arrow.version}</version>
<scope>runtime</scope>
</dependency>
</dependencies>
</project>
编码实现:
package com.yichenkeji.doris;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.*;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
import org.apache.arrow.vector.types.Types;
import org.apache.commons.codec.binary.Base64;
import org.apache.doris.sdk.thrift.*;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.apache.thrift.TConfiguration;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
public class QueryPlanMain {
private static String queryPlanUrlFormat = "%s/api/%s/%s/_query_plan";
public static void main(String[] args) throws Exception {
String dorisUrl = "http://127.0.0.1:8030";
String username = "root";
String password = "1234564";
String database = "article";
String table = "dim_user";
// String[] columns = {"id","name","merger_name","area_code","level"};
String[] columns = {"*"};
String sql ="select "+String.join(",",columns)+" from "+database+"."+table
// +" where id<= 10 "
// + " order by id " //不支持order by
// + " limit 10" //不支持limit
;
JSONObject queryPlanResult = getQueryPlan(dorisUrl,username,password,database,table,sql);
System.out.println("查询计划执行结果:"+queryPlanResult);
//根据查询计划结果,查询数据
if (queryPlanResult != null){
readData(queryPlanResult,username,password,database,table);
}
}
private static JSONObject getQueryPlan(String dorisUrl, String username,String password,String database, String table, String sql) throws Exception {
try (CloseableHttpClient client = HttpClients.custom().build()) {
String queryPlanUrl = String.format(queryPlanUrlFormat, dorisUrl, database, table);
HttpPost post = new HttpPost(queryPlanUrl);
System.out.println("执行查询计划url: "+queryPlanUrl);
post.setHeader(HttpHeaders.EXPECT, "100-continue");
post.setHeader(HttpHeaders.AUTHORIZATION, basicAuthHeader(username, password));
// The param is specific SQL, and the query plan is returned
Map<String, String> params = new HashMap<>();
params.put("sql", sql);
System.out.println("执行查询计划参数: "+ params);
StringEntity entity = new StringEntity(JSON.toJSONString(params));
post.setEntity(entity);
try (CloseableHttpResponse response = client.execute(post)) {
if (response.getEntity() != null) {
//解析查询计划
JSONObject queryPlanJSONObject = JSONObject.parseObject(EntityUtils.toString(response.getEntity()));
System.out.println("执行查询计划response: " + queryPlanJSONObject);
JSONObject dataJSONObject = queryPlanJSONObject.getJSONObject("data");
//查询计划异常返回exception信息
if (dataJSONObject.containsKey("exception")) {
throw new RuntimeException(dataJSONObject.getString("exception"));
}
return dataJSONObject;
}
}
}
return null;
}
private static String basicAuthHeader(String username, String password) {
final String info = username + ":" + password;
byte[] encoded = Base64.encodeBase64(info.getBytes(StandardCharsets.UTF_8));
return "Basic " + new String(encoded);
}
private static void readData(JSONObject data,String username,String password,String database,String table) throws Exception {
String queryPlan = data.getString("opaqued_query_plan");
JSONObject partitions = data.getJSONObject("partitions");
long readTotal = 0L;
long totalTime = 0L;
for (Map.Entry<String, Object> tablet : partitions.entrySet()) {
System.out.println("tablet信息:"+ tablet);
long startTime = System.currentTimeMillis();
Long tabletId = Long.parseLong(tablet.getKey());
JSONObject value = JSONObject.parseObject(JSON.toJSONString(tablet.getValue()));
//get first backend
String routingsBackend = value.getJSONArray("routings").getString(0);
String backendHost = routingsBackend.split(":")[0];
String backendPort = routingsBackend.split(":")[1];
//connect backend
TBinaryProtocol.Factory factory = new TBinaryProtocol.Factory();
TTransport transport = new TSocket(new TConfiguration(), backendHost, Integer.parseInt(backendPort));
TProtocol protocol = factory.getProtocol(transport);
TDorisExternalService.Client client = new TDorisExternalService.Client(protocol);
if (!transport.isOpen()) {
transport.open();
}
//build params
TScanOpenParams params = new TScanOpenParams();
params.cluster = "default_cluster";
params.database = database;
params.table = table;
params.tablet_ids = Collections.singletonList(tabletId);
params.opaqued_query_plan = queryPlan;
// max row number of one read batch
params.setBatchSize(1000);
params.setQueryTimeout(3600);
params.setMemLimit(2147483648L);
params.setUser(username);
params.setPasswd(password);
//open scanner
TScanOpenResult tScanOpenResult = client.openScanner(params);
if (!TStatusCode.OK.equals(tScanOpenResult.getStatus().getStatusCode())) {
throw new RuntimeException(String.format("The status of open scanner result from %s is '%s', error message is: %s.",
routingsBackend, tScanOpenResult.getStatus().getStatusCode(), tScanOpenResult.getStatus().getErrorMsgs()));
}
TScanNextBatchParams nextBatchParams = new TScanNextBatchParams();
nextBatchParams.setContextId(tScanOpenResult.getContextId());
boolean eos = false;
//read data
int offset = 0;
long nums = 0L;
while (!eos) {
nextBatchParams.setOffset(offset);
TScanBatchResult next = client.getNext(nextBatchParams);
if (!TStatusCode.OK.equals(next.getStatus().getStatusCode())) {
throw new RuntimeException(String.format("The status of get next result from %s is '%s', error message is: %s.",
routingsBackend, next.getStatus().getStatusCode(), next.getStatus().getErrorMsgs()));
}
eos = next.isEos();
if (!eos) {
int i = convertArrow(next);
offset += i;
nums += i;
}
}
readTotal += nums;
long cost_time =(System.currentTimeMillis() - startTime);
totalTime +=cost_time;
System.out.println("tabletId["+tabletId+"]任务结束,总数据量:"+nums+",总花费:"+cost_time+"ms");
//close
TScanCloseParams closeParams = new TScanCloseParams();
closeParams.setContextId(tScanOpenResult.getContextId());
client.closeScanner(closeParams);
if (transport.isOpen()) {
transport.close();
}
}
System.out.println("总任务结束,总数据量:"+readTotal+",总花费:"+totalTime+"ms");
}
private static int convertArrow(TScanBatchResult nextResult) throws Exception {
long startTime = System.currentTimeMillis();
int offset = 0;
RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
ArrowStreamReader arrowStreamReader = new ArrowStreamReader(new ByteArrayInputStream(nextResult.getRows()), rootAllocator);
VectorSchemaRoot root = arrowStreamReader.getVectorSchemaRoot();
while (arrowStreamReader.loadNextBatch()) {
List<FieldVector> fieldVectors = root.getFieldVectors();
//total data rows
int rowCountInOneBatch = root.getRowCount();
//按行获取数据
// for (int row = 0; row < rowCountInOneBatch; row++) {
// List<Object> rowData = new ArrayList<>();
// for (FieldVector fieldVector : fieldVectors) {
// Types.MinorType minorType = fieldVector.getMinorType();
// Object v = convertValue(row, minorType, fieldVector);
// rowData.add(v);
// }
System.out.println(rowData);
// }
//按列获取数据
List<Object> rowData = new ArrayList<>();
for (FieldVector fieldVector : fieldVectors) {
Types.MinorType minorType = fieldVector.getMinorType();
List<Object> filedData = new ArrayList<>();
for (int row = 0; row < rowCountInOneBatch; row++) {
Object v = convertValue(row, minorType, fieldVector);
filedData.add(v);
}
// System.out.println(filedData);
}
offset += root.getRowCount();
}
//处理完之后要关闭,否则容易内存溢出
arrowStreamReader.close();
return offset;
}
private static Object convertValue(int rowIndex,
Types.MinorType minorType,
FieldVector fieldVector) {
Object fieldValue;
switch (minorType) {
case BIT:
BitVector bitVector = (BitVector) fieldVector;
fieldValue = bitVector.isNull(rowIndex) ? null : bitVector.get(rowIndex) != 0;
break;
case TINYINT:
TinyIntVector tinyIntVector = (TinyIntVector) fieldVector;
fieldValue = tinyIntVector.isNull(rowIndex) ? null : tinyIntVector.get(rowIndex);
break;
case SMALLINT:
SmallIntVector smallIntVector = (SmallIntVector) fieldVector;
fieldValue = smallIntVector.isNull(rowIndex) ? null : smallIntVector.get(rowIndex);
break;
case INT:
IntVector intVector = (IntVector) fieldVector;
fieldValue = intVector.isNull(rowIndex) ? null : intVector.get(rowIndex);
break;
case BIGINT:
BigIntVector bigIntVector = (BigIntVector) fieldVector;
fieldValue = bigIntVector.isNull(rowIndex) ? null : bigIntVector.get(rowIndex);
break;
case FLOAT4:
Float4Vector float4Vector = (Float4Vector) fieldVector;
fieldValue = float4Vector.isNull(rowIndex) ? null : float4Vector.get(rowIndex);
break;
case FLOAT8:
Float8Vector float8Vector = (Float8Vector) fieldVector;
fieldValue = float8Vector.isNull(rowIndex) ? null : float8Vector.get(rowIndex);
break;
case VARBINARY:
VarBinaryVector varBinaryVector = (VarBinaryVector) fieldVector;
fieldValue = varBinaryVector.isNull(rowIndex) ? null : varBinaryVector.get(rowIndex);
break;
case DECIMAL:
DecimalVector decimalVector = (DecimalVector) fieldVector;
fieldValue = decimalVector.getObject(rowIndex).stripTrailingZeros();
break;
case VARCHAR:
VarCharVector date = (VarCharVector) fieldVector;
fieldValue = new String(date.get(rowIndex));
break;
case LIST:
ListVector listVector = (ListVector) fieldVector;
fieldValue = listVector.isNull(rowIndex) ? null : listVector.getObject(rowIndex);
break;
default:
fieldValue = fieldVector.isNull(rowIndex) ? null : fieldVector.getObject(rowIndex);
}
return fieldValue;
}
}
4.Spark Doris Connector
Spark Doris Connector 可以支持通过 Spark 读取 Doris 中存储的数据,也支持通过Spark写入数据到Doris。
val dorisSparkDF = spark.read.format("doris")
.option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME")
.option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT")
.option("user", "$YOUR_DORIS_USERNAME")
.option("password", "$YOUR_DORIS_PASSWORD")
.load()
dorisSparkDF.show(5)
5.Flink Doris Connector
Flink Doris Connector 可以支持通过 Flink 操作(读取、插入、修改、删除) Doris 中存储的数据。本文档介绍Flink如何通过Datastream和SQL操作Doris。
DorisOptions.Builder builder = DorisOptions.builder()
.setFenodes("FE_IP:HTTP_PORT")
.setTableIdentifier("db.table")
.setUsername("root")
.setPassword("password");
DorisSource<List<?>> dorisSource = DorisSourceBuilder.<List<?>>builder()
.setDorisOptions(builder.build())
.setDorisReadOptions(DorisReadOptions.builder().build())
.setDeserializer(new SimpleListDeserializationSchema())
.build();
env.fromSource(dorisSource, WatermarkStrategy.noWatermarks(), "doris source").print();
6.Arrow Flight SQL
Arrow Flight SQL 是一种使用 Arrow 内存格式和 Flight RPC 框架与 SQL 数据库交互的协议。在 Apache Doris 2.1 版本中,提供基于 Arrow Flight SQL 构建了高速数据传输链路,支持主流语言通过 SQL 从 Doris 高速读取大规模数据,极大提升了其他系统与 Apache Doris 间数据传输效率。
1)maven 引入flight-sql-jdbc-driver包:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.yichenkeji</groupId>
<artifactId>yichen-demo</artifactId>
<version>1.0</version>
</parent>
<artifactId>yichen-demo-doris</artifactId>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<lombok.version>1.18.26</lombok.version>
<arrow.version>15.0.2</arrow.version>
</properties>
<dependencies>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.arrow/flight-sql-jdbc-driver -->
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>flight-sql-jdbc-driver</artifactId>
<version>${arrow.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-jdbc -->
<dependency>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-jdbc</artifactId>
<version>${arrow.version}</version>
</dependency>
</dependencies>
</project>
2)JDBC编码实现
package com.yichenkeji.demo.doris;
import java.sql.*;
import java.util.Properties;
public class ArrowFlightSqlJdbc {
public static void main(String[] args) throws SQLException {
String jdbc_url = "jdbc:arrow-flight-sql://192.168.179.134:9040";
Properties prop = new Properties();
prop.put("user", "root");
prop.put("password", "");
//需要配置该参数,否则报错
prop.put("useEncryption", false);
try(Connection conn = DriverManager.getConnection(jdbc_url,prop);
Statement stmt = conn.createStatement();
) {
System.out.println("conn: "+conn);
String sql = "select id,name from demo.dim_user";
ResultSet rs = stmt.executeQuery(sql);
System.out.println("rs: "+rs);
while (rs.next()){
System.out.println("id="+rs.getLong("id") +",name="+rs.getString("name"));
}
}catch (Exception e){
e.printStackTrace();
}
}
}