Trino Connector Extension (URL)

The goal is to read data from remote servers (FTP/SFTP) and join it against dimension tables stored in relational databases; the target scenario is near-real-time join analysis over large volumes of data.

 

References used during development:

1. Trino official documentation: https://trino.io/docs/current

2. Official example HTTP connector: https://trino.io/docs/current/develop/example-http.html
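
The classes below make up the connector: UrlMetadata (schema and table bookkeeping on the coordinator), UrlConnectorFactory (wiring), UrlSplitManager (one split per source URL), and UrlRecordCursor (CSV parsing). Before the code, here is a hypothetical usage sketch. Only the connector name url is fixed by UrlConnectorFactory; the catalog file path, table property names, and columns are illustrative assumptions inferred from the handle accessors used later (sources(), delimiterChar(), quoteChar()).

```sql
-- Assumed catalog file etc/catalog/url.properties:
--   connector.name=url

-- Hypothetical DDL; the WITH property names are assumptions.
CREATE TABLE url.default.remote_users (
    id   BIGINT,
    name VARCHAR
)
WITH (
    sources   = ARRAY['http://files.example.com/users.csv.gz'], -- assumed property
    delimiter = ',',                                            -- assumed property
    quote     = '"'                                             -- assumed property
);

-- Near-real-time join against a dimension table in a relational catalog
SELECT u.id, u.name, d.region
FROM url.default.remote_users u
JOIN mysql.dim.user_region d ON u.id = d.user_id;
```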

 

package com.eastcom.trino.plugin;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slice;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.ConnectorNewTableLayout;
import io.trino.spi.connector.ConnectorOutputMetadata;
import io.trino.spi.connector.ConnectorOutputTableHandle;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTableMetadata;
import io.trino.spi.connector.ConnectorTableProperties;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.SchemaTablePrefix;
import io.trino.spi.security.TrinoPrincipal;
import io.trino.spi.statistics.ComputedStatistics;

import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static com.google.common.base.Verify.verify;
import static io.trino.spi.StandardErrorCode.ALREADY_EXISTS;
import static io.trino.spi.StandardErrorCode.NOT_FOUND;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toMap;

/**
 * In-memory ConnectorMetadata implementation: schemas and table definitions
 * are kept in concurrent maps on the coordinator.
 *
 * @author panda
 * @date 2021/1/25
 */
public class UrlMetadata implements ConnectorMetadata {

    public static final String DEFAULT = "default";
    private final List<String> schemas = new ArrayList<>();
    private final AtomicLong nextTableId = new AtomicLong();
    private final Map<SchemaTableName, Long> tableIds = new ConcurrentHashMap<>();
    private final Map<Long, UrlTableInfo> tables = new ConcurrentHashMap<>();

    public UrlMetadata() {
        this.schemas.add(DEFAULT);
    }

    @Nullable
    @Override
    public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) {
        Long id = tableIds.get(tableName);
        if (id == null) {
            return null;
        }
        return new UrlTableHandle(id, tableName, tables.get(id).getProperties());
    }

    @Override
    public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) {
        ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder();
        UrlTableHandle urlTableHandle = (UrlTableHandle) tableHandle;
        for (UrlColumnInfo column : tables.get(urlTableHandle.getId()).getColumns()) {
            columnHandles.put(column.getName(), column.getHandle());
        }
        return columnHandles.build();
    }

    @Override
    public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) {
        UrlTableHandle tableHandle = (UrlTableHandle) table;
        return tables.get(tableHandle.getId()).getMetadata();
    }

    @Override
    public ColumnMetadata getColumnMetadata(
            ConnectorSession session,
            ConnectorTableHandle tableHandle,
            ColumnHandle columnHandle) {
        return ((UrlColumnHandle) columnHandle).getMetadata();
    }

    @Override
    public List<String> listSchemaNames(ConnectorSession session) {
        return ImmutableList.copyOf(this.schemas);
    }

    @Override
    public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> schemaName) {
        ImmutableList.Builder<SchemaTableName> builder = ImmutableList.builder();
        tables.values().stream().filter(table -> schemaName.map(table.getSchemaName()::contentEquals).orElse(true))
                .map(UrlTableInfo::getSchemaTableName).forEach(builder::add);
        return builder.build();
    }

    @Override
    public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(
            ConnectorSession session,
            SchemaTablePrefix prefix) {
        return tables.values().stream().filter(table -> prefix.matches(table.getSchemaTableName()))
                .collect(toMap(UrlTableInfo::getSchemaTableName, handle -> handle.getMetadata().getColumns()));
    }

    @Override
    public void createSchema(ConnectorSession session, String schemaName, Map<String, Object> properties,
                             TrinoPrincipal owner) {
        if (schemas.contains(schemaName)) {
            throw new TrinoException(ALREADY_EXISTS, format("Schema [%s] already exists", schemaName));
        }
        schemas.add(schemaName);
    }

    @Override
    public void dropSchema(ConnectorSession session, String schemaName) {
        if (!schemas.contains(schemaName)) {
            throw new TrinoException(NOT_FOUND, format("Schema [%s] does not exist", schemaName));
        }

        // Drop every table in the schema from both the id index and the table map.
        List<UrlTableInfo> tablesToDrop = this.tables.values().stream()
                .filter(table -> table.getSchemaName().equalsIgnoreCase(schemaName))
                .collect(Collectors.toList());
        tablesToDrop.forEach(table -> {
            tableIds.remove(table.getSchemaTableName());
            this.tables.remove(table.getId());
        });

        verify(schemas.remove(schemaName));
    }

    @Override
    public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) {
        SchemaTableName table = tableMetadata.getTable();
        if (!schemas.contains(table.getSchemaName())) {
            schemas.add(table.getSchemaName());
        }

        if (checkTableNotExists(table, ignoreExisting)) {
            ConnectorOutputTableHandle outputTableHandle = beginCreateTable(session, tableMetadata, Optional.empty());
            finishCreateTable(session, outputTableHandle, ImmutableList.of(), ImmutableList.of());
        }
    }

    @Override
    public synchronized UrlOutputTableHandle beginCreateTable(
            ConnectorSession session,
            ConnectorTableMetadata tableMetadata,
            Optional<ConnectorNewTableLayout> layout) {
        long tableId = nextTableId.getAndIncrement();

        ImmutableList.Builder<UrlColumnInfo> columns = ImmutableList.builder();
        List<ColumnMetadata> columnMetadataList = tableMetadata.getColumns();
        IntStream.range(0, columnMetadataList.size()).mapToObj(i -> new UrlColumnInfo(columnMetadataList.get(i), i)).forEach(columns::add);

        tableIds.put(tableMetadata.getTable(), tableId);
        tables.put(tableId, new UrlTableInfo(tableId, columns.build(), tableMetadata.getTable().getSchemaName(),
                tableMetadata.getTable().getTableName(), tableMetadata.getProperties()));
        return new UrlOutputTableHandle(tableId, ImmutableSet.copyOf(tableIds.values()));
    }

    @Override
    public synchronized Optional<ConnectorOutputMetadata> finishCreateTable(
            ConnectorSession session,
            ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments,
            Collection<ComputedStatistics> computedStatistics) {
        requireNonNull(tableHandle, "tableHandle is null");
        return Optional.empty();
    }

    @Override
    public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle) {
        UrlTableHandle handle = (UrlTableHandle) tableHandle;
        UrlTableInfo info = tables.remove(handle.getId());
        if (info != null) {
            tableIds.remove(info.getSchemaTableName());
        }
    }

    @Override
    public boolean usesLegacyTableLayouts() {
        return false;
    }

    @Override
    public Optional<Object> getInfo(ConnectorTableHandle table) {
        return Optional.empty();
    }

    @Override
    public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
        return new ConnectorTableProperties();
    }

    private boolean checkTableNotExists(SchemaTableName tableName, boolean ignoreExisting) {
        if (tableIds.containsKey(tableName)) {
            if (ignoreExisting) {
                return false;
            } else {
                throw new TrinoException(ALREADY_EXISTS, format("Table [%s] already exists", tableName.toString()));
            }
        }
        return true;
    }
}
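
For Trino to discover the connector, the JAR must also contain a Plugin implementation registered via META-INF/services/io.trino.spi.Plugin. The post omits this class; a minimal sketch (the UrlPlugin name is chosen here for illustration):

```java
package com.eastcom.trino.plugin;

import com.google.common.collect.ImmutableList;
import io.trino.spi.Plugin;
import io.trino.spi.connector.ConnectorFactory;

/**
 * Plugin entry point discovered through META-INF/services/io.trino.spi.Plugin.
 */
public class UrlPlugin implements Plugin {
    @Override
    public Iterable<ConnectorFactory> getConnectorFactories() {
        return ImmutableList.of(new UrlConnectorFactory());
    }
}
```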
package com.eastcom.trino.plugin;

import com.google.inject.Injector;
import io.airlift.bootstrap.Bootstrap;
import io.trino.spi.NodeManager;
import io.trino.spi.connector.Connector;
import io.trino.spi.connector.ConnectorContext;
import io.trino.spi.connector.ConnectorFactory;
import io.trino.spi.connector.ConnectorHandleResolver;

import java.util.Map;

import static java.util.Objects.requireNonNull;

/**
 * Creates the connector from catalog configuration using Airlift's Bootstrap
 * and the Guice bindings in UrlModule.
 *
 * @author panda
 * @date 2021/1/25
 */
public class UrlConnectorFactory implements ConnectorFactory {
    
    public static final String NAME = "url";
    
    @Override
    public String getName() {
        return NAME;
    }
    
    @Override
    public Connector create(String catalogName, Map<String, String> config, ConnectorContext context) {
        requireNonNull(config, "config is null");
        Bootstrap app = new Bootstrap(
                binder -> binder.bind(NodeManager.class).toInstance(context.getNodeManager()),
                new UrlModule());
    
        Injector injector = app
                .strictConfig()
                .doNotInitializeLogging()
                .setRequiredConfigurationProperties(config)
                .initialize();
        
        return injector.getInstance(UrlConnector.class);
    }
    
    @Override
    public ConnectorHandleResolver getHandleResolver() {
        return new UrlConnectorHandleResolver();
    }
}
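
The factory above references a Guice module, UrlModule, that is also not shown. A plausible sketch, assuming the remaining components (a UrlConnector and a record set provider, see the cursor section below) are bound as singletons:

```java
package com.eastcom.trino.plugin;

import com.google.inject.Binder;
import com.google.inject.Module;
import com.google.inject.Scopes;

public class UrlModule implements Module {
    @Override
    public void configure(Binder binder) {
        // Bind each component once so the Bootstrap injector in
        // UrlConnectorFactory.create() can assemble UrlConnector.
        binder.bind(UrlConnector.class).in(Scopes.SINGLETON);
        binder.bind(UrlMetadata.class).in(Scopes.SINGLETON);
        binder.bind(UrlSplitManager.class).in(Scopes.SINGLETON);
        binder.bind(UrlRecordSetProvider.class).in(Scopes.SINGLETON); // hypothetical name
    }
}
```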

package com.eastcom.trino.plugin;

import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.connector.FixedSplitSource;

import java.util.stream.Collectors;

public class UrlSplitManager implements ConnectorSplitManager {
    
    @Override
    public ConnectorSplitSource getSplits(
            ConnectorTransactionHandle transactionHandle, ConnectorSession session,
            ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
        // One split per configured source URL; the scheduling strategy and
        // dynamic filter are ignored by this simple connector.
        return new FixedSplitSource(
                ((UrlTableHandle) table).sources().stream()
                        .map(UrlSplit::new).collect(Collectors.toList()));
    }
}
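
UrlSplitManager wraps each source URL in a UrlSplit, which the post does not include. Since any worker can fetch a URL, the split can report itself as remotely accessible with no preferred hosts. A sketch under those assumptions:

```java
package com.eastcom.trino.plugin;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.trino.spi.HostAddress;
import io.trino.spi.connector.ConnectorSplit;

import java.util.List;

import static java.util.Objects.requireNonNull;

public class UrlSplit implements ConnectorSplit {
    private final String url;

    @JsonCreator
    public UrlSplit(@JsonProperty("url") String url) {
        this.url = requireNonNull(url, "url is null");
    }

    @JsonProperty
    public String getUrl() {
        return url;
    }

    @Override
    public boolean isRemotelyAccessible() {
        // Any worker can fetch the URL; the split is not pinned to a host.
        return true;
    }

    @Override
    public List<HostAddress> getAddresses() {
        return List.of();
    }

    @Override
    public Object getInfo() {
        return url;
    }
}
```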

package com.eastcom.trino.plugin;

import com.google.common.base.Joiner;
import io.airlift.log.Logger;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.RecordCursor;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Decimals;
import io.trino.spi.type.StandardTypes;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.csv.QuoteMode;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;
import java.util.zip.GZIPInputStream;

import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.spi.type.Varchars.truncateToLength;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.math.RoundingMode.HALF_UP;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * Streams CSV records from a single URL (optionally gzip-compressed) and
 * converts field values to the declared Trino column types.
 *
 * @author panda
 */
public class UrlRecordCursor implements RecordCursor {
    private static final Logger log = Logger.get(UrlRecordCursor.class);
    private final List<UrlColumnHandle> columns;
    private CSVParser parser;
    private Iterator<CSVRecord> iterator;
    private Integer[] headerIndexes;
    private CSVRecord current;

    public UrlRecordCursor(String url, UrlTableHandle table, List<UrlColumnHandle> columns) {
        this.columns = requireNonNull(columns, "columns is null");
        this.headerIndexes = new Integer[columns.size()];
        try {
            final InputStream input = new URL(url).openStream();
            final CSVFormat csvFormat = CSVFormat.newFormat(table.delimiterChar())
                    .withQuote(table.quoteChar())
                    .withHeader()
                    .withIgnoreEmptyLines()
                    .withQuoteMode(QuoteMode.MINIMAL);
            this.parser = url.endsWith(".gz")
                    ? csvFormat.parse(new InputStreamReader(new GZIPInputStream(input), StandardCharsets.UTF_8))
                    : csvFormat.parse(new InputStreamReader(input, StandardCharsets.UTF_8));
            Map<String, Integer> headerMap = parser.getHeaderMap();
            log.info("列信息::" + columns.toString());
            log.info("文件头::" + parser.getHeaderMap());
            IntStream.range(0, columns.size()).forEach(i -> {
                headerIndexes[i] = headerMap.get(columns.get(i).getTitleName());
                log.info("映射关系::headerIndexes[" + i + "]::" + headerIndexes[i]);
            });
            this.iterator = this.parser.iterator();
        } catch (Exception e) {
            if (this.parser != null) {
                try {
                    this.parser.close();
                } catch (IOException ex) {
                    // Ignore
                }
            }
            this.iterator = Collections.emptyIterator();
            throw new TrinoException(GENERIC_INTERNAL_ERROR, "open url: " + url + "failed", e);
        }
    }

    @Override
    public long getCompletedBytes() {
        return 0;
    }

    @Override
    public long getReadTimeNanos() {
        return 0;
    }

    @Override
    public Type getType(int field) {
        checkArgument(field < columns.size(), "Invalid field index");
        return columns.get(field).getColumnType();
    }

    @Override
    public boolean advanceNextPosition() {
        if (iterator.hasNext()) {
            this.current = iterator.next();
            return true;
        }
        return false;
    }

    @Override
    public boolean getBoolean(int field) {
        checkFieldType(field, BOOLEAN);
        return Boolean.parseBoolean(getFieldValue(field));
    }

    @Override
    public long getLong(int field) {
        Type actual = getType(field);
        UrlColumnHandle handle = columns.get(field);
        try {
            if (StandardTypes.DECIMAL.equals(actual.getBaseName())) {
                // Short decimals are encoded as unscaled long values.
                BigDecimal value = (BigDecimal) handle.getNumberFormat().parse(getFieldValue(field));
                return Decimals.encodeShortScaledValue(value, ((DecimalType) actual).getScale(), HALF_UP);
            }

            if (TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS.equals(actual)) {
                // Add 500 microseconds, then truncate: rounds to the nearest
                // millisecond before packing the value with its zone offset.
                OffsetDateTime time = OffsetDateTime.parse(getFieldValue(field), handle.getDateFormat())
                        .plus(500, ChronoUnit.MICROS).truncatedTo(ChronoUnit.MILLIS);
                long epochMillis = time.toInstant().toEpochMilli();
                int offsetMinutes = toIntExact(SECONDS.toMinutes(time.getOffset().getTotalSeconds()));
                return packDateTimeWithZone(epochMillis, offsetMinutes);
            }

            if (TimestampType.TIMESTAMP_MILLIS.equals(actual)) {
                long epochMilli = LocalDateTime.parse(getFieldValue(field), handle.getDateFormat())
                        .atZone(ZoneId.systemDefault()).toInstant().toEpochMilli();
                // Short TIMESTAMP values are encoded as epoch microseconds.
                return epochMilli * 1000;
            }

            checkFieldType(field, BIGINT, INTEGER);
            return handle.getNumberFormat().parse(getFieldValue(field)).longValue();
        } catch (ParseException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
    }

    @Override
    public double getDouble(int field) {
        checkFieldType(field, DOUBLE);
        UrlColumnHandle handle = columns.get(field);
        try {
            return handle.getNumberFormat().parse(getFieldValue(field)).doubleValue();
        } catch (ParseException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
    }

    @Override
    public Slice getSlice(int field) {
        Type actual = getType(field);
        try {
            if (actual instanceof VarcharType) {
                return truncateToLength(Slices.utf8Slice(getFieldValue(field)), actual);
            }
            if (StandardTypes.DECIMAL.equals(actual.getBaseName())) {
                UrlColumnHandle handle = columns.get(field);
                BigDecimal value = (BigDecimal) handle.getNumberFormat().parse(getFieldValue(field));
                return Decimals.encodeScaledValue(value, ((DecimalType) actual).getScale(), HALF_UP);
            }
        } catch (ParseException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
        checkFieldType(field, VARCHAR);
        return Slices.utf8Slice(getFieldValue(field));
    }

    @Override
    public Object getObject(int field) {
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean isNull(int field) {
        checkArgument(field < columns.size(), "Invalid field index");
        String fieldValue = getFieldValue(field);
        return "null".equalsIgnoreCase(fieldValue) || fieldValue.isBlank();
    }

    private String getFieldValue(int field) {
        return current.get(headerIndexes[field]);
    }

    @Override
    public void close() {
        if (this.parser != null) {
            try {
                this.parser.close();
            } catch (IOException e) {
                // Ignore
            }
        }
    }

    private void checkFieldType(int field, Type... expected) {
        Type actual = getType(field);
        if (Arrays.stream(expected).anyMatch(actual::equals)) {
            return;
        }
        throw new IllegalArgumentException(
                format("Expected field %s to be type %s but is %s", field, Joiner.on(", ").join(expected), actual));
    }
}
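
The last missing piece is the ConnectorRecordSetProvider that hands each split to the cursor above. A minimal sketch; the UrlRecordSetProvider name and the UrlSplit.getUrl() accessor are assumptions carried over from the earlier sketches:

```java
package com.eastcom.trino.plugin;

import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ConnectorRecordSetProvider;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplit;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.RecordCursor;
import io.trino.spi.connector.RecordSet;
import io.trino.spi.type.Type;

import java.util.List;
import java.util.stream.Collectors;

public class UrlRecordSetProvider implements ConnectorRecordSetProvider {
    @Override
    public RecordSet getRecordSet(
            ConnectorTransactionHandle transaction, ConnectorSession session,
            ConnectorSplit split, ConnectorTableHandle table, List<? extends ColumnHandle> columns) {
        UrlSplit urlSplit = (UrlSplit) split;
        UrlTableHandle tableHandle = (UrlTableHandle) table;
        List<UrlColumnHandle> urlColumns = columns.stream()
                .map(UrlColumnHandle.class::cast)
                .collect(Collectors.toList());
        return new RecordSet() {
            @Override
            public List<Type> getColumnTypes() {
                return urlColumns.stream().map(UrlColumnHandle::getColumnType).collect(Collectors.toList());
            }

            @Override
            public RecordCursor cursor() {
                // One cursor per split: open the URL and stream its CSV records.
                return new UrlRecordCursor(urlSplit.getUrl(), tableHandle, urlColumns);
            }
        };
    }
}
```

To deploy, package the module and its dependencies as a plugin directory under the server's plugin/ folder (for example plugin/url/), add a catalog file such as etc/catalog/url.properties containing connector.name=url, and restart Trino.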

 
