为了读取远程服务器的数据(ftp/sftp)和存在关系型数据库的维表进行关联、使用场景为大量的数据进行近实时的关联分析。
实际开发过程中参考网站:
1,Trino官方文档:https://trino.io/docs/current
2,官方实现http连接例子:https://trino.io/docs/current/develop/example-http.html
package com.eastcom.trino.plugin;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slice;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
import io.trino.spi.connector.ConnectorMetadata;
import io.trino.spi.connector.ConnectorNewTableLayout;
import io.trino.spi.connector.ConnectorOutputMetadata;
import io.trino.spi.connector.ConnectorOutputTableHandle;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTableMetadata;
import io.trino.spi.connector.ConnectorTableProperties;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.SchemaTablePrefix;
import io.trino.spi.security.TrinoPrincipal;
import io.trino.spi.statistics.ComputedStatistics;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static com.google.common.base.Verify.verify;
import static io.trino.spi.StandardErrorCode.ALREADY_EXISTS;
import static io.trino.spi.StandardErrorCode.NOT_FOUND;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toMap;
/**
 * In-memory {@link ConnectorMetadata} implementation for the URL connector.
 *
 * <p>All state lives in this process: schema names in {@code schemas}, table
 * definitions in {@code tables} keyed by a generated id, and a name-to-id index
 * in {@code tableIds}.
 *
 * @author panda
 * @date 2021/1/25
 */
public class UrlMetadata implements ConnectorMetadata {
    public static final String DEFAULT = "default";

    // NOTE(review): ArrayList is not thread-safe while the two maps are concurrent;
    // if Trino issues concurrent schema DDL this list should become a
    // CopyOnWriteArrayList — confirm against the engine's calling contract.
    private final List<String> schemas = new ArrayList<>();
    private final AtomicLong nextTableId = new AtomicLong();
    private final Map<SchemaTableName, Long> tableIds = new ConcurrentHashMap<>();
    private final Map<Long, UrlTableInfo> tables = new ConcurrentHashMap<>();

    public UrlMetadata() {
        this.schemas.add(DEFAULT);
    }

    /** Returns the handle for {@code tableName}, or {@code null} when the table is unknown. */
    @Nullable
    @Override
    public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) {
        Long id = tableIds.get(tableName);
        if (id == null) {
            return null;
        }
        return new UrlTableHandle(id, tableName, tables.get(id).getProperties());
    }

    /** Returns the column handles of the table, keyed by column name. */
    @Override
    public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, ConnectorTableHandle tableHandle) {
        ImmutableMap.Builder<String, ColumnHandle> columnHandles = ImmutableMap.builder();
        UrlTableHandle urlTableHandle = (UrlTableHandle) tableHandle;
        for (UrlColumnInfo column : tables.get(urlTableHandle.getId()).getColumns()) {
            columnHandles.put(column.getName(), column.getHandle());
        }
        return columnHandles.build();
    }

    @Override
    public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle table) {
        UrlTableHandle tableHandle = (UrlTableHandle) table;
        return tables.get(tableHandle.getId()).getMetadata();
    }

    @Override
    public ColumnMetadata getColumnMetadata(
            ConnectorSession session,
            ConnectorTableHandle tableHandle,
            ColumnHandle columnHandle) {
        return ((UrlColumnHandle) columnHandle).getMetadata();
    }

    @Override
    public List<String> listSchemaNames(ConnectorSession session) {
        return ImmutableList.copyOf(this.schemas);
    }

    /** Lists all tables, optionally restricted to one schema (exact, case-sensitive match). */
    @Override
    public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> schemaName) {
        ImmutableList.Builder<SchemaTableName> builder = ImmutableList.builder();
        tables.values().stream()
                .filter(table -> schemaName.map(table.getSchemaName()::contentEquals).orElse(true))
                .map(UrlTableInfo::getSchemaTableName)
                .forEach(builder::add);
        return builder.build();
    }

    @Override
    public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(
            ConnectorSession session,
            SchemaTablePrefix prefix) {
        return tables.values().stream()
                .filter(table -> prefix.matches(table.getSchemaTableName()))
                .collect(toMap(UrlTableInfo::getSchemaTableName, handle -> handle.getMetadata().getColumns()));
    }

    /**
     * Registers a new schema name.
     *
     * @throws TrinoException ALREADY_EXISTS if the schema is already registered
     */
    @Override
    public void createSchema(ConnectorSession session, String schemaName, Map<String, Object> properties,
            TrinoPrincipal owner) {
        if (schemas.contains(schemaName)) {
            throw new TrinoException(ALREADY_EXISTS, format("Schema [%s] already exists", schemaName));
        }
        schemas.add(schemaName);
    }

    /**
     * Drops a schema and every table registered under it (schema matched case-insensitively).
     *
     * @throws TrinoException NOT_FOUND if the schema does not exist
     */
    @Override
    public void dropSchema(ConnectorSession session, String schemaName) {
        if (!schemas.contains(schemaName)) {
            throw new TrinoException(NOT_FOUND, format("Schema [%s] does not exist", schemaName));
        }
        // Snapshot first so we never mutate the map while streaming over its values.
        List<UrlTableInfo> doomed = this.tables.values().stream()
                .filter(table -> table.getSchemaName().equalsIgnoreCase(schemaName))
                .collect(Collectors.toList());
        doomed.forEach(table -> {
            tableIds.remove(table.getSchemaTableName());
            // BUG FIX: the original named this local list "tables", shadowing the field,
            // so List.remove(Object) was called with a Long id (always a no-op) and
            // dropped schemas leaked their table definitions in the tables map.
            this.tables.remove(table.getId());
        });
        verify(schemas.remove(schemaName));
    }

    @Override
    public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) {
        SchemaTableName table = tableMetadata.getTable();
        // Implicitly register the schema so creating a table in a fresh schema works.
        if (!schemas.contains(table.getSchemaName())) {
            schemas.add(table.getSchemaName());
        }
        if (checkTableNotExists(table, ignoreExisting)) {
            ConnectorOutputTableHandle outputTableHandle = beginCreateTable(session, tableMetadata, Optional.empty());
            finishCreateTable(session, outputTableHandle, ImmutableList.of(), ImmutableList.of());
        }
    }

    /** Allocates a table id and stores the table definition; column ordinal = declared position. */
    @Override
    public synchronized UrlOutputTableHandle beginCreateTable(
            ConnectorSession session,
            ConnectorTableMetadata tableMetadata,
            Optional<ConnectorNewTableLayout> layout) {
        long tableId = nextTableId.getAndIncrement();
        ImmutableList.Builder<UrlColumnInfo> columns = ImmutableList.builder();
        List<ColumnMetadata> columnMetadataList = tableMetadata.getColumns();
        IntStream.range(0, columnMetadataList.size())
                .mapToObj(i -> new UrlColumnInfo(columnMetadataList.get(i), i))
                .forEach(columns::add);
        tableIds.put(tableMetadata.getTable(), tableId);
        tables.put(tableId, new UrlTableInfo(tableId, columns.build(), tableMetadata.getTable().getSchemaName(),
                tableMetadata.getTable().getTableName(), tableMetadata.getProperties()));
        return new UrlOutputTableHandle(tableId, ImmutableSet.copyOf(tableIds.values()));
    }

    /** No data is written by this connector, so there is no output metadata to report. */
    @Override
    public synchronized Optional<ConnectorOutputMetadata> finishCreateTable(
            ConnectorSession session,
            ConnectorOutputTableHandle tableHandle, Collection<Slice> fragments,
            Collection<ComputedStatistics> computedStatistics) {
        requireNonNull(tableHandle, "tableHandle is null");
        return Optional.empty();
    }

    @Override
    public void dropTable(ConnectorSession session, ConnectorTableHandle tableHandle) {
        UrlTableHandle handle = (UrlTableHandle) tableHandle;
        UrlTableInfo info = tables.remove(handle.getId());
        if (info != null) {
            tableIds.remove(info.getSchemaTableName());
        }
    }

    @Override
    public boolean usesLegacyTableLayouts() {
        return false;
    }

    @Override
    public Optional<Object> getInfo(ConnectorTableHandle table) {
        return Optional.empty();
    }

    @Override
    public ConnectorTableProperties getTableProperties(ConnectorSession session, ConnectorTableHandle table) {
        return new ConnectorTableProperties();
    }

    /**
     * Returns {@code true} when the table may be created.
     *
     * @return {@code false} when the table exists and {@code ignoreExisting} is set
     * @throws TrinoException ALREADY_EXISTS when the table exists and may not be ignored
     */
    private boolean checkTableNotExists(SchemaTableName tableName, boolean ignoreExisting) {
        if (tableIds.containsKey(tableName)) {
            if (ignoreExisting) {
                return false;
            }
            throw new TrinoException(ALREADY_EXISTS, format("Table [%s] already exists", tableName.toString()));
        }
        return true;
    }
}
package com.eastcom.trino.plugin;
import com.google.inject.Injector;
import io.airlift.bootstrap.Bootstrap;
import io.trino.spi.NodeManager;
import io.trino.spi.connector.Connector;
import io.trino.spi.connector.ConnectorContext;
import io.trino.spi.connector.ConnectorFactory;
import io.trino.spi.connector.ConnectorHandleResolver;
import java.util.Map;
import static java.util.Objects.requireNonNull;
/**
 * Factory that creates {@code UrlConnector} instances for catalogs declared
 * with {@code connector.name=url}.
 *
 * @author panda
 * @date 2021/1/25
 */
public class UrlConnectorFactory implements ConnectorFactory {
    public static final String NAME = "url";

    @Override
    public String getName() {
        return NAME;
    }

    /** Wires the connector's Guice bindings via airlift Bootstrap and returns the connector. */
    @Override
    public Connector create(String catalogName, Map<String, String> config, ConnectorContext context) {
        requireNonNull(config, "config is null");
        Bootstrap bootstrap = new Bootstrap(
                binder -> binder.bind(NodeManager.class).toInstance(context.getNodeManager()),
                new UrlModule());
        Injector injector = bootstrap.strictConfig()
                .doNotInitializeLogging()
                .setRequiredConfigurationProperties(config)
                .initialize();
        return injector.getInstance(UrlConnector.class);
    }

    @Override
    public ConnectorHandleResolver getHandleResolver() {
        return new UrlConnectorHandleResolver();
    }
}
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.eastcom.trino.plugin;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplitManager;
import io.trino.spi.connector.ConnectorSplitSource;
import io.trino.spi.connector.ConnectorTableHandle;
import io.trino.spi.connector.ConnectorTransactionHandle;
import io.trino.spi.connector.DynamicFilter;
import io.trino.spi.connector.FixedSplitSource;
import java.util.stream.Collectors;
/**
 * Split manager that turns each source URL of a table into one split; the full
 * split list is known up front, so a {@link FixedSplitSource} is returned.
 */
public class UrlSplitManager implements ConnectorSplitManager {
    @Override
    public ConnectorSplitSource getSplits(
            ConnectorTransactionHandle transactionHandle, ConnectorSession session,
            ConnectorTableHandle table, SplitSchedulingStrategy splitSchedulingStrategy, DynamicFilter dynamicFilter) {
        UrlTableHandle urlTable = (UrlTableHandle) table;
        return new FixedSplitSource(
                urlTable.sources().stream()
                        .map(UrlSplit::new)
                        .collect(Collectors.toList()));
    }
}
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.eastcom.trino.plugin;
import com.google.common.base.Joiner;
import io.airlift.log.Logger;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.RecordCursor;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Decimals;
import io.trino.spi.type.StandardTypes;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.csv.QuoteMode;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.math.BigDecimal;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.stream.IntStream;
import java.util.zip.GZIPInputStream;
import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.spi.type.Varchars.truncateToLength;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.math.RoundingMode.HALF_UP;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.TimeUnit.SECONDS;
/**
 * {@link RecordCursor} that streams CSV rows (optionally gzip-compressed, by
 * ".gz" URL suffix) from a remote URL and decodes fields into Trino types.
 *
 * <p>The first CSV line is treated as a header; cursor field indexes are mapped
 * to CSV column indexes by matching each column handle's title name against the
 * header.
 *
 * @author panda
 */
public class UrlRecordCursor implements RecordCursor {
    // Loggers are stateless and thread-safe — one shared instance per class is enough.
    private static final Logger log = Logger.get(UrlRecordCursor.class);

    private final List<UrlColumnHandle> columns;
    private CSVParser parser;
    private Iterator<CSVRecord> iterator;
    // Maps cursor field index -> CSV column index resolved from the file header.
    private Integer[] headerIndexes;
    private CSVRecord current;

    /**
     * Opens {@code url} and prepares the CSV parser.
     *
     * @throws TrinoException GENERIC_INTERNAL_ERROR if the URL cannot be opened or parsed
     */
    public UrlRecordCursor(String url, UrlTableHandle table, List<UrlColumnHandle> columns) {
        this.columns = requireNonNull(columns, "columns is null");
        this.headerIndexes = new Integer[columns.size()];
        InputStream input = null;
        try {
            input = new URL(url).openStream();
            final CSVFormat csvFormat = CSVFormat.newFormat(table.delimiterChar())
                    .withQuote(table.quoteChar())
                    .withHeader()
                    .withIgnoreEmptyLines()
                    .withQuoteMode(QuoteMode.MINIMAL);
            this.parser = url.endsWith(".gz")
                    ? csvFormat.parse(new InputStreamReader(new GZIPInputStream(input), StandardCharsets.UTF_8))
                    : csvFormat.parse(new InputStreamReader(input, StandardCharsets.UTF_8));
            Map<String, Integer> headerMap = parser.getHeaderMap();
            log.info("列信息::" + columns.toString());
            log.info("文件头::" + parser.getHeaderMap());
            IntStream.range(0, columns.size()).forEach(i -> {
                // NOTE(review): stays null when the column title is absent from the file
                // header; getFieldValue would then fail on unboxing — confirm that every
                // declared column is guaranteed to appear in the CSV header.
                headerIndexes[i] = headerMap.get(columns.get(i).getTitleName());
                log.info("映射关系::headerIndexes[" + i + "]::" + headerIndexes[i]);
            });
            this.iterator = this.parser.iterator();
        } catch (Exception e) {
            if (this.parser != null) {
                try {
                    this.parser.close();
                } catch (IOException ex) {
                    // Ignore — already propagating the original failure
                }
            } else if (input != null) {
                // BUG FIX: when parsing failed before the parser was constructed, the raw
                // URL stream used to leak; close it explicitly.
                try {
                    input.close();
                } catch (IOException ex) {
                    // Ignore — already propagating the original failure
                }
            }
            this.iterator = Collections.emptyIterator();
            // BUG FIX: added the missing space before "failed" in the message.
            throw new TrinoException(GENERIC_INTERNAL_ERROR, "open url: " + url + " failed", e);
        }
    }

    @Override
    public long getCompletedBytes() {
        return 0;
    }

    @Override
    public long getReadTimeNanos() {
        return 0;
    }

    @Override
    public Type getType(int field) {
        checkArgument(field < columns.size(), "Invalid field index");
        return columns.get(field).getColumnType();
    }

    /** Advances to the next CSV record; returns {@code false} at end of input. */
    @Override
    public boolean advanceNextPosition() {
        if (iterator.hasNext()) {
            this.current = iterator.next();
            return true;
        }
        return false;
    }

    @Override
    public boolean getBoolean(int field) {
        checkFieldType(field, BOOLEAN);
        return Boolean.parseBoolean(getFieldValue(field));
    }

    /**
     * Reads a long-encoded value: short decimals, millisecond timestamps (with and
     * without time zone), and integral types are all surfaced through this accessor.
     */
    @Override
    public long getLong(int field) {
        Type actual = getType(field);
        UrlColumnHandle handle = columns.get(field);
        try {
            if (StandardTypes.DECIMAL.equals(actual.getBaseName())) {
                BigDecimal value = (BigDecimal) handle.getNumberFormat().parse(getFieldValue(field));
                return Decimals.encodeShortScaledValue(value, ((DecimalType) actual).getScale(), HALF_UP);
            }
            if (TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS.equals(actual)) {
                // +500µs then truncate = round half-up to the nearest millisecond.
                OffsetDateTime time = OffsetDateTime.parse(getFieldValue(field), handle.getDateFormat())
                        .plus(500, ChronoUnit.MICROS).truncatedTo(ChronoUnit.MILLIS);
                long epochMillis = time.toInstant().toEpochMilli();
                int offsetMinutes = toIntExact(SECONDS.toMinutes(time.getOffset().getTotalSeconds()));
                return packDateTimeWithZone(epochMillis, offsetMinutes);
            }
            if (TimestampType.TIMESTAMP_MILLIS.equals(actual)) {
                // Zone-less input is interpreted in the JVM's default time zone.
                long epochMilli = LocalDateTime.parse(getFieldValue(field), handle.getDateFormat())
                        .atZone(ZoneId.systemDefault()).toInstant().toEpochMilli();
                // TIMESTAMP(3) values are encoded as epoch microseconds.
                return epochMilli * 1000;
            }
            checkFieldType(field, BIGINT, INTEGER);
            return handle.getNumberFormat().parse(getFieldValue(field)).longValue();
        } catch (ParseException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
    }

    @Override
    public double getDouble(int field) {
        checkFieldType(field, DOUBLE);
        UrlColumnHandle handle = columns.get(field);
        try {
            return handle.getNumberFormat().parse(getFieldValue(field)).doubleValue();
        } catch (ParseException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
    }

    /** Reads a slice-encoded value: varchars (truncated to declared length) and long decimals. */
    @Override
    public Slice getSlice(int field) {
        Type actual = getType(field);
        try {
            if (actual instanceof VarcharType) {
                return truncateToLength(Slices.utf8Slice(getFieldValue(field)), actual);
            }
            if (StandardTypes.DECIMAL.equals(actual.getBaseName())) {
                UrlColumnHandle handle = columns.get(field);
                BigDecimal value = (BigDecimal) handle.getNumberFormat().parse(getFieldValue(field));
                return Decimals.encodeScaledValue(value, ((DecimalType) actual).getScale(), HALF_UP);
            }
        } catch (ParseException e) {
            throw new TrinoException(GENERIC_INTERNAL_ERROR, e);
        }
        checkFieldType(field, VARCHAR);
        return Slices.utf8Slice(getFieldValue(field));
    }

    @Override
    public Object getObject(int field) {
        throw new UnsupportedOperationException();
    }

    /** Treats blank fields and the literal text "null" (any case) as SQL NULL. */
    @Override
    public boolean isNull(int field) {
        checkArgument(field < columns.size(), "Invalid field index");
        String fieldValue = getFieldValue(field);
        return "null".equalsIgnoreCase(fieldValue) || fieldValue.isBlank();
    }

    private String getFieldValue(int field) {
        return current.get(headerIndexes[field]);
    }

    @Override
    public void close() {
        if (this.parser != null) {
            try {
                this.parser.close();
            } catch (IOException e) {
                // Ignore — nothing useful to do on close failure
            }
        }
    }

    /**
     * Verifies the declared type of {@code field} is one of {@code expected}.
     *
     * @throws IllegalArgumentException when the type does not match
     */
    private void checkFieldType(int field, Type... expected) {
        Type actual = getType(field);
        if (Arrays.stream(expected).anyMatch(actual::equals)) {
            return;
        }
        throw new IllegalArgumentException(
                format("Expected field %s to be type %s but is %s", field, Joiner.on(", ").join(expected), actual));
    }
}