输入表:select 查询涉及到的表
输出表:insert涉及到的表
需求:现在我们项目需要对sql操作的表加权限控制,要对表设置读写权限,输入表要有读权限,输出表要有写权限,所以我们要解析出一个sql的输入,输出表
IOPlan ioplan = null;
try {
String sqlExecutePlan = "explain (TYPE IO, FORMAT JSON) "+sqlContent;
ioplan = (IOPlan)bdpDataSourceService.executeSql(conn,sqlExecutePlan,PrestoSqlPardeUtil.result,unclose);
}catch (Exception e){
e.printStackTrace();
throw new ApplicationException(BDPResponseCode.ERROR.getCode(),"prestosql解析失败:"+e.getMessage());
}
Set<String> jobInputTableSet = new HashSet<>();
Set<String> jobOutTableSet = new HashSet<>();
if(ioplan != null){
ioplan.getInputTableColumnInfos().forEach(x->{
jobInputTableSet.add(x.getTable().getSchemaTable().getSchema()+"."+x.getTable().getSchemaTable().getTable());
});
if(ioplan.getOutputTable() != null){
jobOutTableSet.add(ioplan.getOutputTable().getSchemaTable().getSchema()+"."+ioplan.getOutputTable().getSchemaTable().getTable());
}
}
List<String> jobInputTableList = new ArrayList<>(jobInputTableSet);
List<String> jobOutTableList = new ArrayList<>(jobOutTableSet);
return getBdpInnerLineageVO(jobInputTableList, jobOutTableList);
当然我们可以用presto sqlparse那个模块来进行静态解析,就是不需要去创建connection,但是由于时间问题,最终使用explain (TYPE IO, FORMAT JSON)来进行解析
但是有个缺点,就是必须需要通过jdbc来创建prestosql连接
比如:
explain (TYPE IO, FORMAT JSON) insert into rdm.crm_cust_tmp_20201124_1 select * from rdm.crm_cust_tmp_20201125 where id = '20354714';
会返回:
{
"inputTableColumnInfos" : [ {
"table" : {
"catalog" : "hive",
"schemaTable" : {
"schema" : "rdm",
"table" : "crm_cust_tmp_20201125"
}
},
"columnConstraints" : [ {
"columnName" : "id",
"type" : "varchar",
"domain" : {
"nullsAllowed" : false,
"ranges" : [ {
"low" : {
"value" : "20354714",
"bound" : "EXACTLY"
},
"high" : {
"value" : "20354714",
"bound" : "EXACTLY"
}
} ]
}
} ],
"estimate" : {
"outputRowCount" : "NaN",
"outputSizeInBytes" : "NaN",
"cpuCost" : "NaN",
"maxMemory" : 0.0,
"networkCost" : 0.0
}
} ],
"outputTable" : {
"catalog" : "hive",
"schemaTable" : {
"schema" : "rdm",
"table" : "crm_cust_tmp_20201124_1"
}
},
"estimate" : {
"outputRowCount" : "NaN",
"outputSizeInBytes" : "NaN",
"cpuCost" : "NaN",
"maxMemory" : "NaN",
"networkCost" : "NaN"
}
}
我们可以定义一些类封装这些数据:
public class IOPlan
{
private final Set<TableColumnInfo> inputTableColumnInfos;
private final CatalogSchemaTableName outputTable;
public IOPlan(
Set<TableColumnInfo> inputTableColumnInfos,
CatalogSchemaTableName outputTable)
{
this.inputTableColumnInfos = inputTableColumnInfos;
this.outputTable = outputTable;
}
}
@Data
public class SchemaTableName {
private final String schema;
private final String table;
public SchemaTableName(String schema, String table) {
this.schema = schema;
this.table = table;
}
}
final class SchemaUtil
{
private SchemaUtil()
{
}
static String checkNotEmpty(String value, String name)
{
if (value == null) {
throw new NullPointerException(name + " is null");
}
if (value.isEmpty()) {
throw new IllegalArgumentException(name + " is empty");
}
return value;
}
}
@Data
public class TableColumnInfo
{
private final CatalogSchemaTableName table;
private final Set<ColumnConstraint> columnConstraints;
public TableColumnInfo(
CatalogSchemaTableName table,
Set<ColumnConstraint> columnConstraints)
{
this.table = table;
this.columnConstraints = columnConstraints;
}
}
@Data
public class FormattedRange
{
private final FormattedMarker low;
private final FormattedMarker high;
public FormattedRange(
FormattedMarker low,
FormattedMarker high)
{
this.low = low;
this.high = high;
}
}
@Data
public class FormattedMarker
{
private final String value;
private final Bound bound;
public FormattedMarker(
String value,
Bound bound)
{
this.value = value;
this.bound = bound;
}
}
@Data
public class FormattedDomain
{
private final boolean nullsAllowed;
private final Set<FormattedRange> ranges;
public FormattedDomain(
boolean nullsAllowed,
Set<FormattedRange> ranges)
{
this.nullsAllowed = nullsAllowed;
this.ranges = ranges;
}
}
@Data
public class ColumnConstraint
{
private final String columnName;
private final String type;
private final FormattedDomain domain;
public ColumnConstraint(
String columnName,
String type,
FormattedDomain domain)
{
this.columnName = columnName;
this.type = type;
this.domain = domain;
}
}
@Data
public class CatalogSchemaTableName
{
private final String catalog;
private final SchemaTableName schemaTable;
public CatalogSchemaTableName(
String catalog,
SchemaTableName schemaTable)
{
this.catalog = catalog;
this.schemaTable = schemaTable;
}
}
public enum Bound
{
BELOW, // lower than the value, but infinitesimally close to the value
EXACTLY, // exactly the value
ABOVE // higher than the value, but infinitesimally close to the value
}