#!/usr/bin/env python
# coding=utf-8
import datetime
import re
from requests.auth import HTTPBasicAuth
import pymysql
import pyhive
import sqlalchemy
import pandas
pymysql.install_as_MySQLdb()
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
import pandas as pd
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
"""
将 sql_text 这段SQL 脚本中所使用到表提取出来
"""
def extract_tables(sql_str):
# pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+|\w+)'
pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+\.\w+|\w+\.\w+)' # 经过测试这个比较完美
list_tables=list(set(re.findall(pattern, str(sql_str), re.I)))
return list_tables
if __name__ == '__main__':
pattern = r'(?:(?<=from|join)\s+)(?:\w+\.\w+|\w+)'
myChannel_sit_Name=""
myChannel_sit_pwd=""
engine = create_engine('mysql+pymysql://username:password@localhost/database')
sql = """select id,title,sql_text from tableName """
df = pd.read_sql(sql, myChannel_sit_engine)
print(df.loc[:,["id","sql_text"]])
dict_={}
for row_index, row in df.loc[:,["id","sql_text"]].iterrows():
print("*"*10)
print(row_index)
sql_text=str(row[1]).replace("\\r\\n"," ").replace("\\t","")
table_list=extract_tables(sql_text)
dict_[row[0]]=table_list
print(dict_)
inser_tmp=" INSERT INTO test_table_001 (data_api_id, extract_tables) VALUES "
for k,v in dict_.items():
inser_tmp+="(\"{}\",\"{}\"), ".format(k,str(v).replace(" ",''))
inser_str=inser_tmp[:len(inser_tmp) - 2] + ";"
print(inser_str)
conn=conn=myChannel_sit_engine.connect()
conn.execute("DELETE FROM db_apple_bbi_sit.test_table_001 ;")
conn.execute(inser_str)
使用python 将 SQL脚本中的相关表 提取出来:
于 2024-08-20 16:20:00 首次发布