import os
import re
# import pandas as pd
path_sql_file = "/u/users/vn55gxn/py_file/python_source_file_sql"
# path_sql_file = "/u/users/vn55gxn/py_from_file/from_soure"
path_target_file = "/u/users/vn55gxn/py_from_file/from_target"
# path_excle_file = "/u/users/vn55gxn/py_from_file/python_source_file_excle/python_repace.xlsx"
def search_file(path_sql_file):
pattern_from = r'from\s+(.*?)\s+' #匹配from 后面两个空格之间的内容
pattern_join = r'join\s+(.*?)\s+'
pattern_tmp = re.compile(r'^(?!.*(?:tmp|temp)).*$') # 过滤了tmp|temp 这些表
pattern_all = r'from\s+(.*?)\s+|join\s+(.*?)\s+'
match_from_list =[]
with open(path_sql_file, "r") as file_source:
lines = file_source.readlines()
file_source.close()
# with open(path_target_file, "w") as file_target: # 不存在会自动创建
# file_target.write('') # 写入前先清空目标文件
for line in lines:
match_from = re.search(pattern_from,line)
if match_from:
if len(match_from.group(1))>10 and re.findall(pattern_tmp,match_from.group(1)): #过滤一些不正确的匹配
# file_target.write(match_from.group(1)+"\n") #写入匹配的结果
match_from_list.append(match_from.group(1))
# print(match_from.group(1))
match_join = re.search(pattern_join,line)
if match_join:
if len(match_join.group(1))>10 and re.findall(pattern_tmp,match_join.group(1)): #过滤一些不正确的匹配
# file_target.write(match_join.group(1)+"\n") #写入匹配的结果
match_from_list.append(match_join.group(1))
# print(match_join.group(1))
match_from_set=set(match_from_list) #set去重
for i in match_from_set:
print(i)
# file_target.close()
if __name__ == '__main__':
print("开始------------------------------------")
list_dir = os.listdir(path_sql_file)
for i in list_dir:
path_sql_file_dtl = path_sql_file + r"/" + i
# path_target_file_dtl = path_target_file + r"/" + i
#替换开始
# search_file(path_sql_file_dtl,path_target_file_dtl)
search_file(path_sql_file_dtl)
print(path_sql_file_dtl + " 已完成")
print("\n")
# Python 字典(Dictionary) items() 函数以列表返回可遍历的(键, 值) 元组数组。
查询sql脚本的所有来源表
最新推荐文章于 2024-06-14 23:48:57 发布