今天写代码时编译不通过,排查后发现是漏写了一个配对符(括号)。相关代码如下:
import sys
import re
import codecs
class TestCls(object):
    """Extract column definitions from ``CREATE TABLE IF NOT EXISTS`` statements.

    Only ``--`` line comments are understood; other SQL comment syntaxes are
    not stripped before parsing.
    """

    # A table definition starts with "CREATE TABLE IF NOT EXISTS <name>(" and
    # ends at the first ");" that follows. Raw strings keep "\(" from being
    # treated as an (invalid) Python string escape.
    _TABLE_PATTERN = re.compile(
        r"CREATE TABLE IF NOT EXISTS[ \t]+"
        r"(?P<tableName>[a-zA-Z0-9_]+)"
        r"[ \t\r\n]*\("
        r"(?P<allFieldContent>[^;]+)"
        r"\);"
    )

    # A column definition must spell out "NULL" or "NOT NULL", otherwise it is
    # not recognised:
    #   <colName> <colType> <NULL | NOT NULL> <anything else, e.g. PRIMARY KEY>
    _FIELD_PATTERN = re.compile(
        r"[ \t]*(?P<colName>[a-zA-Z0-9_]+)"
        r"[ \t]+(?P<colType>[a-zA-Z0-9\(\)]+)"
        r"[ \t]+(?:NULL|NOT NULL)"
        r"(?P<otherStr>.*)"
    )

    def __init__(self):
        """Create a parser; no instance state is kept."""

    def ReadSqlFile(self, filename, encoding):
        """Read a SQL script and print every column definition found in it.

        For each column of each ``CREATE TABLE IF NOT EXISTS`` statement, the
        column name, the column type and the remainder of the definition line
        are printed, one value per line.

        Args:
            filename: Path of the SQL file to read.
            encoding: Text encoding used to decode the file.

        Returns:
            None. The parsed values are written to standard output.

        Errors raised while opening or decoding the file propagate unchanged.
        A file without any matching table, or a table without any matching
        column, simply produces no output.
        """
        with codecs.open(filename=filename, mode='r', encoding=encoding) as f:
            content = f.read()

        # Strip "--" comments before parsing so commented-out text cannot be
        # mistaken for SQL. "." never matches "\n", so this removes everything
        # up to the end of the line and also works on a final line that has no
        # trailing newline.
        content = re.sub(r"--.*", "", content)

        for tableMatch in self._TABLE_PATTERN.finditer(content):
            # NOTE: if a bracket such as "(" / ")" goes missing in code like
            # this, Python reports a confusing error. When that happens,
            # suspect an unbalanced pair and comment code out piece by piece
            # until the offending block is found.
            allFieldContent = tableMatch.group("allFieldContent")
            for fieldMatch in self._FIELD_PATTERN.finditer(allFieldContent):
                print(fieldMatch.group("colName"))
                print(fieldMatch.group("colType"))
                print(fieldMatch.group("otherStr"))
        return None
if __name__ == "__main__":
    # Script entry point: parse a fixed SQL file and exit with status 0.

    # Example of the kind of SQL the parser handles. It is kept for reference
    # only: it is not handed to the parser, which reads from the path below.
    sampleSql = """
DROP TABLE IF EXISTS nnnnnn_etf_list;
CREATE TABLE IF NOT EXISTS nnnnnn_etf_list(
date INTEGER NOT NULL,--日期
code VARCHAR(32) NOT NULL,--code
field1 VARCHAR(64) NOT NULL,--字段1
field2 VARCHAR(64) NULL,--字段2
field3 VARCHAR(64) NULL,--字段3
field4 VARCHAR(64) NULL,--字段4
field5 VARCHAR(64) NULL,--字段5
field6 VARCHAR(64) NULL,--字段6
PRIMARY KEY(date,code,field1)
);"""

    sqlPath = r"D:\sql.sql"
    TestCls().ReadSqlFile(sqlPath, 'utf8')
    sys.exit(0)
完。