import re
readdir = "C:\\Users\\user\\Desktop\\sql.txt"
# readdir: location of the hand-written SQL file to process (the original
# author requires a .txt suffix).
writedir1 = "C:\\Users\\user\\Desktop\\newsql.txt"
# writedir1: where the de-duplicated SQL is written.
writedir2 = "C:\\Users\\user\\Desktop\\oldsql.txt"
# writedir2: where the statements that occurred more than once are written.

# Lower-case keywords / keyword phrases that get upper-cased before statements
# are compared.  Type names (int, date, ...) and "not" are only upper-cased
# when directly followed by " null comment".
_KEYWORD_PHRASES = (
    'select', 'from', 'insert into', 'values', 'update', 'set', 'where',
    'alter table', 'varchar', 'modify column', 'create table', 'primary key',
    'add column', 'tinyint null comment', 'bigint null comment',
    'not null comment', 'int null comment', 'datetime null comment',
    'date null comment', 'null comment', 'null', 'comment', 'decimal',
)

# One pass over the text.  The first alternative consumes backtick-quoted
# identifiers so they are left untouched; the second matches a keyword phrase
# only as whole words (so "offset", "comments" or "smallint" are not partly
# upper-cased).  Longer phrases are tried first so "null comment" wins over
# "null" at the same position.
_KEYWORD_RE = re.compile(
    r'`[^`\n]*`|\b(?:'
    + '|'.join(
        re.escape(phrase)
        for phrase in sorted(_KEYWORD_PHRASES, key=len, reverse=True)
    )
    + r')\b'
)

# "USE <database>;" statements switch the active schema, so every occurrence
# must survive de-duplication.
_ALWAYS_KEEP_DATABASES = (
    'my', 'my_log', 'my_dzbl', 'my_forum', 'my_hds', 'my_mall', 'my_news',
)
_USE_DB_RE = re.compile(
    'USE `(?:' + '|'.join(_ALWAYS_KEEP_DATABASES) + ')`;',
    re.IGNORECASE,
)


def _uppercase_match(match):
    """Upper-case a matched keyword; return quoted identifiers unchanged."""
    token = match.group(0)
    return token if token.startswith('`') else token.upper()


def _uppercase_keywords(text):
    """Return *text* with the known lower-case SQL keywords upper-cased.

    Matching is case-sensitive (only all-lower-case keywords are changed) and
    text inside string literals is not protected.
    """
    return _KEYWORD_RE.sub(_uppercase_match, text)


def _split_statements(text):
    """Split *text* into normalised statements, each ending in ';\\n'.

    The text is split on single spaces, each piece is stripped and empty
    pieces are dropped, then the pieces are re-joined with one space.  A
    newline that has no space next to it therefore stays inside its piece.
    The result is split on ';' (including any ';' inside a string literal),
    and each non-empty statement gets ';\\n' appended -- also a final
    statement that had no terminating ';'.
    """
    pieces = [piece.strip() for piece in text.split(' ')]
    collapsed = ' '.join(piece for piece in pieces if piece)
    statements = (chunk.strip() for chunk in collapsed.split(';'))
    return [stmt + ';\n' for stmt in statements if stmt]


def dedupe_sql_text(text):
    """Normalise SQL *text* and separate unique from repeated statements.

    Returns a ``(kept, duplicates)`` pair of lists of statement strings:

    * ``kept`` -- the statements in input order with repeats removed, except
      that ``USE `<db>`;`` statements for the configured databases are kept
      every time they occur (once per occurrence).
    * ``duplicates`` -- each distinct statement that occurred more than once,
      in order of first occurrence.  Repeated ``USE`` statements are listed
      here as well, even though they are not removed from ``kept``.
    """
    from collections import Counter

    statements = _split_statements(_uppercase_keywords(text))

    kept = []
    seen = set()
    for stmt in statements:
        if _USE_DB_RE.search(stmt):
            kept.append(stmt)
        elif stmt not in seen:
            seen.add(stmt)
            kept.append(stmt)

    counts = Counter(statements)
    duplicates = [stmt for stmt, count in counts.items() if count > 1]
    return kept, duplicates


def main(read_path=readdir, unique_path=writedir1, duplicate_path=writedir2):
    """Read the SQL file, then write the de-duplicated and repeated statements.

    Files are opened with the platform default encoding.  The input is read
    completely before either output file is opened, so a missing or
    unreadable input leaves existing outputs untouched.
    """
    with open(read_path, 'r') as source:
        text = source.read()

    kept, duplicates = dedupe_sql_text(text)

    with open(unique_path, 'w') as unique_file:
        unique_file.writelines(kept)
    with open(duplicate_path, 'w') as duplicate_file:
        duplicate_file.writelines(duplicates)


if __name__ == '__main__':
    main()
# Other versions of this script:
# Python text de-duplication (advanced version): https://blog.csdn.net/weixin_42840933/article/details/86605707
# Python text de-duplication (basic version): https://blog.csdn.net/weixin_42840933/article/details/86577903