Python之文本去重(最终版)

import re
readdir = "C:\\Users\\user\\Desktop\\sql.txt"
# readdir: location of the prepared SQL file; its extension must be .txt
writedir1 = "C:\\Users\\user\\Desktop\\newsql.txt"
# writedir1: location of the de-duplicated SQL file
writedir2 = "C:\\Users\\user\\Desktop\\oldsql.txt"
# writedir2: location of the file listing the statements that were repeated

# Lower-case keywords / phrases that are upper-cased before comparison, so
# that statements differing only in the case of these keywords compare equal.
# They are applied one after another in this order.  The "<type> null comment"
# phrases exist so that a type name is only upper-cased in that exact context.
_KEYWORDS = (
    'select', 'from', 'insert into', 'values', 'update', 'set', 'where',
    'alter table', 'varchar', 'modify column', 'create table', 'primary key',
    'add column', 'tinyint null comment', 'bigint null comment',
    'not null comment', 'int null comment', 'datetime null comment',
    'date null comment', 'null comment', 'null', 'comment', 'decimal',
)

# The \b anchors keep a keyword from being rewritten in the middle of an
# identifier (e.g. "offset" must not become "offSET").  Matching stays
# case-sensitive: only all-lower-case spellings are upper-cased.
_KEYWORD_PATTERNS = tuple(
    (re.compile(r'\b' + re.escape(keyword) + r'\b'), keyword.upper())
    for keyword in _KEYWORDS
)

# Databases whose "USE `<name>`;" statements are always kept in the output.
_USE_DATABASES = (
    'my', 'my_log', 'my_dzbl', 'my_forum', 'my_hds', 'my_mall', 'my_news',
)
_USE_PATTERN = re.compile(
    'USE `(?:' + '|'.join(_USE_DATABASES) + ')`;', re.IGNORECASE
)


def normalize_keywords(text):
    """Return *text* with the lower-case keywords in _KEYWORDS upper-cased.

    Only whole words are replaced.  The text is not parsed as SQL, so a
    keyword inside a quoted string or comment is upper-cased as well.
    """
    for pattern, replacement in _KEYWORD_PATTERNS:
        text = pattern.sub(replacement, text)
    return text


def split_statements(text):
    """Split *text* into statements, each returned as ``'<statement>;\\n'``.

    The text is cut on single spaces, every piece is stripped and empty
    pieces are dropped, which collapses runs of spaces (and whitespace that
    is adjacent to a space) into one space.  The result is then cut on ';'.
    Empty statements are discarded.  A ';' inside a quoted string also ends
    a statement, because the text is not parsed as SQL.
    """
    tokens = [token.strip() for token in text.split(' ')]
    collapsed = ' '.join(token for token in tokens if token)
    pieces = (piece.strip() for piece in collapsed.split(';'))
    return [piece + ';\n' for piece in pieces if piece]


def is_use_statement(statement):
    """Return True if *statement* contains a USE for a database in _USE_DATABASES."""
    return _USE_PATTERN.search(statement) is not None


def dedupe_statements(statements):
    """Return *statements* in order with repeated statements removed.

    USE statements are exempt from de-duplication: every occurrence is kept,
    exactly once each.
    """
    seen = set()
    kept = []
    for statement in statements:
        if is_use_statement(statement):
            kept.append(statement)
        elif statement not in seen:
            seen.add(statement)
            kept.append(statement)
    return kept


def repeated_statements(statements):
    """Return each statement occurring more than once, in first-seen order.

    Repeated USE statements are listed too, although they are not removed
    from the de-duplicated output.
    """
    counts = {}
    for statement in statements:
        counts[statement] = counts.get(statement, 0) + 1
    return [statement for statement, count in counts.items() if count > 1]


def main():
    """Read readdir, then write the de-duplicated SQL and the repeated SQL.

    The input is read completely before either output file is opened, so a
    failure to read it does not truncate existing output files.  Files are
    opened with the platform's default text encoding.
    """
    with open(readdir, 'r') as source:
        text = source.read()
    statements = split_statements(normalize_keywords(text))
    with open(writedir1, 'w') as deduped_file:
        deduped_file.writelines(dedupe_statements(statements))
    with open(writedir2, 'w') as repeated_file:
        repeated_file.writelines(repeated_statements(statements))


if __name__ == '__main__':
    main()

此脚本还有其他版本:
Python之文本去重(进阶版):https://blog.csdn.net/weixin_42840933/article/details/86605707
Python之文本去重(基础版):https://blog.csdn.net/weixin_42840933/article/details/86577903

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值