from DB import MSSQLDB
from openpyxl import load_workbook
import fnmatch, os, sys, win32com.client
import re
# Imports every file listed in lq_testdoc.name into lq_data:
#   * *.txt        -> file content is stored with flag '0'
#   * *.doc/*.docx -> the document is saved as plain text through Word, that
#                     text is stored, and flag is '1' when the document holds
#                     at least one inline shape, otherwise '0'
#   * anything else, or a path that does not exist -> a fixed explanatory
#     message is stored instead of content (no flag column is written)

txtpath = r'C:\360高速下载'  # absolute directory that receives the .txt copies of Word documents
count = 0  # number of rows inserted into lq_data so far
txtcount = 0  # running number used as the file name of each .txt copy

# Numeric value of Word's WdSaveFormat.wdFormatText. The literal is used
# because win32com.client.constants is only filled in once makepy-generated
# type-library support has been loaded, which a plain Dispatch() call does not
# guarantee.
WD_FORMAT_TEXT = 2


def _sql_quote(text):
    """Return *text* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so that a value such as a path
    containing an apostrophe cannot terminate the literal early.
    """
    return "'" + text.replace("'", "''") + "'"


def _read_text(path):
    """Return the full content of the text file at *path*.

    The file is opened in text mode with the platform default encoding,
    exactly as the script did before; the handle is closed even on error.
    """
    with open(path, 'r') as handle:
        return handle.read()


wordapp = win32com.client.Dispatch("Word.Application")
try:
    # The DB work lives inside the try block so that Word is shut down even
    # when connecting or querying fails.
    db = MSSQLDB()
    resList = db.ExecQuery(" select name from lq_testdoc ")

    for r in resList:
        filename = r[0]
        flag = None  # None means: insert without the flag column

        if not os.path.exists(filename):
            content = '给出的绝对路径下该文件不存在'
        elif fnmatch.fnmatch(filename, '*.txt'):
            # Single quotes in the content are replaced by a space, as the
            # script has always done, so stored data stays comparable.
            content = _read_text(filename).replace("'", " ")
            flag = '0'
        elif fnmatch.fnmatch(filename, '*.docx') or fnmatch.fnmatch(filename, '*.doc'):
            # Open the document once: count its inline shapes and export it
            # as plain text in the same session.
            docc = wordapp.Documents.Open(filename)
            try:
                flag = '1' if docc.InlineShapes.Count != 0 else '0'
                txtcount += 1
                docastext = os.path.abspath(
                    os.path.join(txtpath, str(txtcount) + '.txt'))
                docc.SaveAs(docastext, FileFormat=WD_FORMAT_TEXT)
            finally:
                # Never leave the document open inside Word on failure.
                docc.Close()
            content = _read_text(docastext).replace("'", " ")
        else:
            content = '该文件不是{docx,doc,txt}格式的文件'

        # NOTE(review): the SQL is still assembled as text because only the
        # single-argument form of ExecNonQuery is visible here; if MSSQLDB
        # supports bound parameters, prefer them over quoting.
        if flag is None:
            sql = ("insert into lq_data(filename,filecontent) values ("
                   + _sql_quote(filename) + "," + _sql_quote(content) + ")")
        else:
            sql = ("insert into lq_data(filename,filecontent,flag) values ("
                   + _sql_quote(filename) + "," + _sql_quote(content) + ","
                   + _sql_quote(flag) + ")")
        db.ExecNonQuery(sql)

        count += 1
        print('Succeed--第' + str(count) + '行数据')
finally:
    wordapp.Quit()
print('end')