1、开始、关闭libreoffice服务;
开始之前同步字体文件时间,是因为创建soffice服务时,服务会检查所需加载的文件的时间,如果其认为时间不符,则其可能会重新加载,耗时较长,因此需事先统一时间。
使用时如果需要多次调用,最好每次调用都先开启服务、用完后关闭,否则libreoffice会创建一个缓存文档并越用越大,处理时间会随之增加。
class OfficeProcess(object):
    """Start and stop a LibreOffice (soffice) process listening for UNO on localhost:2002.

    The process id of the office instance is written by soffice itself to
    ``sof.pid`` in the current working directory (``--pidfile=sof.pid``);
    ``stop_office`` reads that file to terminate it.
    """

    def __init__(self):
        # Handle of the shell command that launched soffice; set by start_office().
        self.p = None
        # Give every font file the same fixed modification time *before* soffice
        # is started: soffice checks the timestamps of the files it loads and may
        # reload them (slow) when they look inconsistent.  The call blocks until
        # the timestamps are written, and `-exec ... {} +` copes with font paths
        # that contain spaces.
        try:
            subprocess.call([
                'find', '/usr/share/fonts',
                '-exec', 'touch', '-m', '-t', '201801010000.00', '{}', '+',
            ])
        except OSError:
            # Best effort only: a missing `find` must not prevent construction.
            pass

    def start_office(self, timeout=None):
        """Launch soffice and block until its UNO socket accepts connections.

        Args:
            timeout: Maximum number of seconds to keep retrying the connection.
                ``None`` (the default) retries forever, once per second.

        Raises:
            Exception: When ``timeout`` is given and expires, the last
                connection error is re-raised.
        """
        self.p = subprocess.Popen('soffice --pidfile=sof.pid --invisible --accept="socket,host=localhost,port=2002;urp;"', shell=True)
        # The resolver does not depend on the remote side, so build it once
        # instead of on every retry.
        local_context = uno.getComponentContext()
        resolver = local_context.getServiceManager().createInstanceWithContext(
            'com.sun.star.bridge.UnoUrlResolver', local_context)
        deadline = None if timeout is None else time.time() + timeout
        while True:
            try:
                resolver.resolve('uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
                return
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C / SystemExit still
                # abort the wait loop.
                if deadline is not None and time.time() >= deadline:
                    raise
                print(ts(), "wait for connecting soffice...")
                time.sleep(1)

    def stop_office(self):
        """Send SIGTERM to the pid recorded in ``sof.pid`` and wait for the launcher.

        Raises:
            IOError/OSError: If ``sof.pid`` cannot be opened (unchanged from the
                original behaviour).
        """
        with open("sof.pid", "rb") as f:
            pid_text = f.read()
        try:
            os.kill(int(pid_text), signal.SIGTERM)
        except (ValueError, OSError):
            # Unparsable pid file or the process is already gone / not ours:
            # nothing left to stop.
            return
        # start_office() may never have been called on this instance.
        if self.p is not None:
            self.p.wait()
2、init service manager
# Connect to the office instance listening on localhost:2002 and keep handles to it.
# NOTE(review): fragment of a method (it assigns to `self`); the enclosing `def`
# is not shown here -- presumably the constructor of the document-reader class.
# Local UNO component context; here it is only used to create the URL resolver.
local_context = uno.getComponentContext()
service_manager = local_context.getServiceManager()
resolver = service_manager.createInstanceWithContext('com.sun.star.bridge.UnoUrlResolver', local_context)
# Component context obtained through the socket connection; later service
# creation goes through it.
self.ctx = resolver.resolve('uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext')
self.smgr = self.ctx.ServiceManager
# Desktop service created in the resolved context; ImportFromMemory uses it to
# load documents.
self.desktop = self.smgr.createInstanceWithContext('com.sun.star.frame.Desktop', self.ctx)
3、从二进制数据中读取doc文档
def ImportFromMemory(self, data):
    """Load a Writer document from in-memory binary data.

    The data is wrapped in a ``SequenceInputStream`` and handed to the desktop
    via the ``InputStream`` load property.  On success ``self.document`` and
    ``self.text`` are set; if loading or ``getText()`` fails, ``self.text`` is
    set to ``None``.  ``self.doc`` is always reset to an empty result.

    Args:
        data: Raw document content, passed to ``uno.ByteSequence``
            (presumably ``bytes`` -- confirm against the caller).
    """
    istream = self.smgr.createInstanceWithContext('com.sun.star.io.SequenceInputStream', self.ctx)
    istream.initialize((uno.ByteSequence(data), ))
    pv = PropertyValue()
    pv.Name = 'InputStream'
    pv.Value = istream
    # Fresh result container for the subsequent ExportToJson() call.
    self.doc = {'doc': []}
    try:
        self.document = self.desktop.loadComponentFromURL('private:stream/swriter', '_blank', 0, (pv, ))
        self.text = self.document.getText()
    except Exception:
        # Any load failure (including loadComponentFromURL returning None) marks
        # the import as unusable.  `Exception` rather than a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        self.text = None
4、读取doc文档中的数据
def ExportToJson(self):
    """Parse the imported document text and return the result as a JSON string.

    The parsed items are appended to ``self.doc['doc']`` and the total returned
    by the text parser is stored under ``self.doc['length']``.  If parsing
    fails for any reason (for example because ``self.text`` is ``None`` after a
    failed import) the result is reset to an empty document of length 0.

    Returns:
        str: ``json.dumps`` of ``self.doc``.
    """
    try:
        length = self.__ParseText(self.text, self.__Callback(self.doc['doc']))
        self.doc['length'] = length
    except Exception:
        # `Exception` rather than a bare except: parse errors still degrade to
        # an empty result, but KeyboardInterrupt / SystemExit now propagate.
        self.doc = {'doc': [], 'length': 0}
    return json.dumps(self.doc)
@staticmethod
def __Callback(alist):
    """Return a one-argument callable that appends its argument to ``alist``."""
    return alist.append
def __ParseText(self, text, func):
    """Walk the top-level elements of ``text`` and dispatch each to its parser.

    Paragraphs go to ``__ParseParagraph`` and text tables to ``__ParseTable``;
    elements supporting neither service are ignored.

    Args:
        text: Object providing ``createEnumeration()`` over the elements.
        func: Callback forwarded unchanged to the element parsers.

    Returns:
        The sum of the values returned by the element parsers.
    """
    total = 0
    enumeration = text.createEnumeration()
    while enumeration.hasMoreElements():
        item = enumeration.nextElement()
        if item.supportsService('com.sun.star.text.Paragraph'):
            total += self.__ParseParagraph(item, func)
            continue
        if item.supportsService('com.sun.star.text.TextTable'):
            total += self.__ParseTable(item, func)
    return total
def __ParseParagraph(self, paragraph, func):
p = {'paragraph': []}
l = 0
paragraph_it = paragraph.createEnumeration()
while paragraph_it.hasMoreElements():
portion = paragraph_it.nextElement()
if portion.TextPortionType == 'Text':
l += self.__ParsePortionText(portion, self.__Callback(p['paragraph']))
elif portion.TextPortionType == 'SoftPageBreak':
pass
elif portion.TextPortionType == 'TextField':
l += self.__ParsePortionText(portion, self.__Callback(p['paragraph']))
else:
l += self.__ParseTextContent(portion, self.__Callback(p['paragraph']))
if hasattr(paragraph, 'createContentEnumeration'):
l +=