def __ParseParagraph(self, paragraph, func):
    """Parse one paragraph and report it through ``func``.

    ``Text`` and ``TextField`` portions are emitted as plain text,
    ``SoftPageBreak`` portions are skipped, and every other portion type is
    scanned for anchored text content.  If the paragraph itself offers
    ``createContentEnumeration`` its own anchored content is scanned too.

    Args:
        paragraph: UNO paragraph object providing ``createEnumeration``.
        func: Callable receiving ``{'paragraph': [...], 'length': n}``.

    Returns:
        The sum of the lengths returned by the portion/content parsers.
    """
    parsed = {'paragraph': []}
    length = 0
    portions = paragraph.createEnumeration()
    while portions.hasMoreElements():
        portion = portions.nextElement()
        portion_type = portion.TextPortionType
        # self.__Callback is defined outside this view; presumably it returns
        # a collector that appends to the given list -- confirm.
        if portion_type in ('Text', 'TextField'):
            length += self.__ParsePortionText(portion, self.__Callback(parsed['paragraph']))
        elif portion_type == 'SoftPageBreak':
            continue
        else:
            length += self.__ParseTextContent(portion, self.__Callback(parsed['paragraph']))
    # NOTE(review): placed after the portion loop because it depends only on
    # the paragraph, not on a portion -- confirm against the original layout.
    if hasattr(paragraph, 'createContentEnumeration'):
        length += self.__ParseTextContent(paragraph, self.__Callback(parsed['paragraph']))
    parsed['length'] = length
    func(parsed)
    return length

def __ParseTextContent(self, textcontent, func):
    """Dispatch every anchored text content of ``textcontent`` to its parser.

    Graphic objects, text frames and group shapes are parsed; embedded
    objects and anything else are ignored.  The service checks run in a fixed
    order, so an element supporting several services is handled by the first
    matching branch.

    Args:
        textcontent: UNO object providing ``createContentEnumeration``.
        func: Callable handed on to the element parsers.

    Returns:
        The sum of the lengths returned by the element parsers.
    """
    length = 0
    contents = textcontent.createContentEnumeration('com.sun.star.text.TextContent')
    while contents.hasMoreElements():
        element = contents.nextElement()
        if element.supportsService('com.sun.star.text.TextGraphicObject'):
            length += self.__ParsePortionGraphic(element, func)
        elif element.supportsService('com.sun.star.text.TextEmbeddedObject'):
            # Embedded objects are deliberately skipped.
            continue
        elif element.supportsService('com.sun.star.text.TextFrame'):
            length += self.__ParseFrame(element, func)
        elif element.supportsService('com.sun.star.drawing.GroupShape'):
            length += self.__ParseGroup(element, func)
    return length

def __ParseFrame(self, frame, func):
    """Parse the text inside ``frame`` and report it through ``func``.

    Args:
        frame: UNO object providing ``getText()``.
        func: Callable receiving ``{'frame': [...], 'length': n}``.

    Returns:
        The length returned by ``self.__ParseText`` for the frame's text.
    """
    parsed = {'frame': []}
    length = self.__ParseText(frame.getText(), self.__Callback(parsed['frame']))
    parsed['length'] = length
    func(parsed)
    return length

def __ParseGroup(self, group, func):
    """Parse every text-bearing member of a group shape as a frame.

    Args:
        group: UNO indexed container (``getCount`` / ``getByIndex``).
        func: Callable handed on to ``__ParseFrame``.

    Returns:
        The sum of the lengths of all parsed members.
    """
    length = 0
    for index in range(group.getCount()):
        member = group.getByIndex(index)
        # Members without the drawing Text service are ignored.
        if member.supportsService('com.sun.star.drawing.Text'):
            length += self.__ParseFrame(member, func)
    return length

def __ParsePortionText(self, portion_text, func):
    """Report a text portion through ``func`` and return its length.

    Args:
        portion_text: Object whose ``String`` attribute holds the text.
        func: Callable receiving ``{'portion': text, 'length': len(text)}``.

    Returns:
        ``len(portion_text.String)``.
    """
    text = portion_text.String
    func({'portion': text, 'length': len(text)})
    return len(text)

def __ParsePortionGraphic(self, portion_graphic, func):
    """Export a graphic as base64-encoded PNG and report it through ``func``.

    The graphic is stored through a ``GraphicProvider`` into a ``TempFile``
    stream, read back, and emitted together with its size and crop values.

    Args:
        portion_graphic: UNO graphic object providing ``Graphic``, ``Height``,
            ``Width``, ``ActualSize`` and ``GraphicCrop``.
        func: Callable receiving the image dict.

    Returns:
        Always 0; the emitted dict also carries ``'length': 0``.
    """
    provider = self.smgr.createInstanceWithContext('com.sun.star.graphic.GraphicProvider', self.ctx)
    stream = self.smgr.createInstanceWithContext('com.sun.star.io.TempFile', self.ctx)

    target = PropertyValue()
    target.Name = 'OutputStream'
    target.Value = stream
    mime = PropertyValue()
    mime.Name = 'MimeType'
    mime.Value = 'image/png'
    provider.storeGraphic(portion_graphic.Graphic, (target, mime))

    stream.getOutputStream().flush()
    stream.seek(0)
    available = stream.getInputStream().available()
    data = uno.ByteSequence(b'')
    stream.seek(0)
    # readBytes hands back a (count, data) pair; only the data is needed.
    _, data = stream.getInputStream().readBytes(data, available)

    actual_size = portion_graphic.ActualSize
    crop = portion_graphic.GraphicCrop
    img = {
        'image': base64.b64encode(data.value).decode('ascii'),
        'height': portion_graphic.Height,
        'width': portion_graphic.Width,
        'actualheight': actual_size.Height,
        'actualwidth': actual_size.Width,
        'croptop': crop.Top,
        'cropbottom': crop.Bottom,
        'cropleft': crop.Left,
        'cropright': crop.Right,
        'length': 0,
    }
    func(img)
    return 0

def __ParseTable(self, table, func):
    """Parse a text table cell by cell and report it through ``func``.

    Each cell's metadata (everything from the table matrix except the UNO
    cell itself) is copied and extended with a ``'content'`` list filled by
    ``self.__ParseText``.  The table is only reported when the cells'
    ``rowspan * colspan`` areas add up to ``rows * columns``; otherwise, or on
    any other error, it is discarded with a printed notice.

    Args:
        table: UNO text table.
        func: Callable receiving ``{'table', 'row', 'column', 'length',
            'tableid'}`` for a successfully parsed table.

    Returns:
        The accumulated text length, or 0 when the table was discarded.
    """
    length = 0
    try:
        matrix = self.__GetTableMatrix(table)
        seps = self.__GetTableSeparators(table)
        parsed = {}
        covered = 0
        for ri, row in matrix.items():
            parsed[ri] = {}
            for ci, info in row.items():
                entry = dict(info)
                del entry['cell']
                entry['content'] = []
                parsed[ri][ci] = entry
                length += self.__ParseText(info['cell'], self.__Callback(entry['content']))
                covered += entry['rowspan'] * entry['colspan']
        if covered != len(parsed) * len(seps):
            raise ValueError('count of cells error')
        func({'table': parsed, 'row': len(parsed), 'column': len(seps), 'length': length, 'tableid': self.table_id})
        self.table_id += 1
    except Exception:
        # Best effort: a malformed table is dropped instead of aborting the
        # document.  KeyboardInterrupt/SystemExit are no longer swallowed.
        length = 0
        print('discard wrong table')
    return length
@staticmethod
def __GetTableSeparators(table):
    """Return the sorted, de-duplicated column separator positions of ``table``.

    Collects ``table.TableColumnRelativeSum`` plus the ``Position`` of every
    ``TableColumnSeparators`` entry of every row.  Positions that differ by
    exactly one unit are collapsed onto the larger value (presumably to
    absorb rounding differences between rows -- confirm).

    Args:
        table: UNO text table providing ``getRows()`` and
            ``TableColumnRelativeSum``.

    Returns:
        Ascending list of distinct separator positions; the last entry is
        the table's relative column sum unless a larger position exists.
    """
    positions = {table.TableColumnRelativeSum}
    rows = table.getRows()  # loop-invariant; fetch once instead of per row
    for ri in range(rows.getCount()):
        positions.update(s.Position for s in rows.getByIndex(ri).TableColumnSeparators)
    result = sorted(positions)
    # Walk left to right so a run such as 1, 2, 3 collapses step by step,
    # exactly like the in-place update this replaces.
    for i in range(len(result) - 1):
        if result[i] + 1 == result[i + 1]:
            result[i] = result[i + 1]
    return sorted(set(result))
@staticmethod
def __NameToRC(name):
    """Convert a cell name such as ``'B3'`` into zero-based ``(row, column)``.

    The digits of ``name`` give the one-based row.  The letters give the
    column with ``A``-``Z`` meaning 0-25 and ``a``-``z`` meaning 26-51.
    Multi-letter names continue the sequence the way LibreOffice Writer
    numbers columns (``'AA'`` is column 52, ``'AB'`` 53, ...), so they no
    longer collide with the single-letter columns.

    Args:
        name: Cell name consisting of column letters and row digits.

    Returns:
        Tuple ``(row, column)``, both zero-based.

    Raises:
        ValueError: If the non-letter part of ``name`` is not an integer.
    """
    r = int(re.sub(r'[A-Za-z]', '', name)) - 1
    letters = re.sub(r'[0-9]', '', name)
    # Bijective base 52: starting at -1 makes the first letter map to 0..51
    # and every additional letter add a full block of 52 columns.
    c = -1
    for ch in letters:
        if 'A' <= ch <= 'Z':
            digit = ord(ch) - ord('A')
        else:
            digit = 26 + ord(ch) - ord('a')
        c = (c + 1) * 52 + digit
    # A name without letters keeps yielding column 0, as before.
    return r, (c if letters else 0)
@staticmethod
def __GetTableMatrix(table):
    """Build a ``{row: {column: info}}`` mapping for every named cell.

    Each ``info`` dict holds the UNO ``'cell'``, its ``'rowspan'`` (taken
    from ``cell.RowSpan``), its ``'name'`` and a ``'colspan'`` derived from
    where the cell's left and right edges fall in the table-wide separator
    grid returned by ``__GetTableSeparators``.

    Args:
        table: UNO text table providing ``getCellNames``, ``getCellByName``,
            ``getRows`` and ``TableColumnRelativeSum``.

    Returns:
        Nested dict keyed by zero-based row index, then zero-based column
        index.

    Raises:
        ValueError: If a row separator (or that value plus one) is missing
            from the table-wide grid.
        IndexError: If a column index exceeds the row's separator count.
    """
    matrix = {}
    for cell_name in table.getCellNames():
        row_idx, col_idx = WordToJson.__NameToRC(cell_name)
        cell = table.getCellByName(cell_name)
        matrix.setdefault(row_idx, {})[col_idx] = {
            'cell': cell,
            'rowspan': cell.RowSpan,
            'name': cell_name,
        }

    grid = WordToJson.__GetTableSeparators(table)

    def grid_index(position):
        # The grid may have shifted a separator up by one when collapsing
        # neighbours, so fall back to position + 1 when there is no exact hit.
        if position in grid:
            return grid.index(position)
        return grid.index(position + 1)

    for row_idx, row_cells in matrix.items():
        row_seps = [s.Position for s in table.getRows().getByIndex(row_idx).TableColumnSeparators]
        row_seps.append(table.TableColumnRelativeSum)
        row_seps = sorted(set(row_seps))
        for col_idx, info in row_cells.items():
            right = grid_index(row_seps[col_idx])
            # The first column starts before grid slot 0.
            if col_idx == 0:
                left = -1
            else:
                left = grid_index(row_seps[col_idx - 1])
            info['colspan'] = right - left
    return matrix