LibreOffice Python:使用 Python 操作 Word 及 Excel 文档

这段代码展示了如何在 Python 中通过 LibreOffice 的 UNO 接口解析和操作 Word 及 Excel 文档中的段落、文本内容、图片、表格等元素。通过遍历文档的不同部分(如段落、文本内容、图形对象和表格),实现对每个元素的详细解析,并进行相应的处理。
摘要由CSDN通过智能技术生成

def __ParseParagraph(self, paragraph, func):
    """Parse one paragraph and report it through ``func``.

    Every text portion of ``paragraph`` is visited; the parsed pieces are
    collected in a ``{'paragraph': [...], 'length': n}`` dict which is then
    passed to ``func``.

    ``self.__Callback`` is defined outside this section; it is presumably a
    factory returning a callable that appends to the given list -- confirm
    against its definition.

    Returns the accumulated length reported by the portion parsers.
    """
    parsed = {'paragraph': []}
    length = 0

    portions = paragraph.createEnumeration()
    while portions.hasMoreElements():
        portion = portions.nextElement()
        kind = portion.TextPortionType
        if kind in ('Text', 'TextField'):
            length += self.__ParsePortionText(
                portion, self.__Callback(parsed['paragraph']))
        elif kind == 'SoftPageBreak':
            # Soft page breaks contribute nothing to the output.
            pass
        else:
            length += self.__ParseTextContent(
                portion, self.__Callback(parsed['paragraph']))

    # Content enumerable on the paragraph itself is parsed as well.
    if hasattr(paragraph, 'createContentEnumeration'):
        length += self.__ParseTextContent(
            paragraph, self.__Callback(parsed['paragraph']))

    parsed['length'] = length
    func(parsed)
    return length


def __ParseTextContent(self, textcontent, func):
    """Parse the ``com.sun.star.text.TextContent`` objects of ``textcontent``.

    Graphic objects, text frames and group shapes are dispatched to their
    dedicated parsers; embedded objects and anything else are skipped.

    Returns the sum of the lengths returned by the dispatched parsers.
    """
    length = 0
    contents = textcontent.createContentEnumeration(
        'com.sun.star.text.TextContent')
    while contents.hasMoreElements():
        element = contents.nextElement()
        if element.supportsService('com.sun.star.text.TextGraphicObject'):
            length += self.__ParsePortionGraphic(element, func)
        elif element.supportsService('com.sun.star.text.TextEmbeddedObject'):
            # Embedded objects are deliberately ignored.
            pass
        elif element.supportsService('com.sun.star.text.TextFrame'):
            length += self.__ParseFrame(element, func)
        elif element.supportsService('com.sun.star.drawing.GroupShape'):
            length += self.__ParseGroup(element, func)
    return length


def __ParseFrame(self, frame, func):
    """Parse the text of ``frame`` and report it through ``func``.

    ``func`` receives ``{'frame': [...], 'length': n}``.  The text itself is
    parsed by ``self.__ParseText``, which is defined outside this section.

    Returns the length returned by ``self.__ParseText``.
    """
    parsed = {'frame': []}
    length = self.__ParseText(frame.getText(), self.__Callback(parsed['frame']))
    parsed['length'] = length
    func(parsed)
    return length


def __ParseGroup(self, group, func):
    """Parse every member of ``group`` that supports the drawing Text service.

    Such members are handled by ``__ParseFrame``; all others are skipped.

    Returns the sum of the lengths of the parsed members.
    """
    length = 0
    for index in range(group.getCount()):
        member = group.getByIndex(index)
        if member.supportsService('com.sun.star.drawing.Text'):
            length += self.__ParseFrame(member, func)
    return length


def __ParsePortionText(self, portion_text, func):
    """Report the string of a text portion through ``func``.

    ``func`` receives ``{'portion': <string>, 'length': <len(string)>}``.

    Returns the length of the portion's string.
    """
    text = portion_text.String
    func({'portion': text, 'length': len(text)})
    return len(text)


def __ParsePortionGraphic(self, portion_graphic, func):
    """Export a graphic as base64-encoded PNG and report it through ``func``.

    The graphic is stored with MIME type ``image/png`` into a
    ``com.sun.star.io.TempFile`` stream, read back and base64-encoded.
    ``func`` receives a dict with the encoded image, its size, actual size
    and crop values, and ``'length': 0``.

    Always returns 0.
    """
    provider = self.smgr.createInstanceWithContext(
        'com.sun.star.graphic.GraphicProvider', self.ctx)
    stream = self.smgr.createInstanceWithContext(
        'com.sun.star.io.TempFile', self.ctx)

    target = PropertyValue()
    target.Name = 'OutputStream'
    target.Value = stream

    mime = PropertyValue()
    mime.Name = 'MimeType'
    mime.Value = 'image/png'

    provider.storeGraphic(portion_graphic.Graphic, (target, mime))
    stream.getOutputStream().flush()

    # Rewind, find out how many bytes were written, then read them all.
    stream.seek(0)
    size = stream.getInputStream().available()
    data = uno.ByteSequence(b'')
    stream.seek(0)
    size, data = stream.getInputStream().readBytes(data, size)

    actual_size = portion_graphic.ActualSize
    crop = portion_graphic.GraphicCrop
    img = {
        'image': base64.b64encode(data.value).decode('ascii'),
        'height': portion_graphic.Height,
        'width': portion_graphic.Width,
        'actualheight': actual_size.Height,
        'actualwidth': actual_size.Width,
        'croptop': crop.Top,
        'cropbottom': crop.Bottom,
        'cropleft': crop.Left,
        'cropright': crop.Right,
        'length': 0,
    }
    func(img)
    return 0


def __ParseTable(self, table, func):
    """Parse a text table and report it through ``func``.

    The cell matrix from ``__GetTableMatrix`` is copied without the raw
    ``'cell'`` objects, and each cell's text is parsed into a ``'content'``
    list.  As a sanity check, the sum of ``rowspan * colspan`` over all cells
    must equal ``rows * len(separators)``; otherwise the table is discarded.

    On success ``func`` receives a dict with the keys ``'table'``, ``'row'``,
    ``'column'``, ``'length'`` and ``'tableid'``, and ``self.table_id`` is
    incremented.

    Returns the accumulated text length, or 0 if the table was discarded.
    """
    length = 0
    try:
        matrix = self.__GetTableMatrix(table)
        seps = self.__GetTableSeparators(table)
        parsed = {}
        covered = 0
        for ri, row in matrix.items():
            parsed[ri] = {}
            for ci, info in row.items():
                # Copy the cell metadata but drop the raw cell object.
                entry = dict(info)
                del entry['cell']
                entry['content'] = []
                parsed[ri][ci] = entry
                length += self.__ParseText(
                    info['cell'], self.__Callback(entry['content']))
                covered += entry['rowspan'] * entry['colspan']
        if covered != len(parsed) * len(seps):
            raise ValueError('count of cells error')
        func({'table': parsed, 'row': len(parsed), 'column': len(seps),
              'length': length, 'tableid': self.table_id})
        self.table_id += 1
    except Exception:
        # Best effort: a table that cannot be parsed is dropped.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate.
        length = 0
        print('discard wrong table')
    return length

@staticmethod
def __GetTableSeparators(table):
    """Return the sorted, de-duplicated column separator positions of ``table``.

    The positions are ``table.TableColumnRelativeSum`` plus the ``Position``
    of every entry in each row's ``TableColumnSeparators``.  A position that
    is exactly one less than the next distinct position is bumped by one, so
    the two collapse into a single separator.
    """
    rows = table.getRows()
    positions = {table.TableColumnRelativeSum}
    for ri in range(rows.getCount()):
        positions.update(
            s.Position for s in rows.getByIndex(ri).TableColumnSeparators)

    result = sorted(positions)
    # Merge neighbours that differ by exactly one unit.
    for i in range(len(result) - 1):
        if result[i] + 1 == result[i + 1]:
            result[i] += 1
    return sorted(set(result))

@staticmethod
def __NameToRC(name):
    """Convert a table cell name such as ``'B3'`` to zero-based ``(row, column)``.

    The digits of ``name`` form the one-based row number.  The letters form
    the column, using the 52 symbols ``A``-``Z`` (0-25) and ``a``-``z``
    (26-51).  Multi-letter columns are decoded as bijective base 52, so
    ``'AA'`` is column 52 rather than colliding with ``'A'``.

    Raises ValueError if the non-letter part of ``name`` is not an integer.
    """
    row = int(re.sub('[A-Za-z]', '', name)) - 1
    letters = re.sub('[0-9]', '', name)
    if not letters:
        # Keep the historical result for a name without a column part.
        return row, 0

    col = 0
    for ch in letters:
        if 'A' <= ch <= 'Z':
            digit = ord(ch) - ord('A')
        else:
            digit = 26 + ord(ch) - ord('a')
        # Bijective numeration: each symbol counts as digit + 1.
        col = col * 52 + digit + 1
    return row, col - 1

@staticmethod
def __GetTableMatrix(table):
    """Build a ``{row: {column: info}}`` mapping for every cell of ``table``.

    Each ``info`` dict holds the cell object (``'cell'``), its ``RowSpan``
    (``'rowspan'``), its name (``'name'``) and a computed ``'colspan'``.

    The column span is the number of table-wide separators (from
    ``__GetTableSeparators``) between the cell's left and right edge in its
    own row.  A row separator missing from the table-wide list is looked up
    at ``position + 1``, matching the one-unit merge done there.

    Raises ValueError if a separator is found at neither position.
    """
    result = {}
    for name in table.getCellNames():
        ri, ci = WordToJson.__NameToRC(name)
        cell = table.getCellByName(name)
        result.setdefault(ri, {})[ci] = {
            'cell': cell, 'rowspan': cell.RowSpan, 'name': name}

    seps = WordToJson.__GetTableSeparators(table)

    def sep_index(position):
        # Fall back to position + 1 for separators merged into a neighbour.
        if position in seps:
            return seps.index(position)
        return seps.index(position + 1)

    rows = table.getRows()
    for ri, row in result.items():
        row_seps = [s.Position for s in rows.getByIndex(ri).TableColumnSeparators]
        row_seps.append(table.TableColumnRelativeSum)
        row_seps = sorted(set(row_seps))
        for ci, info in row.items():
            right = sep_index(row_seps[ci])
            # The first column starts before the first separator.
            left = -1 if ci == 0 else sep_index(row_seps[ci - 1])
            info['colspan'] = right - left
    return result

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值