使用 Python 解析 .vsdx 格式文件并提取信息

安装Python包

这次的文件格式解析格外轻松,只需要安装 vsdx 这一个第三方包:

pip install vsdx

上代码

from vsdx import VisioFile
import os
import json

class vsdx_analysis:
    """Extract text or per-shape metadata from a Visio ``.vsdx`` file.

    Args:
        filepath: Path of the ``.vsdx`` file handed to ``VisioFile``.
        format: Output selector. ``"vsdx_text"`` makes :meth:`vsdx2data`
            return only the de-duplicated shape texts; any other value
            (including the default ``None``) yields the per-page detail.
    """

    # Shape attributes copied verbatim into every shape record, in output order.
    _SHAPE_FIELDS = (
        "shape_name",    # shape name
        "text",          # shape text
        "shape_type",    # shape type, e.g. "Group" or "Shape"
        "x",             # position
        "y",
        "width",         # size
        "height",
        "angle",         # rotation angle
        "line_weight",   # line thickness
        "line_color",
        "fill_color",
        "text_color",
        "end_arrow",     # arrow style at the end of the line
        "child_shapes",  # shapes directly contained in this shape
        "all_shapes",    # this shape's contained shapes, collected recursively
        "connects",      # Connect objects attached to this shape
    )

    def __init__(self, filepath, format=None):
        self.filepath = filepath
        self.format = format

    def vsdx2data(self):
        """Open the file and return the data selected by ``self.format``.

        Returns:
            ``{"TEXT": [...]}`` holding each distinct non-empty text of the
            pages' direct child shapes (first-seen order) when ``self.format``
            is ``"vsdx_text"``. Otherwise a list with one dict per page::

                {"page.name": <name>,
                 "page.child_shapes": {<shape.ID>: <shape record>, ...}}

            NOTE(review): the ``child_shapes`` / ``all_shapes`` / ``connects``
            entries of a shape record are stored exactly as the library
            returns them; they are presumably library objects and therefore
            not directly JSON-serializable -- confirm before dumping.
        """
        with VisioFile(self.filepath) as vis:
            if self.format == "vsdx_text":
                texts = [
                    shape.text
                    for page in vis.pages
                    for shape in page.child_shapes
                    if shape.text
                ]
                return {"TEXT": self.remove_duplicates(texts)}

            return [
                {
                    "page.name": page.name,
                    "page.child_shapes": {
                        shape.ID: self._shape_to_dict(shape)
                        for shape in page.child_shapes
                    },
                }
                for page in vis.pages
            ]

    @staticmethod
    def _shape_to_dict(shape):
        """Build the record for one shape: the listed attributes plus custom data."""
        record = {
            field: getattr(shape, field) for field in vsdx_analysis._SHAPE_FIELDS
        }

        # Prefer the public accessor. The private ``_data_properties``
        # attribute is presumably just its cache and may be unset until the
        # accessor has been used -- TODO confirm against the installed vsdx
        # version. It is kept as a fallback for objects lacking the accessor.
        props = getattr(shape, "data_properties", None)
        if props is None:
            props = getattr(shape, "_data_properties", None)
        if props:
            # Stored on the shape's own record; writing it to the page-level
            # mapping would let every shape overwrite the previous one.
            record["_data_properties"] = dict(props)
        return record

    def remove_duplicates(self, lst):
        """Return the items of ``lst`` without repeats, keeping first-seen order.

        Items must be hashable.
        """
        return list(dict.fromkeys(lst))

# Example invocation
if __name__ == "__main__":
    # Path of the Visio (.vsdx) file to analyse
    vsdx_path = "/home/hyh/data/Maintenance_test_data/DOH.vsdx"
    output_format = "vsdx_text"
    vsdx = vsdx_analysis(vsdx_path, output_format)
    vsdx_txt = vsdx.vsdx2data()

    # splitext drops only the final extension, so a dotted name such as
    # "plan.v2.vsdx" keeps its full stem "plan.v2" in the output file name.
    stem = os.path.splitext(os.path.basename(vsdx_path))[0]
    output_path = os.path.join(
        os.path.dirname(vsdx_path), f"{stem}_{output_format}.json"
    )
    # Write the result next to the input file as UTF-8 JSON
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(vsdx_txt, f, ensure_ascii=False, indent=4)
  • 3
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值