安装Python包
这次的文件格式解析格外轻松
pip install vsdx
上代码
from vsdx import VisioFile
import os
import json
class vsdx_analysis:
    """Extract page and shape data from a Visio file using ``VisioFile``.

    The constructor stores two attributes:
        filepath: path handed to ``VisioFile`` when parsing.
        format: output selector; ``"vsdx_text"`` yields de-duplicated shape
            text, any other value yields the per-page shape dump.
    """

    def __init__(self, filepath, format=None):
        """Store the file path and the requested output format.

        Args:
            filepath: Path of the Visio file to open.
            format: ``"vsdx_text"`` for text-only output; anything else
                (including the default ``None``) selects the full dump.
        """
        self.filepath = filepath
        self.format = format

    def vsdx2data(self):
        """Open the Visio file and return its content in the chosen format.

        Returns:
            When ``self.format == "vsdx_text"``: ``{"TEXT": [...]}`` holding
            the non-empty text of each page's child shapes, duplicates
            removed and first-seen order kept.

            Otherwise: a list with one dict per page. Each dict has the keys
            ``"page.name"`` and ``"page.child_shapes"``; the latter maps each
            child shape's ID to a dict of that shape's attributes.
        """
        with VisioFile(self.filepath) as vis:
            if self.format == "vsdx_text":
                texts = [
                    shape.text
                    for page in vis.pages
                    for shape in page.child_shapes
                    if shape.text
                ]
                return {"TEXT": self.remove_duplicates(texts)}

            return [
                {
                    "page.name": page.name,
                    "page.child_shapes": {
                        shape.ID: self._shape_to_dict(shape)
                        for shape in page.child_shapes
                    },
                }
                for page in vis.pages
            ]

    def _shape_to_dict(self, shape):
        """Collect the attributes of one shape into a dict."""
        shape_info = {
            "shape_name": shape.shape_name,
            "text": shape.text,
            "shape_type": shape.shape_type,  # e.g. "Group", "Shape"
            "x": shape.x,  # position
            "y": shape.y,
            "width": shape.width,  # size
            "height": shape.height,
            "angle": shape.angle,  # rotation
            "line_weight": shape.line_weight,
            "line_color": shape.line_color,
            "fill_color": shape.fill_color,
            "text_color": shape.text_color,
            "end_arrow": shape.end_arrow,  # arrow style at the line end
            # NOTE(review): the next three values are whatever the library
            # returns (presumably lists of its own Shape/Connect objects),
            # so they likely need converting before json.dump -- confirm.
            "child_shapes": shape.child_shapes,
            "all_shapes": shape.all_shapes,
            "connects": shape.connects,
        }

        # Read the custom properties through the public accessor; the
        # private ``_data_properties`` attribute is presumably only a lazily
        # filled cache and may still be unset here -- confirm against vsdx.
        props = shape.data_properties
        if props:
            # Stored on the shape's own dict so that one shape's properties
            # cannot overwrite another's in the page-level mapping.
            shape_info["_data_properties"] = dict(props)
        return shape_info

    def remove_duplicates(self, lst):
        """Return the items of *lst* without repeats, keeping first-seen order.

        Items must be hashable, because they are used as dict keys.
        """
        return list(dict.fromkeys(lst))
# Example invocation: extract the de-duplicated shape text from a Visio file
# and write it as JSON next to the source file.
if __name__ == "__main__":
    # Path of the VSDX file to parse.
    vsdx_path = "/home/hyh/data/Maintenance_test_data/DOH.vsdx"
    output_format = "vsdx_text"

    vsdx_txt = vsdx_analysis(vsdx_path, output_format).vsdx2data()

    # splitext removes only the final extension, so a dotted name such as
    # "plan.v2.vsdx" keeps its full stem ("plan.v2") in the output name.
    stem = os.path.splitext(os.path.basename(vsdx_path))[0]
    output_path = os.path.join(
        os.path.dirname(vsdx_path), f"{stem}_{output_format}.json"
    )
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(vsdx_txt, f, ensure_ascii=False, indent=4)