群晖在线办公表格文件转成excel
群晖共享excel表格为osheet格式,同步至本地电脑后本地电脑无法用office或者wps等办公软件处理。
直接用记事本打开osheet文件,发现重要信息均为文本存储格式,可以做解析转存为osheet文件。
osheet文件示例:
x schema enc id # 1027_MQNCOFQNN90GT2AGIHOC62TSI0.sh ver ) 8948c4e945f51ccbb1165804d6dff3162e9bc290 text/comment [] text/define {} ? text/index ? {"gcVer":1,"maxIndex":2,"order":["sh_1","sh_2"],"schemaVersion":5,"sheets":{"sh_1":{"deleted":false,"title":"宸ヤ綔琛?"},"sh_2":{"deleted":false,"title":"宸ヤ綔琛?"}},"ver":3} text/locale "zh_CN" % text/sh_1 {"cfs":[],"colCount":30,"rowCount":100,"rows":{},"cols":{},"global":null,"drawing":[],"filter":{},"fixedColumnLeft":0,"fixedRowTop":0,"mergeCells":[],"validation":{},"hyperlinks":[],"cells":{"0":{"0":{"v":"test"},"1":{"v":"test"}},"1":{"0":{"v":"key"},"1":{"v":"value"}}}} h text/sh_2 T {"cfs":[],"colCount":30,"rowCount":100,"rows":{"1":{"hidden":true},"2":{"hidden":true}},"cols":{},"global":null,"drawing":[],"filter":{"id":"1e4d4eb6e85defeb","range":[0,0,6,1],"enabled":true,"filters":[null,{"type":"value","value":["2"]}]},"fixedColumnLeft":0,"fixedRowTop":0,"mergeCells":[],"validation":{},"hyperlinks":[],"cells":{"0":{"0":{"v":"娴嬭瘯"},"1":{"v":"娴嬭瘯"}},"1":{"0":{"v":"test1"},"1":{"v":1}},"2":{"0":{"v":"test1"},"1":{"v":1}},"3":{"0":{"v":"test1"},"1":{"v":2}},"4":{"0":{"v":"test2"},"1":{"v":2}},"5":{"0":{"v":"test2"},"1":{"v":2}},"6":{"0":{"v":"test2"},"1":{"v":2}}}} i text/style T {"styles":[],"fonts":[],"fills":[],"borders":[],"alignments":[],"numberFormats":[]} metatext
{"ntype":37} end
文件有许多未知字符,所以需要用二进制编码读取
def load_file(file_path='test.osheet'):
with open(file_path, 'rb') as f:
content = b''.join(f.readlines())
return content
重要内容均为json结构,所以根据大括号进行数据抽取,其中左大括号二进制编码为123,右大括号为125
def split_str(content):
left_count = 0
content_list = []
for i in range(0, len(content)):
if left_count == 0:
content_list.append(b'')
if content[i] == 123:
left_count += 1
elif content[i] == 125:
left_count -= 1
content_list[-1] += content[i].to_bytes(1, byteorder='little', signed=False)
return_list = []
for i in range(0, len(content_list)):
for j in range(0, len(content_list[i])):
if content_list[i][j] == 123:
return_list.append(content_list[i].decode('utf-8', 'replace'))
break
return return_list
经过抽取后生成的列表如下
[
{},
{'gcVer': 1, 'maxIndex': 2, 'order': ['sh_1', 'sh_2'], 'schemaVersion': 5, 'sheets': {'sh_1': {'deleted': False, 'title': '工作表1'}, 'sh_2': {'deleted': False, 'title': '工作表2'}}, 'ver': 3},
{'cfs': [], 'colCount': 30, 'rowCount': 100, 'rows': {}, 'cols': {}, 'global': None, 'drawing': [], 'filter': {}, 'fixedColumnLeft': 0, 'fixedRowTop': 0, 'mergeCells': [], 'validation': {}, 'hyperlinks': [], 'cells': {'0': {'0': {'v': 'test'}, '1': {'v': 'test'}}, '1': {'0': {'v': 'key'}, '1': {'v': 'value'}}}},
{'cfs': [], 'colCount': 30, 'rowCount': 100, 'rows': {'1': {'hidden': True}, '2': {'hidden': True}}, 'cols': {}, 'global': None, 'drawing': [], 'filter': {'id': '1e4d4eb6e85defeb', 'range': [0, 0, 6, 1], 'enabled': True, 'filters': [None, {'type': 'value', 'value': ['2']}]}, 'fixedColumnLeft': 0, 'fixedRowTop': 0, 'mergeCells': [], 'validation': {}, 'hyperlinks': [], 'cells': {'0': {'0': {'v': '测试'}, '1': {'v': '测试'}}, '1': {'0': {'v': 'test1'}, '1': {'v': 1}}, '2': {'0': {'v': 'test1'}, '1': {'v': 1}}, '3': {'0': {'v': 'test1'}, '1': {'v': 2}}, '4': {'0': {'v': 'test2'}, '1': {'v': 2}}, '5': {'0': {'v': 'test2'}, '1': {'v': 2}}, '6': {'0': {'v': 'test2'}, '1': {'v': 2}}}},
{'styles': [], 'fonts': [], 'fills': [], 'borders': [], 'alignments': [], 'numberFormats': []},
{'ntype': 37}
]
经过抽取后的字符串还是比较清晰,列表第二个元素为表格的sheet名称,第三和第四个元素为两张sheet的具体信息,接下来用xlsxwriter写入新的excel即可
if __name__ == '__main__':
wb = xlsxwriter.Workbook('test.xlsx')
sheets = []
content = split_str(load_file())
for _content in content:
_content = json.loads(_content)
if 'sheets' in _content:
for key, value in _content['sheets'].items():
sheets.append(value['title'])
sheet_num = 0
for _content in content:
_content = json.loads(_content)
if 'cells' in _content:
sh = wb.add_worksheet(sheets[sheet_num])
for row, row_value in _content['cells'].items():
for col, value in row_value.items():
sh.write(int(row), int(col), value['v'])
sheet_num += 1
wb.close()