由于实验需要,写了个脚本,用来替换 XML 文件中部分标签的内容,在此简单记录一下。
一、处理的文档格式:
以下是xml的格式:
此处将 xPosition、yPosition 等标签对里面的内容用新的数值来替代。
用下面文档中 PL87 和 VL87 后面的内容作为替换的新内容:
二、代码
# !/ProgramData/Anaconda3
# -*- coding:utf-8 -*-
# Author:xx呀
# @Time :2022/7/15 19:51
# -*- coding:utf-8 -*-
from datetime import datetime
import os
import sys
import os.path #批量修改Xml文件中的属性值
import xml.dom.minidom
import re
import time
import pandas as pd
import numpy as np
# Module-level lists created at import time.
# NOTE(review): `data`, `date`, `row` and `number` are never read in the
# visible code -- they look like leftovers; confirm before removing.
# `TimeStamp` shares its name with the timestamp list that the script passes
# between its functions.
data = []
date = []
row = []
TimeStamp = []
number = []
def getxmldata(path, xmlFile):
    """Collect the text of the first <TimeStamp> under every <GPSParam>.

    Args:
        path: Directory that contains the XML file.
        xmlFile: Name of the XML file inside ``path``.

    Returns:
        A new list with one timestamp string per <GPSParam> element, in
        document order.

    Raises:
        IndexError: If a <GPSParam> element has no <TimeStamp> descendant.
        AttributeError: If a <TimeStamp> element is empty.
    """
    dom = xml.dom.minidom.parse(os.path.join(path, xmlFile))
    # Build a fresh list on every call: appending to a shared module-level
    # list would make a second call return stale entries as well.
    return [
        gps.getElementsByTagName('TimeStamp')[0].firstChild.data
        for gps in dom.documentElement.getElementsByTagName('GPSParam')
    ]
def _count_reversed_matches(fields, target):
    """Compare two reversed digit-group lists position by position.

    Both lists hold the digit groups of a date/time, last group first.

    Args:
        fields: Reversed digit groups taken from one line of the data file.
        target: Reversed digit groups taken from the reference timestamp.

    Returns:
        A tuple ``(exact, fuzzy)``. ``exact`` counts leading positions that
        are numerically equal. If a mismatching target group equals 999999
        it is skipped, and every equal position after that is counted in
        ``fuzzy`` instead. Counting stops at the first other mismatch.
    """
    exact = 0
    fuzzy = 0
    fraction_skipped = False
    for have, want in zip(fields, target):
        have_value = float(have)
        want_value = float(want)
        if have_value == want_value and not fraction_skipped:
            exact += 1
        elif want_value == 999999:
            # The timestamp's fractional part is not zero while the data
            # file's is, so that group cannot be compared; skip it.
            fraction_skipped = True
        elif have_value == want_value and fraction_skipped:
            fuzzy += 1
        else:
            break
    return exact, fuzzy


def getGFBdata(path_GFB, GFBfile, TimeStamp):
    """Find the line range of the data file covered by the timestamps.

    Every line whose number of digit groups equals that of the first and
    last timestamp is compared against both. A line that matches in all 7
    groups marks the boundary directly; a line that matches in 6 groups
    after a skipped 999999 group marks it with a larger offset.

    Args:
        path_GFB: Directory that contains the data file.
        GFBfile: Name of the data file inside ``path_GFB``.
        TimeStamp: Sequence of timestamp strings; only the first and last
            entries are used.

    Returns:
        A tuple ``(start_index, end_index)`` of 0-based line numbers. Either
        value stays 0 when no line matches.

    Raises:
        IndexError: If ``TimeStamp`` is empty.
    """
    n = len(TimeStamp)
    print('GPS_num',n)
    # Turn the dates into digit groups, reversed so the comparison starts at
    # the last group.
    start_day = re.findall(r"\d+", TimeStamp[0])[::-1]
    end_day = re.findall(r"\d+", TimeStamp[n-1])[::-1]
    start_index = 0
    end_index = 0
    with open(os.path.join(path_GFB, GFBfile), 'r') as f:
        for num, line in enumerate(f):
            number = re.findall(r"\d+", line)[::-1]
            if not (len(number) == len(start_day) == len(end_day)):
                continue

            t_start, t_start1 = _count_reversed_matches(number, start_day)
            if t_start1 == 6:
                # NOTE(review): +3 presumably moves on to the next record
                # after a skipped fraction -- confirm against the file layout.
                start_index = num + 3
                print('start_index:', start_index)
            if t_start == 7:
                start_index = num
                print('start_index', start_index)

            # The end comparison keeps its own skip state; it must not
            # depend on what happened while comparing the start date.
            t_end, t_end1 = _count_reversed_matches(number, end_day)
            if t_end1 == 6:
                end_index = num + 4  # push the end a bit further back
                print('end_index', end_index)
            if t_end == 7:
                end_index = num + 1  # push the end a bit further back
                print('end_index', end_index)
    return start_index, end_index
def getnewdata(path_GFB, GFBfile, start_index, end_index):
    """Extract the values that follow "PL87" and "VL87" in a line window.

    Lines numbered ``start_index`` through ``end_index + 1`` (0-based,
    inclusive) are scanned. For each marker found, the text starting five
    characters after the marker's position is split on whitespace.

    Args:
        path_GFB: Directory that contains the data file.
        GFBfile: Name of the data file inside ``path_GFB``.
        start_index: First line number to scan.
        end_index: Line number before the last line to scan.

    Returns:
        A tuple ``(P1, V1)``; each is a list with one list of strings per
        line that contained the respective marker.
    """
    P1 = []
    V1 = []
    with open(os.path.join(path_GFB, GFBfile), 'r') as f:
        for num, line in enumerate(f):
            if num > end_index + 1:
                break  # past the window, nothing further can match
            if num < start_index:
                continue
            # Strip the newline explicitly so that a final line without one
            # does not lose its last character.
            text = line.rstrip('\n')
            p_at = text.find("PL87")
            if p_at != -1:
                P1.append(text[p_at + 5:].split())
            v_at = text.find("VL87")
            if v_at != -1:
                V1.append(text[v_at + 5:].split())
    return P1, V1
def replacedata(path, new_path, xmlFile, a_P, a_V):
    """Overwrite position/velocity tag contents and save a copy of the XML.

    The n-th ``xPosition``/``yPosition``/``zPosition`` element receives
    column 0/1/2 of row n of ``a_P`` multiplied by 1000 (kilometres to
    metres); the n-th ``xVelocity``/``yVelocity``/``zVelocity`` element
    receives column 0/1/2 of row n of ``a_V`` multiplied by 0.1
    (decimetres per second to metres per second). Elements without text
    content are left untouched.

    Args:
        path: Directory that contains the input XML file.
        new_path: Existing directory the modified file is written to.
        xmlFile: File name, used for both input and output.
        a_P: Rows of three position values (strings or numbers).
        a_V: Rows of three velocity values (strings or numbers).

    Raises:
        IndexError: If a non-empty element has no corresponding row.
    """
    # (tag name, source rows, column, unit conversion factor)
    replacements = (
        ('xPosition', a_P, 0, 1000),
        ('yPosition', a_P, 1, 1000),
        ('zPosition', a_P, 2, 1000),
        ('xVelocity', a_V, 0, 0.1),
        ('yVelocity', a_V, 1, 0.1),
        ('zVelocity', a_V, 2, 0.1),
    )

    dom = xml.dom.minidom.parse(os.path.join(path, xmlFile))
    root = dom.documentElement

    for tag, rows, column, factor in replacements:
        values = [float(entry[column]) * factor for entry in rows]
        for position, element in enumerate(root.getElementsByTagName(tag)):
            text_node = element.firstChild
            # An element without children has firstChild None; skip it the
            # same way an element with empty text is skipped.
            if text_node is None or not text_node.data:
                continue
            if position >= len(values):
                raise IndexError(
                    'no replacement value for <%s> number %d' % (tag, position)
                )
            # Text node data is meant to be a string, not a float.
            text_node.data = str(values[position])

    # Write UTF-8 and declare it in the header so the bytes on disk and the
    # XML declaration agree regardless of the platform's default encoding.
    with open(os.path.join(new_path, xmlFile), 'w', encoding='utf-8') as wn:
        dom.writexml(wn, encoding='utf-8')
    print("修改完成!")
if __name__ == '__main__':
    # Command line: <xml file> <GFB file> <output directory>
    if len(sys.argv) < 4:
        sys.exit('usage: <script> XML_FILE GFB_FILE OUTPUT_DIR')
    file = sys.argv[1]  # XML file given on the command line
    GFBfile = sys.argv[2]  # GFB data file
    new_path = sys.argv[3]  # output directory
    print('xmlfile:', file)
    print('GFBfile', GFBfile)
    # The worker functions take a directory and a file name separately, so
    # split each input path before calling them.
    xml_dir, xml_name = os.path.split(file)
    gfb_dir, gfb_name = os.path.split(GFBfile)
    TimeStamp = getxmldata(xml_dir, xml_name)
    start_index, end_index = getGFBdata(gfb_dir, gfb_name, TimeStamp)
    a_P, a_V = getnewdata(gfb_dir, gfb_name, start_index, end_index)
    replacedata(xml_dir, new_path, xml_name, a_P, a_V)