- 我们分两步进行。第一步:利用 MATLAB 官网提供的解析函数 parseXML 进行解析,代码直接从官网复制粘贴即可,稍微浏览一遍了解其结构。
function theStruct = parseXML(filename)
% PARSEXML Convert XML file to a MATLAB structure.
%   theStruct = PARSEXML(filename) reads FILENAME with xmlread and converts
%   the resulting DOM tree with parseChildNodes.
%
%   Errors:
%     parseXML:readFailed  - xmlread threw while reading FILENAME.
%     parseXML:parseFailed - parseChildNodes threw while converting the tree.
%   In both cases the message text is the same as before, and the exception
%   that triggered the failure is attached as a cause so the underlying
%   reason (missing file, malformed XML, ...) is not lost.
try
    tree = xmlread(filename);
catch readErr
    wrapped = MException('parseXML:readFailed', ...
        'Failed to read XML file %s.', filename);
    throw(addCause(wrapped, readErr));
end

% Recurse over child nodes. This could run into problems
% with very deeply nested trees.
try
    theStruct = parseChildNodes(tree);
catch parseErr
    wrapped = MException('parseXML:parseFailed', ...
        'Unable to parse XML file %s.', filename);
    throw(addCause(wrapped, parseErr));
end
% ----- Local function PARSECHILDNODES -----
function children = parseChildNodes(theNode)
% PARSECHILDNODES Convert the children of a DOM node into a struct array.
%   Returns [] when theNode has no children; otherwise a 1-by-N struct
%   array with fields Name, Attributes, Data and Children, one element per
%   child, each built by makeStructFromNode.

% Not part of the official example: strip whitespace-only #text nodes
% before the children are counted. removeIndentNodes recurses into
% descendants itself, so calls made from deeper recursion levels of this
% function revisit subtrees that were already cleaned.
removeIndentNodes( theNode );

if ~theNode.hasChildNodes
    children = [];
    return
end

nodeList = theNode.getChildNodes;
nKids = nodeList.getLength;

% Preallocate the struct array: every field gets a 1-by-nKids cell of [].
slots = cell(1, nKids);
children = struct('Name', slots, 'Attributes', slots, ...
    'Data', slots, 'Children', slots);

for javaIdx = 0:nKids-1   % DOM lists are 0-based, MATLAB arrays 1-based
    children(javaIdx+1) = makeStructFromNode(nodeList.item(javaIdx));
end
% ----- Local function MAKESTRUCTFROMNODE -----
function nodeStruct = makeStructFromNode(theNode)
% MAKESTRUCTFROMNODE Build the info record for a single DOM node.
%   The result has fields, in this order:
%     Name       - node name as char
%     Attributes - result of parseAttributes(theNode)
%     Data       - char(theNode.getData) when the node exposes a getData
%                  method, otherwise ''
%     Children   - result of parseChildNodes(theNode)

% Attributes are gathered before the children, as in the struct() call order.
nodeName  = char(theNode.getNodeName);
nodeAttrs = parseAttributes(theNode);
nodeKids  = parseChildNodes(theNode);

nodeStruct = struct('Name', nodeName, ...
    'Attributes', nodeAttrs, ...
    'Data', '', ...
    'Children', nodeKids);

% Only nodes that expose getData carry data; all others keep ''.
if ismember('getData', methods(theNode))
    nodeStruct.Data = char(theNode.getData);
end
% ----- Local function PARSEATTRIBUTES -----
function attributes = parseAttributes(theNode)
% PARSEATTRIBUTES Collect the attributes of a DOM node.
%   Returns [] when theNode has no attributes; otherwise a 1-by-N struct
%   array with char fields Name and Value, one element per attribute.

if ~theNode.hasAttributes
    attributes = [];
    return
end

attrMap = theNode.getAttributes;
nAttr = attrMap.getLength;

% Preallocate a 1-by-nAttr struct array with empty fields.
blank = cell(1, nAttr);
attributes = struct('Name', blank, 'Value', blank);

for k = 1:nAttr
    oneAttr = attrMap.item(k-1);   % attribute map is 0-based
    attributes(k).Name = char(oneAttr.getName);
    attributes(k).Value = char(oneAttr.getValue);
end
- 接下来进行第二步:去除 #text 节点。首先了解一下 #text 产生的原因:DOM 树中包含被称为“不可忽略的空白”的文本节点,它们是标签之间的空白字符(如回车符、缩进)。下面的 removeIndentNodes( ) 函数的功能就是去掉这些只含空白的 #text 节点。
function removeIndentNodes( childNodes )
% REMOVEINDENTNODES Delete whitespace-only text nodes ("#text") in place.
%   removeIndentNodes(childNodes) visits every item of childNodes. Items
%   that have children are processed recursively via their getChildNodes
%   list. A childless item is removed when it is a TEXT_NODE whose data is
%   non-empty and consists only of whitespace characters.
%
%   NOTE(review): removeChild is invoked on childNodes itself, so the
%   argument must provide removeChild in addition to getLength/item --
%   confirm this holds for the DOM implementation in use.

total = childNodes.getLength;
dropMask = false(1, total);

% Pass 1: recurse into subtrees and flag removable items. Nothing is
% removed at this level yet, so the positions stay valid.
for pos = 1:total
    node = childNodes.item(pos-1);   % java indexing
    if node.hasChildNodes
        removeIndentNodes(node.getChildNodes);
    elseif node.getNodeType == node.TEXT_NODE
        txt = char(node.getData());
        dropMask(pos) = ~isempty(txt) && all(isspace(txt));
    end
end

% Pass 2: remove from the highest index down so that the remaining
% (lower) indices are not shifted by earlier removals.
for javaIdx = fliplr(find(dropMask)) - 1
    childNodes.removeChild(childNodes.item(javaIdx));
end
end