解析 XML 文件时,经常会遇到体积较大的文件。使用 XmlDocument 一次性加载这类文件会消耗大量内存,
这时可以先用 XmlTextReader 以流的方式将大文件拆分成多个小文件,再分别使用 XmlDocument 加载。
xml 格式:
<?xml version="1.0" encoding="utf-8"?>
<LargeFile>
<Content id="1234171">
<Title>Tempting Sin</Title>
<GeoRight>
<GeoCountry code = "WW" model = "Distribution">World</GeoCountry>
</GeoRight>
</Content>
<Content id="1234172">
<Title>Tempting Sin1</Title>
<GeoRight>
<GeoCountry code = "WW" model = "Distribution">World</GeoCountry>
</GeoRight>
</Content>
<Content id="1234173">
<Title>Tempting Sin2</Title>
<GeoRight>
<GeoCountry code = "ad" model = "Distribution">Andorra</GeoCountry>
<GeoCountry code = "ae" model = "Distribution">United Arab Emirates</GeoCountry>
</GeoRight>
</Content>
</LargeFile>
拆分代码如下:
/// <summary>
/// Splits a large XML file into several smaller XML files without loading the whole
/// document into memory.
/// </summary>
/// <remarks>
/// The input is streamed with <see cref="XmlTextReader"/>. Every element that is a direct
/// child of the root is copied, together with its complete subtree (attributes, text, CDATA,
/// comments, empty elements), into a part file named
/// <c>{path without extension}.part{N}.xml</c>, where N starts at 1. Each part repeats the
/// root element and its attributes (including namespace declarations) and holds at most
/// <paramref name="nodeCount"/> child elements. Non-element nodes directly under the root
/// (whitespace, comments, text) are copied only while a part is open; those that fall
/// between two parts are skipped. A root without child elements produces no part files.
/// </remarks>
/// <param name="path">Path of the large XML file to split.</param>
/// <param name="nodeCount">Maximum number of root-child elements per part file; must be greater than zero.</param>
/// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="nodeCount"/> is zero or negative.</exception>
public static void SplitXml(string path, int nodeCount)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }
    if (nodeCount <= 0)
    {
        throw new ArgumentOutOfRangeException("nodeCount", nodeCount, "nodeCount must be greater than zero.");
    }

    // Strip only a real file extension; a dot inside a directory name must not truncate the path.
    string basePath = System.IO.Path.ChangeExtension(path, null);

    using (XmlTextReader reader = new XmlTextReader(path))
    {
        reader.DtdProcessing = DtdProcessing.Ignore;

        // Skip the XML declaration, comments and whitespace in front of the root element.
        if (reader.MoveToContent() != XmlNodeType.Element)
        {
            return;
        }

        // Keep prefix / local name / namespace separately so namespaced documents round-trip.
        string[] rootName = new string[] { reader.Prefix, reader.LocalName, reader.NamespaceURI };
        List<string[]> rootAttributes = new List<string[]>();
        while (reader.MoveToNextAttribute())
        {
            rootAttributes.Add(new string[] { reader.Prefix, reader.LocalName, reader.NamespaceURI, reader.Value });
        }
        reader.MoveToElement();

        if (reader.IsEmptyElement)
        {
            // <Root /> has no children, so there is nothing to split.
            return;
        }

        // Step onto the first node inside the root.
        reader.Read();

        XmlWriter writer = null;
        int count = 0;
        try
        {
            // Every node seen here is a direct child of the root (depth 1);
            // the root's end tag is at depth 0 and terminates the loop.
            while (reader.ReadState == ReadState.Interactive && reader.Depth > 0)
            {
                if (reader.NodeType == XmlNodeType.Element)
                {
                    if (writer == null)
                    {
                        int partNumber = count / nodeCount + 1;
                        string partPath = basePath + ".part"
                            + partNumber.ToString(System.Globalization.CultureInfo.InvariantCulture)
                            + ".xml";
                        writer = XmlWriter.Create(partPath);
                        WriteSplitPartHeader(writer, rootName, rootAttributes);
                    }

                    // Copies the element with its whole subtree and leaves the reader
                    // on the node that follows it, so no extra Read() is needed.
                    writer.WriteNode(reader, false);
                    count++;

                    if (count % nodeCount == 0)
                    {
                        WriteSplitPartFooter(writer);
                        writer = null;
                    }
                }
                else if (writer != null)
                {
                    // Whitespace, comments or text between siblings of the current part.
                    writer.WriteNode(reader, false);
                }
                else
                {
                    // No part is open: nothing to attach this node to.
                    reader.Read();
                }
            }

            // Finish the last, partially filled part.
            if (writer != null)
            {
                WriteSplitPartFooter(writer);
                writer = null;
            }
        }
        finally
        {
            // Only reached with a non-null writer when an exception interrupted the split.
            if (writer != null)
            {
                writer.Close();
            }
        }
    }
}

/// <summary>
/// Writes the XML declaration and the opening root tag (with the original root attributes)
/// at the start of a part file.
/// </summary>
private static void WriteSplitPartHeader(XmlWriter writer, string[] rootName, List<string[]> rootAttributes)
{
    writer.WriteStartDocument(); // <?xml version="1.0" encoding="utf-8"?>
    writer.WriteWhitespace(Environment.NewLine);
    writer.WriteStartElement(rootName[0], rootName[1], rootName[2]);
    foreach (string[] attribute in rootAttributes)
    {
        writer.WriteAttributeString(attribute[0], attribute[1], attribute[2], attribute[3]);
    }
    writer.WriteWhitespace(Environment.NewLine);
}

/// <summary>
/// Writes the closing root tag of a part file and closes the writer.
/// </summary>
private static void WriteSplitPartFooter(XmlWriter writer)
{
    writer.WriteWhitespace(Environment.NewLine);
    writer.WriteEndElement();
    writer.Close();
}