c# 使用XmlTextReader拆分大文件

最新推荐文章于 2024-10-30 14:17:26 发布

wdyday

最新推荐文章于 2024-10-30 14:17:26 发布

阅读量1.6k

点赞数

分类专栏： C# 文章标签： xml

本文链接：https://blog.csdn.net/wdyday/article/details/8746633

版权

C# 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

解析xml文件时，常遇到较大的xml文件，使用XmlDocument加载此文件会消耗大量内存，

这时可以使用XmlTextReader将大文件拆分成多个小文件，再使用XmlDocument加载。

xml 格式:

<?xml version="1.0" encoding="utf-8"?>
<LargeFile>
  <Content id="1234171">
    <Title>Tempting Sin</Title>
    <GeoRight>
      <GeoCountry  code = "WW" model = "Distribution">World</GeoCountry>
    </GeoRight>
  </Content>
  <Content id="1234172">
    <Title>Tempting Sin1</Title>
    <GeoRight>
      <GeoCountry  code = "WW" model = "Distribution">World</GeoCountry>
    </GeoRight>
  </Content>
  <Content id="1234173">
    <Title>Tempting Sin2</Title>
    <GeoRight>
      <GeoCountry  code = "ad" model = "Distribution">Andorra</GeoCountry>
      <GeoCountry  code = "ae" model = "Distribution">United Arab Emirates</GeoCountry>
    </GeoRight>
  </Content>
</LargeFile>

拆分代码如下：

        /// <summary>
        /// Splits a large XML file into several smaller XML files, each containing at most
        /// <paramref name="nodeCount"/> direct child elements of the root element.
        /// </summary>
        /// <remarks>
        /// The source document is streamed, so it is never loaded into memory as a whole.
        /// Parts are named "&lt;path without extension&gt;.partN.xml", with N starting at 1.
        /// Every part repeats the root element together with its attributes and namespace
        /// declarations. Whitespace, comments and processing instructions that sit directly
        /// under the root are copied only while a part is open; text and CDATA directly under
        /// the root always end up in a part.
        /// </remarks>
        /// <param name="path">Path of the large XML file to split.</param>
        /// <param name="nodeCount">Maximum number of root-level child elements per part; must be greater than zero.</param>
        /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="nodeCount"/> is zero or negative.</exception>
        public static void SplitXml(string path, int nodeCount)
        {
            if (path == null)
            {
                throw new ArgumentNullException("path");
            }

            if (nodeCount <= 0)
            {
                throw new ArgumentOutOfRangeException("nodeCount", nodeCount, "nodeCount must be greater than zero.");
            }

            // Strips only the file name's extension; a path without any extension is returned
            // unchanged and dots inside directory names are left alone.
            string filePath = System.IO.Path.ChangeExtension(path, null);

            XmlTextReader reader = null;
            XmlWriter writer = null;
            try
            {
                reader = new XmlTextReader(path);
                reader.DtdProcessing = DtdProcessing.Ignore;

                // Root element identity, replayed at the top of every part.
                string rootPrefix = string.Empty;
                string rootLocalName = string.Empty;
                string rootNamespace = string.Empty;

                // Each entry is { prefix, localName, namespaceURI, value }.
                List<string[]> rootAttributes = new List<string[]>();

                // Number of root-level child elements copied so far.
                int count = 0;

                bool hasNode = reader.Read();
                while (hasNode)
                {
                    // WriteNode leaves the reader on the node that follows the copied one,
                    // so the loop must not call Read() again in that case.
                    bool readerAdvanced = false;
                    bool partComplete = false;
                    XmlNodeType nodeType = reader.NodeType;

                    if (reader.Depth == 0)
                    {
                        if (nodeType == XmlNodeType.Element)
                        {
                            // The root is recognised by depth, not by name, so nested
                            // elements that share the root's name are treated as content.
                            rootPrefix = reader.Prefix;
                            rootLocalName = reader.LocalName;
                            rootNamespace = reader.NamespaceURI;

                            if (reader.MoveToFirstAttribute())
                            {
                                do
                                {
                                    rootAttributes.Add(new string[]
                                    {
                                        reader.Prefix,
                                        reader.LocalName,
                                        reader.NamespaceURI,
                                        reader.Value
                                    });
                                }
                                while (reader.MoveToNextAttribute());

                                reader.MoveToElement();
                            }
                        }
                        else if (nodeType == XmlNodeType.EndElement)
                        {
                            // End of the root: finish the last, possibly partial, part.
                            partComplete = true;
                        }
                    }
                    else if (reader.Depth == 1)
                    {
                        bool isElement = nodeType == XmlNodeType.Element;
                        bool carriesData = isElement
                            || nodeType == XmlNodeType.Text
                            || nodeType == XmlNodeType.CDATA;

                        if (writer == null && carriesData)
                        {
                            // The path is passed as an argument, never as part of the format
                            // string, so braces in the path cannot break formatting.
                            string partPath = string.Format(
                                System.Globalization.CultureInfo.InvariantCulture,
                                "{0}.part{1}.xml",
                                filePath,
                                count / nodeCount + 1);

                            writer = XmlWriter.Create(partPath);
                            writer.WriteStartDocument(); // <?xml version="1.0" encoding="utf-8"?>
                            writer.WriteWhitespace(Environment.NewLine);

                            writer.WriteStartElement(rootPrefix, rootLocalName, rootNamespace);
                            foreach (string[] attribute in rootAttributes)
                            {
                                writer.WriteAttributeString(attribute[0], attribute[1], attribute[2], attribute[3]);
                            }

                            writer.WriteWhitespace(Environment.NewLine);
                        }

                        if (writer != null)
                        {
                            // Copies the node with its whole subtree: attributes, empty
                            // elements, CDATA, comments and namespace prefixes included.
                            writer.WriteNode(reader, false);
                            readerAdvanced = true;

                            if (isElement)
                            {
                                count++;
                                partComplete = count % nodeCount == 0;
                            }
                        }
                    }

                    if (partComplete && writer != null)
                    {
                        writer.WriteWhitespace(Environment.NewLine);
                        writer.WriteEndElement(); // closes the repeated root element
                        writer.Close();
                        writer = null;
                    }

                    hasNode = readerAdvanced ? !reader.EOF : reader.Read();
                }
            }
            finally
            {
                // Only reached with an open writer when an exception interrupted a part.
                if (writer != null)
                {
                    writer.Close();
                }

                if (reader != null)
                {
                    reader.Close();
                }
            }
        }