C# 使用 XmlTextReader 拆分大型 XML 文件

        解析 XML 文件时，常会遇到体积较大的文件，使用 XmlDocument 一次性加载此类文件会消耗大量内存。

这时可以先使用 XmlTextReader 以流式方式将大文件拆分成多个小文件，再分别使用 XmlDocument 加载。


xml 格式:

<?xml version="1.0" encoding="utf-8"?>
<LargeFile>
  <Content id="1234171">
    <Title>Tempting Sin</Title>
    <GeoRight>
      <GeoCountry  code = "WW" model = "Distribution">World</GeoCountry>
    </GeoRight>
  </Content>
  <Content id="1234172">
    <Title>Tempting Sin1</Title>
    <GeoRight>
      <GeoCountry  code = "WW" model = "Distribution">World</GeoCountry>
    </GeoRight>
  </Content>
  <Content id="1234173">
    <Title>Tempting Sin2</Title>
    <GeoRight>
      <GeoCountry  code = "ad" model = "Distribution">Andorra</GeoCountry>
      <GeoCountry  code = "ae" model = "Distribution">United Arab Emirates</GeoCountry>
    </GeoRight>
  </Content>
</LargeFile>

拆分代码如下:

        /// <summary>
        /// Splits a large XML file into several smaller XML files so that each one can be
        /// loaded on its own. The input is streamed with <see cref="XmlTextReader"/>, so it
        /// is never held in memory as a whole. Every output file repeats the root element
        /// (with its attributes) and contains at most <paramref name="nodeCount"/> first-level
        /// child elements. Output files are named "&lt;path without extension&gt;.part1.xml",
        /// "&lt;path without extension&gt;.part2.xml", and so on.
        /// </summary>
        /// <remarks>
        /// Elements, attributes, text, CDATA sections and whitespace are copied. Comments and
        /// processing instructions are not copied. No output file is produced when the root
        /// element has no child elements.
        /// </remarks>
        /// <param name="path">Path of the large XML file.</param>
        /// <param name="nodeCount">Maximum number of first-level child elements per output file; must be greater than zero.</param>
        /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="nodeCount"/> is zero or negative.</exception>
        public static void SplitXml(string path, int nodeCount)
        {
            if (path == null)
            {
                throw new ArgumentNullException("path");
            }

            if (nodeCount <= 0)
            {
                throw new ArgumentOutOfRangeException("nodeCount", nodeCount, "nodeCount must be greater than zero.");
            }

            // Strip the extension only when the last '.' belongs to the file name itself;
            // a dot inside a directory name (or no dot at all) must leave the path intact.
            int lastSeparator = path.LastIndexOfAny(new char[] { System.IO.Path.DirectorySeparatorChar, System.IO.Path.AltDirectorySeparatorChar });
            int lastDot = path.LastIndexOf('.');
            string basePath = lastDot > lastSeparator ? path.Substring(0, lastDot) : path;

            // Invariant: writer is non-null exactly while a part file is open.
            XmlWriter writer = null;
            XmlTextReader reader = new XmlTextReader(path);
            try
            {
                reader.DtdProcessing = DtdProcessing.Ignore;

                string[] root = null; // { prefix, local name, namespace URI } of the root element
                List<string[]> rootAttributes = new List<string[]>();
                int count = 0; // number of first-level elements completely written so far

                while (reader.Read())
                {
                    switch (reader.NodeType)
                    {
                        case XmlNodeType.Element:
                            // The root is identified by depth, not by name, so nested
                            // elements that happen to share the root's name are copied normally.
                            if (reader.Depth == 0)
                            {
                                root = new string[] { reader.Prefix, reader.LocalName, reader.NamespaceURI };
                                rootAttributes = ReadSplitRootAttributes(reader);
                                break;
                            }

                            // A part is only ever closed after a first-level element ends,
                            // so a missing writer here means a new first-level element starts.
                            if (writer == null)
                            {
                                writer = OpenSplitPart(basePath, count / nodeCount + 1, root, rootAttributes);
                            }

                            // Capture these before touching the attributes: they describe the
                            // element node, not the attribute node the reader moves onto.
                            bool isEmpty = reader.IsEmptyElement;
                            bool isFirstLevel = reader.Depth == 1;

                            writer.WriteStartElement(reader.Prefix, reader.LocalName, reader.NamespaceURI);
                            CopySplitAttributes(reader, writer);

                            if (isEmpty)
                            {
                                // An empty element produces no EndElement node, so it has to be
                                // closed (and counted, when first-level) right here.
                                writer.WriteEndElement();
                                if (isFirstLevel)
                                {
                                    count++;
                                    if (count % nodeCount == 0)
                                    {
                                        CloseSplitPart(writer);
                                        writer = null;
                                    }
                                }
                            }

                            break;

                        case XmlNodeType.EndElement:
                            if (writer == null)
                            {
                                // End of the root while no part is open: nothing to finish.
                                break;
                            }

                            if (reader.Depth == 0)
                            {
                                // End of the root: finish the last, partially filled part.
                                CloseSplitPart(writer);
                                writer = null;
                                break;
                            }

                            writer.WriteEndElement();
                            if (reader.Depth == 1)
                            {
                                count++;
                                if (count % nodeCount == 0)
                                {
                                    CloseSplitPart(writer);
                                    writer = null;
                                }
                            }

                            break;

                        case XmlNodeType.Text:
                            if (writer != null)
                            {
                                writer.WriteString(reader.Value);
                            }

                            break;

                        case XmlNodeType.CDATA:
                            if (writer != null)
                            {
                                writer.WriteCData(reader.Value);
                            }

                            break;

                        case XmlNodeType.Whitespace:
                        case XmlNodeType.SignificantWhitespace:
                            if (writer != null)
                            {
                                writer.WriteWhitespace(reader.Value);
                            }

                            break;
                    }
                }
            }
            finally
            {
                // Reached with an open writer only when an exception interrupted the split.
                if (writer != null && writer.WriteState != WriteState.Closed)
                {
                    writer.Close();
                }

                if (reader.ReadState != ReadState.Closed)
                {
                    reader.Close();
                }
            }
        }

        /// <summary>
        /// Collects the attributes of the element the reader is positioned on as
        /// { prefix, local name, namespace URI, value } entries and moves the reader back to the element.
        /// </summary>
        private static List<string[]> ReadSplitRootAttributes(XmlReader reader)
        {
            List<string[]> attributes = new List<string[]>();
            if (reader.MoveToFirstAttribute())
            {
                do
                {
                    attributes.Add(new string[] { reader.Prefix, reader.LocalName, reader.NamespaceURI, reader.Value });
                }
                while (reader.MoveToNextAttribute());

                reader.MoveToElement();
            }

            return attributes;
        }

        /// <summary>
        /// Writes every attribute of the element the reader is positioned on to the writer
        /// and moves the reader back to the element.
        /// </summary>
        private static void CopySplitAttributes(XmlReader reader, XmlWriter writer)
        {
            if (reader.MoveToFirstAttribute())
            {
                do
                {
                    writer.WriteAttributeString(reader.Prefix, reader.LocalName, reader.NamespaceURI, reader.Value);
                }
                while (reader.MoveToNextAttribute());

                reader.MoveToElement();
            }
        }

        /// <summary>
        /// Creates the part file with the given number and writes the XML declaration and the
        /// root start tag (including the root attributes) into it.
        /// </summary>
        private static XmlWriter OpenSplitPart(string basePath, int partNumber, string[] root, List<string[]> rootAttributes)
        {
            // Plain concatenation: the path is data, not a format pattern, so braces in it are safe.
            XmlWriter writer = XmlWriter.Create(basePath + ".part" + partNumber + ".xml");
            try
            {
                writer.WriteStartDocument(); // <?xml version="1.0" encoding="utf-8"?>
                writer.WriteWhitespace(Environment.NewLine);

                writer.WriteStartElement(root[0], root[1], root[2]);
                foreach (string[] attribute in rootAttributes)
                {
                    writer.WriteAttributeString(attribute[0], attribute[1], attribute[2], attribute[3]);
                }

                writer.WriteWhitespace(Environment.NewLine);
                return writer;
            }
            catch
            {
                // Do not leak the file handle when the header cannot be written.
                writer.Close();
                throw;
            }
        }

        /// <summary>
        /// Writes the root end tag into the part file and closes it.
        /// </summary>
        private static void CloseSplitPart(XmlWriter writer)
        {
            writer.WriteWhitespace(Environment.NewLine);
            writer.WriteEndElement();
            writer.Close();
        }


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值