解析XML(经典对决)

最新推荐文章于 2024-06-14 19:31:50 发布

nyzfl

最新推荐文章于 2024-06-14 19:31:50 发布

阅读量636

点赞数

分类专栏： C# ASP.NET 收藏文章标签： xml string object class textbox null

本文链接：https://blog.csdn.net/nyzfl/article/details/1634147

版权

收藏同时被 3 个专栏收录

15 篇文章 0 订阅

订阅专栏

12 篇文章 0 订阅

订阅专栏

ASP.NET

12 篇文章 0 订阅

订阅专栏

功能描述：

要求实现对xml的解析，比如一段xml如下：<a></a><a/><t><t></t></t><hh>分别解析成四段xml,分别为<a></a>,<a/>,<t><t></t></t>和<hh>.其中<hh>缓存起来，假如下次传入<gg></gg></hh>,返回<hh><gg></gg></hh>.OK,功能描述完毕。

1.此人有 8 年的 .net 工作经验：

using System;

namespace WindowsApplication2
{
 /// <summary>
 /// 类名称：Bao
 /// 类功能：解析xml包
 /// </summary>
 public class Bao
 {
 private string xmlBuffer = "" ; // xml流缓存
 private Tag rootTag; // 根标签比如<a id=999>xxxx。。。。。，这里存“a”
 private int startIndex;
 private int tagCount;

 public delegate void BlockOverHandler( string xmlStream); // 抛出整块信息的抛出事件之委托
 public event BlockOverHandler BlockOver; // 抛出整块信息的抛出事件之定义

 /// <summary>
 /// 注入XML流
 /// </summary>
 public void Push( string xmlStream)
 {
 // 把新注入的XML流与缓存中滞留的XML流拼接在一起
 xmlBuffer += xmlStream;

 // 开始寻找是否有整块的数据
 while (xmlBuffer.Length > 0 ) // 整个被抛给客户端时xmlBuffer即为空
 {
 if (rootTag == null ) // 当true时，意味着第一次运行或者刚有整快数据抛出并从缓存中清除
 {
 // 取出第一个标签
 string rootTagName = GetRootTag(xmlBuffer);
 rootTag = new Tag(rootTagName);

 startIndex = 0 ;
 tagCount = 1 ;
 }

 // 搜索下一个同名标签的位置
 int samePos = GetNextSamePos(xmlBuffer, rootTag, startIndex + 1 );
 // 搜索第一个结束标签的位置
 int endPos = xmlBuffer.IndexOf(rootTag.EndTag, startIndex);
 if (endPos ==- 1 )
 endPos = int .MaxValue;
 // 比较一下上两个数字谁大谁小，如果samePos小，说明内嵌有同名的标签(如：<a><a>。。。</a>....)，这时需要给计数器++，如果endPos小，则--，当减到0，则该抛出了
 if (samePos == endPos) // 肯定是没找到都等于int.MaxValue
 {
 break ; // ~~end while
 }
 else if (samePos < endPos)
 {
 startIndex = samePos + 1 ;
 tagCount ++ ;
 }
 else
 {
 startIndex = endPos + 1 ;
 tagCount -- ;
 }

 if (tagCount == 0 )
 {
 string full = xmlBuffer.Substring( 0 , endPos + rootTag.EndTag.Length);
 // 抛给客户端
 if (BlockOver != null )
 BlockOver(full);
 // 把这段砍掉
 xmlBuffer = xmlBuffer.Substring(endPos + rootTag.EndTag.Length);

 rootTag = null ;
 }
 }
 }

 /// <summary>
 /// 搜索下一个同名标签的的位置
 /// </summary>
 /// curIndex 当前标签位置
 private int GetNextSamePos( string xmlStream, Tag tag, int curIndex)
 {
 int samePos1 = xmlBuffer.IndexOf(tag.BeginTag1, curIndex + 1 );
 if (samePos1 ==- 1 )
 samePos1 = int .MaxValue;
 int samePos2 = xmlBuffer.IndexOf(tag.BeginTag2, curIndex + 1 );
 if (samePos2 ==- 1 )
 samePos2 = int .MaxValue;

 return samePos1 < samePos2 ? samePos1 : samePos2;
 }

 /// <summary>
 /// 从XML流中取出第一个标签
 /// </summary>
 /// 如果是 <a id=999> xxxx 。。。。。，这里返回“a”
 /// 如果xmlStream非法（如空串），会抛异常
 public string GetRootTag( string xmlStream)
 {
 try
 {
 string rootTag;

 int pos = 0 ;
 int pos1 = xmlStream.IndexOf( " > " );
 if (pos1 == - 1 )
 pos1 = int .MaxValue;
 int pos2 = xmlStream.IndexOf( " " );
 if (pos2 == - 1 )
 pos2 = int .MaxValue;

 if (pos1 == pos2) // 没找到空格或者>，可能会是xmlBuffer里仅几个字符尚为凑够开始标签
 {
 rootTag = "" ;
 }
 else if (pos1 < pos2)
 {
 pos = pos1;
 rootTag = xmlStream.Substring( 1 , pos - 1 ); // 抽出“<”符号之后、第一个空格之前的内容
 }
 else
 {
 pos = pos2;
 rootTag = xmlStream.Substring( 1 , pos - 1 ); // 抽出“<”符号之后、第一个空格之前的内容
 }

 return rootTag;
 }
 catch
 {
 throw new ApplicationException( " XML结构错误 " );
 }
 }

 /// <summary>
 /// 标签
 /// </summary>
 private class Tag
 {
 // 属性字段
 private string _tagName; // 标签名,形如“a”
 private string _beginTag1; // 开始标签,形如“<a>”
 private string _beginTag2;
 private string _endTag; // 结束标签

 /// <summary>
 /// 构造函数
 /// </summary>
 public Tag( string tagName)
 {
 TagName = tagName;
 }

 /// <summary>
 /// 标签名,形如“a”
 /// </summary>
 public string TagName
 {
 get
 {
 return _tagName;
 }
 set
 {
 _tagName = value;

 _beginTag1 = " < " + _tagName + " > " ;
 _beginTag2 = " < " + _tagName + " " ;
 _endTag = " </ " + _tagName + " > " ;
 }
 }

 /// <summary>
 /// 开始标签,形如“ <a> ”
 /// </summary>
 public string BeginTag1
 {
 get
 {
 return _beginTag1;
 }
 }

 /// <summary>
 /// 开始标签,形如“ <a
 /// </summary>
 public string BeginTag2
 {
 get
 {
 return _beginTag2;
 }
 }

 /// <summary>
 /// 结束标签
 /// </summary>
 public string EndTag
 {
 get
 {
 return _endTag;
 }
 }
 }

 }
}

调试代码：

         private void Form1_Load( object sender, System.EventArgs e)
        {
            bao.BlockOver += new WindowsApplication2.Bao.BlockOverHandler(Show);
        }

         private void button1_Click( object sender, System.EventArgs e)
        {
            bao.Push(textBox1.Text);
        }

         private void Show( string strXml)
        {
            MessageBox.Show(strXml);
        }

功能评价：没有修改前，可以正常解析<a></a>，<t><t></t></t>，缓存<hh>，再次输入<gg></gg></hh>，可以正常解析<hh><gg></gg></hh>。可以解析<a />,不过不可以解析<a/>。不过修改之后，完全可以实现功能。

代码评价：使用好的观察者模式，有提示异常功能，比较好的代码编写规范，和注释规范。

2. 此人是经理级别：

代码如下：

using System;

namespace XML的节点分解程序
{
 /// <summary>
 /// 使用方法:
 /// 首先要创建一个对象：XMLDivider xd=new XMLDivider();
 /// 然后设置你要操作的字符串：xd.setString("你的字符串");
 /// 开始分析：xd.parse();
 /// </summary>
 public class XMLDivider
 {
 private bool debug = false ; // 是否处在调试状态

 private bool FindOne; // 是否找到了一个？IsEnd?
 private int CP; // 当前搜索到的位置 currentPosition
 private int CL; // 当前进入到第几层了？CurrentLayer

 private bool error = false ; // 处理是否出现了错误
 private string errormessage = "" ;
 private bool state = true ; // 是否处理完全，若为false说明还有不完整的字符串
 private string strbuf; // 保存正在处理的字符串
 private bool over = false ; // 是否该结束这一轮了？

 private System.Collections.ArrayList result = new System.Collections.ArrayList(); // 保存处理后的结果
 public void setString( string str) // 设置一下要处理的字符串
 {
 this .strbuf = this .strbuf + str.Trim();
 if ( ! this .strbuf.StartsWith( " < " ))
 { this .strbuf = "" ;
 this .error = true ;
 this .errormessage = " XML字符串不规范，不是以<开头 " ;
 return ;
 }
 #region "修改"
 init();
 #endregion
 }
 public bool getState() // 看看是不是字符串处理完了？
 {
 return this .state;
 }
 public string getRestString() // 取得剩下的字符串做缓存用
 {
 return this .strbuf;
 }
 public bool isError() // 看看处理中出错了没有？
 {
 return this .error;
 }
 public string getErrorMessage()
 {
 return this .errormessage;
 }
 public System.Collections.IList getResultList()
 {
 return this .result;
 }

 public XMLDivider(){
 #region "修改"
 // init();
 #endregion
 }
 private void init()
 { // 将数据进行初始化
 state = true ;
 error = false ;
 FindOne = false ;
 CP = 0 ;
 CL = 0 ;
 over = false ;
 }
 public void parse()
 {

 if (FindOne) // 标志找到了一个先把它取走再初始化各个标志
 {
 string temp = this .strbuf.Substring( 0 , this .CP);
 result.Add(temp);
 this .strbuf = this .strbuf.Substring( this .CP, this .strbuf.Length - this .CP).Trim();
 init();
 }
 else
 {
 switch (whatTypeNextPair())
 {
 case 0 : // 找到了一个开始标签层数加1
 this .CL = this .CL + 1 ;
 break ;
 case 1 :
 this .CL = this .CL - 1 ; // 找到了一个结束标签层数减1 然后判断是不是根节点结束了
 if ( this .CL == 0 )
 {
 this .FindOne = true ; // 找到了一个
 }
 break ;
 case 2 : // 如果是自关闭的标签应该什么也不做
 if ( this .CL == 0 )
 {
 this .FindOne = true ; // 根节点自关闭当然要通知找到了一个
 }
 break ;
 case 3 : // 什么也没找到，说明处理字符没有结束，所以需要缓存
 if ( this .strbuf.Length > 0 )
 {
 this .state = false ;
 }
 over = true ; // 处理结束了吗？
 break ;
 }
 }
 if ( ! over)parse();
 }
 private int whatTypeNextPair()
 {
 int startPoint = 0 ;
 int endPoint = 0 ;
 startPoint = this .strbuf.IndexOf( " < " , this .CP); // 从CP位置向后找
 endPoint = this .strbuf.IndexOf( " > " , this .CP); // 从CP位置向后找
 CP = endPoint + 1 ;
 if (startPoint < 0 || endPoint < 0 ) return PairType.nothing;
 if (endPoint < startPoint) return PairType.nothing;
 if (debug)System.Console.Out.WriteLine( " this.strbuf.Substring(startPoint+1,1)= " + this .strbuf.Substring(startPoint + 1 , 1 ));
 if ( this .strbuf.Substring(startPoint + 1 , 1 ).Equals( " / " )) return PairType.ender;
 if (debug)System.Console.Out.WriteLine( " this.strbuf.Substring(endPoint-1,1)= " + this .strbuf.Substring(endPoint - 1 , 1 ));
 if ( this .strbuf.Substring(endPoint - 1 , 1 ).Equals( " / " )) return PairType.startAndEnder;
 return PairType.starter;
 }
 }
 class PairType
 {
 public static int starter = 0 ; // 表示下一个标签是一个开始节点
 public static int ender = 1 ; // 表示下一个标签是一个结束节点
 public static int startAndEnder = 2 ; // 表示下一个标签是一个自关闭的节点
 public static int nothing = 3 ; // 表示没有找到任何下一个节点
 }
}

调试代码：

XMLDivider xd = new XMLDivider();
 private void button1_Click( object sender, System.EventArgs e)
 {
 xd.setString( this .richTextBox1.Text);
 xd.parse();
 if ( ! xd.isError()) // 如果没有出错的话
 {
 System.Collections.IList l = xd.getResultList();
 if (l.Count > 0 )
 {
 this .richTextBox2.AppendText( " 共找到 " + l.Count + " 个根节点： " );
 for ( int i = 0 ;i < l.Count;i ++ ) this .richTextBox2.AppendText( " 第 " + i + " 个: " + ( string )(l[i]) + " " );
 if ( ! xd.getState())
 {
 this .richTextBox2.AppendText( " 没有处理完的字符串为： " );
 this .richTextBox2.AppendText(xd.getRestString() + " " );
 }
 l.Clear();
 }
 }
 else
 {
 this .richTextBox2.AppendText( " 处理错误： " + xd.getErrorMessage() + " " );
 }
 }

功能评价：没有修改前，可以正确解析，缓存有小失误。

可清晰看到结果：

共找到3个根节点：

第0个:<a></a>

第1个:<a/>

第2个:<t><t></t></t>

没有处理完的字符串为：

<hh>

修改之后：

共找到3个根节点：

第0个:<a></a>

第1个:<a/>

第2个:<t><t></t></t>

没有处理完的字符串为：

<hh>

共找到1个根节点：

第0个:<hh><gg></gg></hh>

代码评价：有良好的代码编写规范，异常纠错功能，良好的思维方式，良好的代码注释规范。

2. 此人是1年工作经验：

代码如下：

using System;

namespace IMServer
{
 /// <summary>
 /// 类名称：Stack
 /// 类功能：栈
 /// </summary>
 public class Stack
 {
 /// <summary>
 /// first 栈顶
 /// </summary>
 private Node first = null ;
 /// <summary>
 /// 栈中节点数
 /// </summary>
 private int count = 0 ;

 /// <summary>
 /// 栈空属性，提供get访问器
 /// </summary>
 public bool Empty
 {
 get
 {
 return (first == null );
 }
 }

 /// <summary>
 /// 计数属性，提供get访问器
 /// </summary>
 public int Count
 {
 get
 {
 return count;
 }
 }

 /// <summary>
 /// 出栈
 /// </summary>
 /// <returns></returns>
 public object Pop()
 {
 if (first == null )
 throw new InvalidOperationException( " Can't pop from an empty stack " );
 else
 {
 object temp = first.Value;
 first = first.Next;
 count -- ;
 return temp;
 }
 }

 public void Push( object o)
 {
 first = new Node(o,first);
 count ++ ;
 }

 public Stack()
 {
 }
 }

 public class Node
 {
 public Node Next;
 public object Value;

 public Node( object value,Node next)
 {
 Next = next;
 Value = value;
 }

 public Node( object value): this (value, null )
 {}
 }
}

using System;

namespace IMServer
{
 /// <summary>
 /// 类名称：XMLPaster
 /// 类功能：解析XML
 /// </summary>
 public class XMLParster
 {
 /// <summary>
 /// 栈
 /// </summary>
 private Stack s;
 /// <summary>
 /// XML
 /// </summary>
 private string XML;
 /// <summary>
 /// 缓存xml
 /// </summary>
 private string XMLBuffer = "" ;
 /// <summary>
 /// 集合
 /// </summary>
 private System.Collections.ArrayList Ilist = new System.Collections.ArrayList();

 public XMLParster()
 {
 s = new Stack();
 }

 public System.Collections.ArrayList ReturnXmlList( string str)
 {
 try
 {
 if (XMLBuffer != "" )
 XML = XMLBuffer + str;
 else
 XML = str;

 // 第一个<>前面是否有/
 if (XML.Substring( 0 ,XML.IndexOf( " > " ) + 1 ).IndexOf( " /> " ) !=- 1 )
 { // 存在
 Ilist.Add(XML.Substring( 0 ,XML.IndexOf( " > " ) + 1 ));
 if (XML.Substring( 0 ,XML.IndexOf( " > " ) + 1 ).Length == XML.Length)
 {
 XMLBuffer = "" ;
 return Ilist;
 }
 string aa = XML.Substring(XML.IndexOf( " > " ) + 1 ,XML.Length - XML.IndexOf( " > " ) - 1 );
 ReturnXmlList(XML.Substring(XML.IndexOf( " > " ) + 1 ,XML.Length - XML.IndexOf( " > " ) - 1 ));
 }
 else
 { // 不存在
 // 清空栈
 for ( int num1 = 0 ;num1 < s.Count;num1 ++ )
 s.Pop();
 // 前缀<a
 string Prexi = "" ;
 #region "修改"
 if (XML.Substring( 0 ,XML.IndexOf( " > " )).IndexOf( " " ) ==- 1 )
 Prexi = XML.Substring( 0 ,XML.IndexOf( " > " )); // <a></a>
 else
 Prexi = XML.Substring( 0 ,XML.IndexOf( " " )); // <a ></a>
 #endregion
 s.Push(Prexi);
 // 以<>分开
 for ( int i = XML.IndexOf( " > " ) + 1 ;i < XML.Length;)
 {
 string ShengXiaXML = XML.Substring(i,XML.Length - i);
 // 是否存在</a>
 if (ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).IndexOf(Prexi.Substring( 0 , 1 ) + " / " + Prexi.Substring( 1 ,Prexi.Length - 1 )) !=- 1 )
 {
 // 出栈
 s.Pop();
 if (s.Count == 0 )
 {
 Ilist.Add(XML.Substring( 0 ,i + ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length));
 if (i + ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length == XML.Length)
 {
 XMLBuffer = "" ;
 return Ilist;
 }
 else
 {
 ReturnXmlList(XML.Substring(i + ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length,XML.Length - i - ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length));
 break ;
 }
 }
 else
 { // 缓存
 XMLBuffer = XML.Substring(i,str.Length - i);
 }
 }
 else if (ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).IndexOf(Prexi + " / " ) !=- 1 )
 { // 是否存在<a><a/></a>
 }
 else if (ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).IndexOf(Prexi) !=- 1 )
 s.Push(Prexi); // 是否存在<a><a></a></a>
 i += ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length; // 到下一个<>
 if (i == XML.Length)
 XMLBuffer = XML.Substring( 0 ,i); // 缓存
 }
 }
 }
 catch (Exception ex)
 { throw ex;}
 return Ilist;
 }
 }
}

调试代码：

        IMServer.XMLParster xml = new IMServer.XMLParster();
         private void button6_Click( object sender, System.EventArgs e)
        {
            ArrayList list = xml.ReturnXmlList(textBox1.Text);
             foreach ( string str in list)
            {
                MessageBox.Show(str);
            }
            list.Clear();
        }

功能评价：没有修改前，可以解析，但如果如果这种情况<a></a><a/><t><t></t></t><hh>会出错。修改之后搞定。

代码评价：使用太多的字符串的截取和查找，可能速度很慢。不过采用栈的思维方式比较多特。没有好的异常纠错功能和代码注释规范。

最终考虑使用第二种，希望大家多多评价。