解析XML(经典对决)

12 篇文章 0 订阅
12 篇文章 0 订阅

功能描述:

       要求实现对xml的解析,比如一段xml如下:<a><b></b></a><a/><t><t></t></t><hh>分别解析成四段xml,分别为<a><b></b></a>,<a/>,<t><t></t></t><hh>.其中<hh>缓存起来,假如下次传入<gg></gg></hh>,返回<hh><gg></gg></hh>.OK,功能描述完毕。

1.此人有 8 年的 .net 工作经验: 
using  System;

namespace  WindowsApplication2
{
    
///   <summary>
    
///  类名称:Bao
    
///  类功能:解析xml包
    
///   </summary>
     public   class  Bao
    {
        
private   string  xmlBuffer  =   "" ;                                 // xml流缓存
         private  Tag rootTag;                                         // 根标签 比如<a id=999><b>xxxx</b>。。。。。,这里存“a”
         private   int  startIndex;
        
private   int  tagCount;

        
public   delegate   void  BlockOverHandler( string  xmlStream);     // 抛出整块信息的抛出事件之委托
         public   event  BlockOverHandler BlockOver;                     // 抛出整块信息的抛出事件之定义

        
///   <summary>
        
///  注入XML流
        
///   </summary>
         public   void  Push( string  xmlStream)
        {
            
// 把新注入的XML流与缓存中滞留的XML流拼接在一起
            xmlBuffer  +=  xmlStream;

            
// 开始寻找是否有整块的数据
             while (xmlBuffer.Length > 0 )   // 整个被抛给客户端时xmlBuffer即为空
            {
                
if  (rootTag  ==   null )     // 当true时,意味着第一次运行或者刚有整快数据抛出并从缓存中清除
                {
                    
// 取出第一个标签
                     string  rootTagName  =  GetRootTag(xmlBuffer);
                    rootTag 
=   new  Tag(rootTagName);
                    
                    startIndex 
=   0 ;
                    tagCount 
=   1 ;
                }
                
                
// 搜索下一个同名标签的位置
                 int  samePos  =  GetNextSamePos(xmlBuffer, rootTag, startIndex + 1 );
                
// 搜索第一个结束标签的位置
                 int  endPos  =  xmlBuffer.IndexOf(rootTag.EndTag, startIndex);
                
if  (endPos ==- 1 )
                    endPos 
=   int .MaxValue;
                
// 比较一下上两个数字谁大谁小,如果samePos小,说明内嵌有同名的标签(如:<a><a>。。。</a>....),这时需要给计数器++,如果endPos小,则--,当减到0,则该抛出了
                 if  (samePos  ==  endPos)   // 肯定是没找到都等于int.MaxValue
                {
                    
break ;     // ~~end while
                }
                
else   if  (samePos  <  endPos)
                {
                    startIndex 
=  samePos  +   1 ;
                    tagCount
++ ;
                }
                
else
                {
                    startIndex 
=  endPos  +   1 ;
                    tagCount
-- ;
                }

                
if  (tagCount == 0 )
                {
                    
string  full  =  xmlBuffer.Substring( 0 , endPos + rootTag.EndTag.Length);
                    
// 抛给客户端
                     if  (BlockOver != null )
                        BlockOver(full);
                    
// 把这段砍掉
                    xmlBuffer  =  xmlBuffer.Substring(endPos + rootTag.EndTag.Length);

                    rootTag 
=   null ;
                }
            }
        }


        
///   <summary>
        
///  搜索下一个同名标签的的位置
        
///   </summary>
        
///  curIndex 当前标签位置
         private   int  GetNextSamePos( string  xmlStream, Tag tag,  int  curIndex)
        {
            
int  samePos1  =  xmlBuffer.IndexOf(tag.BeginTag1, curIndex + 1 );
            
if  (samePos1 ==- 1 )
                samePos1 
=   int .MaxValue;
            
int  samePos2  =  xmlBuffer.IndexOf(tag.BeginTag2, curIndex + 1 );
            
if  (samePos2 ==- 1 )
                samePos2 
=   int .MaxValue;

            
return  samePos1 < samePos2  ?  samePos1 : samePos2;
        }


        
///   <summary>
        
///  从XML流中取出第一个标签
        
///   </summary>
        
///  如果是 <a id=999><b> xxxx </b> 。。。。。,这里返回“a”
        
///  如果xmlStream非法(如空串),会抛异常
         public   string  GetRootTag( string  xmlStream)
        {
            
try
            {
                
string  rootTag;

                
int  pos  =   0 ;
                
int  pos1  =  xmlStream.IndexOf( " > " );
                
if  (pos1  ==   - 1 )
                    pos1 
=   int .MaxValue;
                
int  pos2  =  xmlStream.IndexOf( "   " );
                
if  (pos2  ==   - 1 )
                    pos2 
=   int .MaxValue;

                
if  (pos1 == pos2)   // 没找到空格或者>,可能会是xmlBuffer里仅几个字符尚为凑够开始标签
                {
                    rootTag 
=   "" ;
                }
                
else   if  (pos1 < pos2)
                {
                    pos 
=  pos1;
                    rootTag 
=  xmlStream.Substring( 1 , pos - 1 );     // 抽出“<”符号之后、第一个空格之前的内容
                }
                
else
                {
                    pos 
=  pos2;
                    rootTag 
=  xmlStream.Substring( 1 , pos - 1 );     // 抽出“<”符号之后、第一个空格之前的内容
                }
                
                
return  rootTag;
            }
            
catch
            {
                
throw   new  ApplicationException( " XML结构错误 " );
            }
        }

        
///   <summary>
        
///  标签
        
///   </summary>
         private   class  Tag
        {
            
// 属性字段
             private   string  _tagName;         // 标签名,形如“a”
             private   string  _beginTag1;         // 开始标签,形如“<a>”
             private   string  _beginTag2;
            
private   string  _endTag;         // 结束标签

            
///   <summary>
            
///  构造函数
            
///   </summary>
             public  Tag( string  tagName)
            {
                TagName 
=  tagName;
            }

            
///   <summary>
            
///  标签名,形如“a”
            
///   </summary>
             public   string  TagName
            {
                
get
                {
                    
return  _tagName;
                }
                
set
                {
                    _tagName 
=  value;

                    _beginTag1 
=   " < "   +  _tagName  +   " > " ;
                    _beginTag2 
=   " < "   +  _tagName  +   "   " ;
                    _endTag 
=   " </ "   +  _tagName  +   " > " ;
                }
            }

            
///   <summary>
            
///  开始标签,形如“ <a>
            
///   </summary>
             public   string  BeginTag1
            {
                
get
                {
                    
return  _beginTag1;
                }
            }

            
///   <summary>
            
///  开始标签,形如“ <a
             ///   </summary>
             public   string  BeginTag2
            {
                
get
                {
                    
return  _beginTag2;
                }
            }

            
///   <summary>
            
///  结束标签
            
///   </summary>
             public   string  EndTag
            {
                
get
                {
                    
return  _endTag;
                }
            }
        }

    }
}

调试代码:

         private   void  Form1_Load( object  sender, System.EventArgs e)
        {
            bao.BlockOver 
+=   new  WindowsApplication2.Bao.BlockOverHandler(Show);
        }

        
private   void  button1_Click( object  sender, System.EventArgs e)
        {
            bao.Push(textBox1.Text);
        }

        
private   void  Show( string  strXml)
        {
            MessageBox.Show(strXml);
        }

功能评价:没有修改前,可以正常解析<a><b></b></a><t><t></t></t>,缓存<hh>,再次输入<gg></gg></hh>,可以正常解析<hh><gg></gg></hh>。可以解析<a />,不过不可以解析<a/>。不过修改之后,完全可以实现功能。

代码评价:使用好的观察者模式,有提示异常功能,比较好的代码编写规范,和注释规范。

2.     此人是经理级别:

代码如下:
using  System;

namespace  XML的节点分解程序
{
    
///   <summary>
    
///  使用方法:
    
///  首先要创建一个对象:XMLDivider xd=new XMLDivider();
    
///  然后设置你要操作的字符串:xd.setString("你的字符串");
    
///  开始分析:xd.parse();
    
///   </summary>
     public   class  XMLDivider
    {
            
private   bool  debug = false ; // 是否处在调试状态

            
private   bool  FindOne; // 是否找到了一个?IsEnd?
             private   int  CP; // 当前搜索到的位置 currentPosition
             private   int  CL; // 当前进入到第几层了?CurrentLayer


            
private   bool  error = false ; // 处理是否出现了错误
             private   string  errormessage = "" ;
            
private   bool  state = true ; // 是否处理完全,若为false说明还有不完整的字符串
             private   string  strbuf; // 保存正在处理的字符串
             private   bool  over = false ; // 是否该结束这一轮了?

            
private  System.Collections.ArrayList result = new  System.Collections.ArrayList(); // 保存处理后的结果
         public   void  setString( string  str) // 设置一下要处理的字符串
        {    
            
this .strbuf = this .strbuf + str.Trim();
            
if ( ! this .strbuf.StartsWith( " < " ))
            {    
this .strbuf = "" ;
                
this .error = true ;
                
this .errormessage = " XML字符串不规范,不是以<开头 " ;
                
return ;
            }
            
#region  "修改"
            init();
            
#endregion
        }
        
public   bool  getState() // 看看是不是字符串处理完了?
        {
            
return   this .state;
        }
        
public   string  getRestString() // 取得剩下的字符串 做缓存用
        {
            
return   this .strbuf;
        }
        
public   bool  isError() // 看看处理中出错了没有?
        {
            
return   this .error;
        }
        
public   string  getErrorMessage()
        {
            
return   this .errormessage;
        }
        
public  System.Collections.IList getResultList()
        {
            
return   this .result;
        }

        
public  XMLDivider(){
            
#region  "修改"
            
// init();
             #endregion
        }
        
private   void  init()
        {
// 将数据进行初始化
            state = true ;
            error
= false ;
            FindOne
= false ;
            CP
= 0 ;
            CL
= 0 ;
            over
= false ;
        }
        
public   void  parse()
        {

            
if (FindOne) // 标志找到了一个先把它取走再初始化各个标志
            {
                
string  temp = this .strbuf.Substring( 0 , this .CP);
                result.Add(temp);
                
this .strbuf = this .strbuf.Substring( this .CP, this .strbuf.Length - this .CP).Trim();
                init();
            }
            
else
            {
                
switch (whatTypeNextPair())
                {
                    
case   0 : // 找到了一个开始标签 层数加1
                         this .CL = this .CL + 1 ;
                        
break ;
                    
case   1 :
                        
this .CL = this .CL - 1 ; // 找到了一个结束标签 层数减1 然后判断是不是根节点结束了
                         if ( this .CL == 0 )
                        {
                            
this .FindOne = true ; // 找到了一个
                        }
                        
break ;
                    
case   2 : // 如果是自关闭的标签 应该什么也不做
                         if ( this .CL == 0 )
                        {
                            
this .FindOne = true ; // 根节点自关闭当然要通知找到了一个
                        }
                        
break ;
                    
case   3 : // 什么也没找到,说明处理字符没有结束,所以需要缓存
                         if ( this .strbuf.Length > 0 )
                        {
                            
this .state = false ;
                        }
                        over
= true ; // 处理结束了吗?
                         break ;
                }
            }
            
if ( ! over)parse();
        }
        
private   int  whatTypeNextPair()
        {    
            
int  startPoint = 0 ;
            
int  endPoint = 0 ;
            startPoint
= this .strbuf.IndexOf( " < " , this .CP); // 从CP位置向后找
            endPoint = this .strbuf.IndexOf( " > " , this .CP); // 从CP位置向后找
            CP = endPoint + 1 ;
            
if (startPoint < 0 || endPoint < 0 ) return  PairType.nothing;
            
if  (endPoint < startPoint)  return  PairType.nothing;
            
if (debug)System.Console.Out.WriteLine( " this.strbuf.Substring(startPoint+1,1)= " + this .strbuf.Substring(startPoint + 1 , 1 ));
            
if ( this .strbuf.Substring(startPoint + 1 , 1 ).Equals( " / " ))  return  PairType.ender;
            
if (debug)System.Console.Out.WriteLine( " this.strbuf.Substring(endPoint-1,1)= " + this .strbuf.Substring(endPoint - 1 , 1 ));
            
if ( this .strbuf.Substring(endPoint - 1 , 1 ).Equals( " / " ))  return  PairType.startAndEnder;
            
return  PairType.starter;
        }
    }
    
class  PairType
    {
        
public    static   int   starter = 0 ; // 表示下一个标签是一个开始节点
         public    static   int   ender = 1 ; // 表示下一个标签是一个结束节点
         public    static   int   startAndEnder = 2 ; // 表示下一个标签是一个自关闭的节点
         public    static   int   nothing = 3 ; // 表示没有找到任何下一个节点
    }
}
调试代码:
        XMLDivider xd = new  XMLDivider();
        
private   void  button1_Click( object  sender, System.EventArgs e)
        {            
            xd.setString(
this .richTextBox1.Text);
            xd.parse();
            
if ( ! xd.isError()) // 如果没有出错的话
            {
                System.Collections.IList l
= xd.getResultList();
                
if (l.Count > 0 )
                {
                    
this .richTextBox2.AppendText( " 共找到 " + l.Count + " 个根节点: " );
                    
for ( int  i = 0 ;i < l.Count;i ++ )     this .richTextBox2.AppendText( " " + i + " 个: " + ( string )(l[i]) + " " );
                    
if ( ! xd.getState())
                    {
                        
this .richTextBox2.AppendText( " 没有处理完的字符串为: " );
                        
this .richTextBox2.AppendText(xd.getRestString() + " " );
                    }
                    l.Clear();
                }
            }
            
else
            {
                
this .richTextBox2.AppendText( " 处理错误: " + xd.getErrorMessage() + " " );
            }
        }

功能评价:没有修改前,可以正确解析,缓存有小失误。

可清晰看到结果:

共找到3个根节点:

第0个:<a><b></b></a>

第1个:<a/>

第2个:<t><t></t></t>

没有处理完的字符串为:

<hh>

修改之后:

共找到3个根节点:

第0个:<a><b></b></a>

第1个:<a/>

第2个:<t><t></t></t>

没有处理完的字符串为:

<hh>

共找到1个根节点:

第0个:<hh><gg></gg></hh>

 

代码评价:有良好的代码编写规范,异常纠错功能,良好的思维方式,良好的代码注释规范。

2.       此人是1年工作经验:

代码如下:

using  System;

namespace  IMServer
{
    
///   <summary>
    
///  类名称:Stack
    
///  类功能:栈
    
///   </summary>
     public   class  Stack
    {
        
///   <summary>
        
///  first 栈顶
        
///   </summary>
         private  Node first = null ;
        
///   <summary>
        
///  栈中节点数
        
///   </summary>
         private   int  count = 0 ;

        
///   <summary>
        
///  栈空属性,提供get访问器
        
///   </summary>
         public   bool  Empty
        {
            
get
            {
                
return  (first == null );
            }
        }

        
///   <summary>
        
///  计数属性,提供get访问器
        
///   </summary>
         public   int  Count
        {
            
get
            {
                
return  count;
            }
        }

        
///   <summary>
        
///  出栈
        
///   </summary>
        
///   <returns></returns>
         public   object  Pop()
        {
            
if (first == null )
                
throw   new  InvalidOperationException( " Can't pop from an empty stack " );
            
else
            {
                
object  temp = first.Value;
                first
= first.Next;
                count
-- ;
                
return  temp;
            }
        }

        
public   void  Push( object  o)
        {
            first
= new  Node(o,first);
            count
++ ;
        }

        
public  Stack()
        {
        }
    }

    
public   class  Node
    {
        
public  Node Next;
        
public   object  Value;

        
public  Node( object  value,Node next)
        {
            Next
= next;
            Value
= value;
        }

        
public  Node( object  value): this (value, null )
        {}
    }
}

 

using  System;

namespace  IMServer
{
    
///   <summary>
    
///  类名称:XMLPaster
    
///  类功能:解析XML
    
///   </summary>
     public   class  XMLParster
    {
        
///   <summary>
        
///  栈
        
///   </summary>
         private  Stack s;
        
///   <summary>
        
///  XML
        
///   </summary>
         private   string  XML;
        
///   <summary>
        
///  缓存xml
        
///   </summary>
         private   string  XMLBuffer = "" ;
        
///   <summary>
        
///  集合
        
///   </summary>
         private  System.Collections.ArrayList Ilist = new  System.Collections.ArrayList();

        
public  XMLParster()
        {
            s
= new  Stack();
        }

        
public  System.Collections.ArrayList ReturnXmlList( string  str)
        {
            
try
            {
                
if (XMLBuffer != "" )
                    XML
= XMLBuffer + str;
                
else
                    XML
= str;

                
// 第一个<>前面是否有/
                 if (XML.Substring( 0 ,XML.IndexOf( " > " ) + 1 ).IndexOf( " /> " ) !=- 1 )
                {
// 存在
                    Ilist.Add(XML.Substring( 0 ,XML.IndexOf( " > " ) + 1 ));
                    
if (XML.Substring( 0 ,XML.IndexOf( " > " ) + 1 ).Length == XML.Length)
                    {
                        XMLBuffer
= "" ;
                        
return  Ilist;
                    }
                    
string  aa = XML.Substring(XML.IndexOf( " > " ) + 1 ,XML.Length - XML.IndexOf( " > " ) - 1 );
                    ReturnXmlList(XML.Substring(XML.IndexOf(
" > " ) + 1 ,XML.Length - XML.IndexOf( " > " ) - 1 ));
                }
                
else
                {
// 不存在
                    
// 清空栈
                     for ( int  num1 = 0 ;num1 < s.Count;num1 ++ )
                        s.Pop();
                    
// 前缀<a
                     string  Prexi = "" ;
                    
#region  "修改"
                    
if (XML.Substring( 0 ,XML.IndexOf( " > " )).IndexOf( "   " ) ==- 1 )
                        Prexi
= XML.Substring( 0 ,XML.IndexOf( " > " )); // <a></a>
                     else
                        Prexi
= XML.Substring( 0 ,XML.IndexOf( "   " )); // <a ></a>
                     #endregion
                    s.Push(Prexi);
                    
// 以<>分开
                     for ( int  i = XML.IndexOf( " > " ) + 1 ;i < XML.Length;)
                    {
                        
string  ShengXiaXML = XML.Substring(i,XML.Length - i);
                        
// 是否存在</a>
                         if (ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).IndexOf(Prexi.Substring( 0 , 1 ) + " / " + Prexi.Substring( 1 ,Prexi.Length - 1 )) !=- 1 )
                        {
                            
// 出栈
                            s.Pop();
                            
if (s.Count == 0 )
                            {
                                Ilist.Add(XML.Substring(
0 ,i + ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length));
                                
if (i + ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length == XML.Length)
                                {
                                    XMLBuffer
= "" ;
                                    
return  Ilist;
                                }
                                
else
                                {
                                    ReturnXmlList(XML.Substring(i
+ ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length,XML.Length - i - ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length));
                                    
break ;
                                }                                
                            }
                            
else
                            {
// 缓存
                                XMLBuffer = XML.Substring(i,str.Length - i);
                            }
                        }
                        
else   if (ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).IndexOf(Prexi + " / " ) !=- 1 )
                        {
// 是否存在<a><a/></a>
                        }
                        
else   if (ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).IndexOf(Prexi) !=- 1 )
                            s.Push(Prexi);
// 是否存在<a><a></a></a>
                        i += ShengXiaXML.Substring( 0 ,ShengXiaXML.IndexOf( " > " ) + 1 ).Length; // 到下一个<>
                         if (i == XML.Length)
                            XMLBuffer
= XML.Substring( 0 ,i); // 缓存
                    }
                }
            }
            
catch (Exception ex)
            {
throw  ex;}
            
return  Ilist;
        }
    }
}
调试代码:
        IMServer.XMLParster xml = new  IMServer.XMLParster();
        
private   void  button6_Click( object  sender, System.EventArgs e)
        {            
            ArrayList list
= xml.ReturnXmlList(textBox1.Text);
            
foreach ( string  str  in  list)
            {
                MessageBox.Show(str);
            }
            list.Clear();
        }

功能评价:没有修改前,可以解析,但如果如果这种情况<a><b a=""></b></a><a/><t><t></t></t><hh>会出错。修改之后搞定。

代码评价:使用太多的字符串的截取和查找,可能速度很慢。不过采用栈的思维方式比较多特。没有好的异常纠错功能和代码注释规范。

 

最终考虑使用第二种,希望大家多多评价。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值