蛙蛙推荐:蛙蛙牌XML压缩算法

 

 

摘要:

在用XML传输数据的时候,XML本身的元素名称,属性名称可能比有效的信息量占的地方还要大,本文示例一种简单实用的算法来进行XML压缩,主要思路是把XML标签和属性用整数来表示以便降低传输量。

单元测试代码

 

 

/// <summary>
/// Console demo that prints the size of a sample XML document in its raw form,
/// after Deflate, after XmlZip encoding, and after XmlZip followed by Deflate.
/// </summary>
class  Program {

    /// <summary>
    /// Sample document. It is used both as the input to compress and as the
    /// sample from which XmlZip builds its name dictionary.
    /// The literal must start directly with the XML declaration: XmlReader
    /// rejects a document that has white space in front of it.
    /// </summary>
    public static string XML = @"<?xml version=""1.0"" encoding=""utf-16""?>
    <Customer>
<CustomerID>ALFKI</CustomerID>
<PO>9572658</PO>
<Address AddressType=""work"">
    <Street>One Main Street</Street>
    <City>Anywhere</City>
    <State>NJ</State>
    <Zip>08080</Zip>
</Address>
<Order>
    <OrderID>10966</OrderID >
    <LineItem>
        <ProductID>37</ProductID>
        <UnitPrice>26.50 </UnitPrice>
        <Quantity>8</Quantity>
        <Description>Gravad lax </Description>             
    </LineItem>
    <LineItem>
        <ProductID>56 </ProductID>
        <UnitPrice>38.00</UnitPrice>
        <Quantity>12</Quantity>
        <Description>Gnocchi di nonna Alice</Description>             
    </LineItem>
</Order>    
</Customer>
";

    /// <summary>
    /// Runs the size comparison and waits for a key press before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args) {
        XmlZip zip = new XmlZip();

        byte[] bs = Encoding.UTF8.GetBytes(XML);
        Console.WriteLine("原始文件长度:{0}", bs.Length);
        Console.WriteLine("Deflate压缩后长度: {0}", DeflatedLength(bs));

        // Both directions use the dictionary built from the sample document.
        zip.Init(XML);
        bs = zip.XmlToBytes(XML);
        Console.WriteLine("XML压缩后长度:{0}", bs.Length);

        string str = zip.BytesToXml(bs);
        Console.WriteLine("还原后长度:{0}", Encoding.UTF8.GetByteCount(str));
        Console.WriteLine(str);

        Console.WriteLine("先XML压缩,再Deflate压缩后的长度:{0}", DeflatedLength(bs));
        Console.ReadKey();
    }

    /// <summary>
    /// Returns the number of bytes produced by Deflate-compressing <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Bytes to compress.</param>
    /// <returns>Length of the compressed output in bytes.</returns>
    private static long DeflatedLength(byte[] data) {
        using (MemoryStream ms = new MemoryStream()) {
            // leaveOpen = true so the target stream can still be measured
            // after the DeflateStream has been closed (which flushes it).
            using (DeflateStream deflate = new DeflateStream(ms, CompressionMode.Compress, true)) {
                deflate.Write(data, 0, data.Length);
            }
            return ms.Length;
        }
    }
}

 

 

测试输出

原始文件长度:740
Deflate压缩后长度: 438
XML压缩后长度:295
还原后长度:727
<?xml version="1.0" encoding="utf-16"?>
<Customer>
  <CustomerID>ALFKI</CustomerID>
  <PO>9572658</PO>
  <Address AddressType="work">
    <Street>One Main Street</Street>
    <City>Anywhere</City>
    <State>NJ</State>
    <Zip>08080</Zip>
  </Address>
  <Order>
    <OrderID>10966</OrderID>
    <LineItem>
      <ProductID>37</ProductID>
      <UnitPrice>26.50 </UnitPrice>
      <Quantity>8</Quantity>
      <Description>Gravad lax </Description>             
    </LineItem>
    <LineItem>
      <ProductID>56 </ProductID>
      <UnitPrice>38.00</UnitPrice>
      <Quantity>12</Quantity>
      <Description>Gnocchi di nonna Alice</Description>             
    </LineItem>
  </Order>
</Customer>
先XML压缩,再Deflate压缩后的长度:357

 

可以看到,压缩后的数据约是原来数据的三分之一,可能没有其它专有的压缩算法的压缩率高,但效果还算是满意吧。而且我的算法是比较通用的,只要通信双方知道了 XML Schema,甚至双方只需要持有同一份完整的示例 XML 文档,就可以进行压缩通信。目前只做了功能测试,没做性能测试,大家可以先借鉴下思路。

 

完整代码

大致原理,就是通信双方各持有一个XML文档节点名称,属性名称的一个字典,然后发送方传输的时候用ushort代替原有的XML标签和属性名,接收方通过字典把ushort再转换成原始的元素名和属性名,这样大量不必要的重复的标签等就省去了。

代码只做本文的示例,写的比较随意,没有什么防御性和健壮性。

 

/// <summary>
/// Kind of XML name that a dictionary entry stands for.
/// </summary>
internal enum ItemType {
    /// <summary>An element (tag) name.</summary>
    Element = 0,

    /// <summary>
    /// An attribute name. The identifier is misspelled, but it is kept as-is
    /// because other code in this file refers to it by this name.
    /// </summary>
    Attritube = 1
}
/// <summary>
/// One entry of the name dictionary: an element or attribute name together
/// with the path string under which it was found.
/// </summary>
internal class XmlNodeItem {

    /// <summary>Path string used as the lookup key for this entry.</summary>
    public string Xpath { get; set; }

    /// <summary>The element or attribute name itself.</summary>
    public string Text { get; set; }

    /// <summary>Whether <see cref="Text"/> names an element or an attribute.</summary>
    public ItemType ItemType { get; set; }

    /// <summary>
    /// Returns <see cref="Xpath"/>, or an empty string when it has not been set,
    /// so that callers never receive null from ToString.
    /// </summary>
    public override string ToString() {
        return Xpath ?? string.Empty;
    }
}
/// <summary>
/// A stack of path segments that renders as an XPath-like string such as
/// <c>/Customer/Address/@AddressType</c>.
/// </summary>
internal class MyXpath {

    // Segments in root-to-leaf order; each one already carries its leading '/'.
    private readonly LinkedList<string> _node = new LinkedList<string>();

    /// <summary>Appends an element segment (<c>/name</c>).</summary>
    /// <param name="name">Element name.</param>
    public void AddElement(string name) {
        _node.AddLast("/" + name);
    }

    /// <summary>Appends an attribute segment (<c>/@name</c>).</summary>
    /// <param name="name">Attribute name.</param>
    public void AddAttribute(string name) {
        _node.AddLast("/@" + name);
    }

    /// <summary>
    /// Removes the most recently added segment, whether it is an element or an
    /// attribute segment.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">The path is empty.</exception>
    public void RemoveLastElement() {
        _node.RemoveLast();
    }

    /// <summary>
    /// Concatenates all segments in order. An empty path yields an empty
    /// string instead of failing on a missing first node.
    /// </summary>
    public override string ToString() {
        StringBuilder sb = new StringBuilder();
        foreach (string segment in _node) {
            sb.Append(segment);
        }
        return sb.ToString();
    }
}
/// <summary>
/// Dictionary-based XML encoder/decoder. Element and attribute names are
/// replaced by 16-bit indices taken from a dictionary that is built from a
/// sample document; both sides must build it from the same sample.
///
/// Record layout written by <see cref="XmlToBytes"/> (all numbers are written
/// with BinaryWriter.Write(ushort)):
///   element start : index
///   attribute     : index, byte length, UTF-8 value
///   text          : 0, byte length, UTF-8 value
///   element end   : 65535
/// </summary>
class  XmlZip {

    // Reserved markers; dictionary indices therefore start at 1 and stop below 65535.
    private const ushort TextMarker = 0;
    private const ushort EndElementMarker = ushort.MaxValue;

    // index -> name entry (used when decoding)
    private readonly Dictionary<ushort, XmlNodeItem> _map = new Dictionary<ushort, XmlNodeItem>();
    // path string -> index (used when encoding)
    private readonly Dictionary<string, ushort> _map2 = new Dictionary<string, ushort>();

    /// <summary>
    /// Builds the name dictionary from a sample document. Every element and
    /// attribute occurrence receives the next index in document order; when a
    /// path occurs more than once, the reverse lookup keeps the last index.
    /// Entries from a previous call are discarded first.
    /// </summary>
    /// <param name="xmlInput">Sample XML text.</param>
    /// <exception cref="System.InvalidOperationException">
    /// The sample needs more indices than fit between the two reserved markers.
    /// </exception>
    public void Init(string xmlInput) {
        // Start clean so a second Init cannot leave stale path->index pairs
        // that point at indices now owned by different names.
        _map.Clear();
        _map2.Clear();

        MyXpath path = new MyXpath();
        ushort next = 1;
        using (StringReader sr = new StringReader(xmlInput))
        using (XmlReader reader = XmlReader.Create(sr)) {
            while (reader.Read()) {
                switch (reader.NodeType) {
                    case XmlNodeType.Element:
                        path.AddElement(reader.Name);
                        AddEntry(ref next, path.ToString(), reader.Name, ItemType.Element);
                        // On an element node MoveToNextAttribute starts at the
                        // first attribute, so one loop covers all of them.
                        while (reader.MoveToNextAttribute()) {
                            path.AddAttribute(reader.Name);
                            AddEntry(ref next, path.ToString(), reader.Name, ItemType.Attritube);
                            path.RemoveLastElement();
                        }
                        reader.MoveToElement();
                        // An empty element produces no EndElement node, so pop here.
                        if (reader.IsEmptyElement) {
                            path.RemoveLastElement();
                        }
                        break;
                    case XmlNodeType.EndElement:
                        path.RemoveLastElement();
                        break;
                    default:
                        break;
                }
            }
        }

        foreach (KeyValuePair<ushort, XmlNodeItem> pair in _map) {
            _map2[pair.Value.Xpath] = pair.Key;
        }
    }

    /// <summary>
    /// Encodes an XML document using the dictionary built by <see cref="Init"/>.
    /// Only element, attribute, text and CDATA nodes are encoded; other node
    /// types (declaration, comments, white space, ...) are skipped.
    /// </summary>
    /// <param name="xmlInput">XML text whose element/attribute paths all occur in the sample.</param>
    /// <returns>The encoded bytes.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// A path is not in the dictionary, or a text/attribute value is longer
    /// than 65535 UTF-8 bytes.
    /// </exception>
    public byte[] XmlToBytes(string xmlInput) {
        // Per-call path: a failure in one call cannot corrupt the next one.
        MyXpath path = new MyXpath();
        using (MemoryStream ms = new MemoryStream())
        using (BinaryWriter bw = new BinaryWriter(ms))
        using (StringReader sr = new StringReader(xmlInput))
        using (XmlReader reader = XmlReader.Create(sr)) {
            while (reader.Read()) {
                switch (reader.NodeType) {
                    case XmlNodeType.Element:
                        path.AddElement(reader.Name);
                        bw.Write(Lookup(path));
                        while (reader.MoveToNextAttribute()) {
                            path.AddAttribute(reader.Name);
                            ushort attributeIndex;
                            try {
                                attributeIndex = Lookup(path);
                            }
                            finally {
                                // Always pop the attribute segment, found or not.
                                path.RemoveLastElement();
                            }
                            bw.Write(attributeIndex);
                            WriteValue(bw, reader.Value);
                        }
                        reader.MoveToElement();
                        if (reader.IsEmptyElement) {
                            path.RemoveLastElement();
                            bw.Write(EndElementMarker);
                        }
                        break;
                    case XmlNodeType.EndElement:
                        path.RemoveLastElement();
                        bw.Write(EndElementMarker);
                        break;
                    case XmlNodeType.Text:
                    case XmlNodeType.CDATA:
                        // CDATA is stored as an ordinary text record; the decoder
                        // writes it back as escaped text.
                        bw.Write(TextMarker);
                        WriteValue(bw, reader.Value);
                        break;
                    default:
                        break;
                }
            }
            bw.Flush();
            return ms.ToArray();
        }
    }

    /// <summary>
    /// Decodes bytes produced by <see cref="XmlToBytes"/> back into indented XML text.
    /// </summary>
    /// <param name="bytes">Encoded bytes.</param>
    /// <returns>The reconstructed XML text.</returns>
    /// <exception cref="System.IO.InvalidDataException">
    /// The data contains an index that is neither a marker nor a dictionary entry.
    /// </exception>
    public string BytesToXml(byte[] bytes) {
        // NOTE(review): names are written through the single-name XmlWriter
        // overloads; namespace-prefixed names are not specially handled here.
        StringBuilder sb = new StringBuilder();
        XmlWriterSettings settings = new XmlWriterSettings();
        settings.Indent = true;

        using (MemoryStream ms = new MemoryStream(bytes))
        using (BinaryReader br = new BinaryReader(ms))
        using (StringWriter sw = new StringWriter(sb))
        using (XmlWriter writer = XmlWriter.Create(sw, settings)) {
            // Test the stream position rather than PeekChar(): PeekChar tries to
            // decode the next bytes as text, which is not meaningful for binary data.
            while (ms.Position < ms.Length) {
                ushort readFlag = br.ReadUInt16();
                XmlNodeItem item;
                if (_map.TryGetValue(readFlag, out item)) {
                    if (item.ItemType == ItemType.Element) {
                        writer.WriteStartElement(item.Text);
                    }
                    else if (item.ItemType == ItemType.Attritube) {
                        writer.WriteAttributeString(item.Text, ReadValue(br));
                    }
                }
                else if (readFlag == TextMarker) {
                    writer.WriteString(ReadValue(br));
                }
                else if (readFlag == EndElementMarker) {
                    writer.WriteEndElement();
                }
                else {
                    // The record length is unknown for an unknown index, so the
                    // rest of the data cannot be interpreted reliably.
                    throw new System.IO.InvalidDataException(
                        "Unknown dictionary index " + readFlag + " in encoded data.");
                }
            }
            writer.Flush();
        }
        return sb.ToString();
    }

    // Stores one dictionary entry under the next free index and advances the counter.
    private void AddEntry(ref ushort next, string xpath, string name, ItemType type) {
        if (next == EndElementMarker) {
            // Going further would collide with the end marker and then wrap to the text marker.
            throw new System.InvalidOperationException(
                "The sample document needs more dictionary entries than a 16-bit index can hold.");
        }
        _map[next] = new XmlNodeItem() {
            Xpath = xpath,
            Text = name,
            ItemType = type
        };
        next++;
    }

    // Resolves the current path to its dictionary index, failing loudly when it is unknown.
    private ushort Lookup(MyXpath path) {
        string key = path.ToString();
        ushort index;
        if (!_map2.TryGetValue(key, out index)) {
            throw new System.InvalidOperationException(
                "No dictionary entry for '" + key + "'. Init must be called with a sample that contains this path.");
        }
        return index;
    }

    // Writes a value as a 16-bit byte count followed by its UTF-8 bytes.
    private static void WriteValue(BinaryWriter bw, string value) {
        byte[] bs = Encoding.UTF8.GetBytes(value);
        if (bs.Length > ushort.MaxValue) {
            // A plain cast would silently truncate the count and corrupt the stream.
            throw new System.InvalidOperationException(
                "Value of " + bs.Length + " UTF-8 bytes does not fit the 16-bit length prefix.");
        }
        bw.Write((ushort)bs.Length);
        bw.Write(bs);
    }

    // Reads a value written by WriteValue.
    private static string ReadValue(BinaryReader br) {
        int len = br.ReadUInt16();
        byte[] bs = br.ReadBytes(len);
        return Encoding.UTF8.GetString(bs);
    }
}

 

 

参考链接

XML压缩和传输性能的改善

http://blog.csdn.net/BruceWayen/archive/2006/03/13/623483.aspx

XQzip XML压缩技术(1)--介绍

http://qiyanfeng.blog.51cto.com/503144/105203

XQzip:可查询XML压缩算法分析(1)

http://qiyanfeng.blog.51cto.com/503144/105578

WAP Binary XML Content Format

http://www.w3.org/TR/wbxml/

 

 

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值