thrift框架序列化及反序列化解析

最新推荐文章于 2023-10-05 09:34:40 发布

砖头他爸

最新推荐文章于 2023-10-05 09:34:40 发布

阅读量1.4k

点赞数

分类专栏：互联网相关文章标签： thrift

互联网相关专栏收录该内容

19 篇文章 0 订阅

订阅专栏

本文炒冷饭.说实话,一直挺看好Thrift,支持的语言又多,代码写得又很清晰,效率又不低,为啥研究Protocol Buffer的人那么多.不管那么多了....

Thrift中的对象序列化是我很看好的东西,他用compiler+类库,让你高效的完成任务,而且可以少犯错误.试想,有谁可以保证自己设计的对象,不会再改变呢?数据库的schema改了,你可以改改查询语句,但是如果你对象改了,之前序列化好的东西,有时候就很难搞回来了.(哎.....)

废话不说,看Thrift里面怎么搞的.

1. Thrift支持的数据类型

Thrift支持的数据类型定义在TProtocol.h这个头文件中,有一个TType的枚举:

而每一种Protocol都不一定全部支持这么多数据格式,T_LIST之前的都是被支持的.T_STRING是c string,可以和utf-8兼容.

2. Thrift对各种数据类型的读写

Thrift把对象序列化抽象成TProtocol这样一个抽象类,这个类的成员非常多,但是思路很明显,就是对各种数据类型的读写操作:

  
  
   
   class
   
    TProtocol {
 
   
   public
   
   :
  
   
   virtual
   
    
   
   ~
   
   TProtocol() {}
  uint32_t writeMessageBegin(
   
   const
   
    std::
   
   string
   
   &
   
    name,
                             
   
   const
   
    TMessageType messageType,
                             
   
   const
   
    int32_t seqid);
  uint32_t writeMessageEnd();
  uint32_t writeFieldBegin(
   
   const
   
    
   
   char
   
   *
   
    name,
                           
   
   const
   
    TType fieldType,
                           
   
   const
   
    int16_t fieldId) ;
  uint32_t writeFieldEnd();
  uint32_t writeFieldStop();
  
   
   //
   
   写各种类型的数据
   
   

   
     uint32_t writeBool(
   
   const
   
    
   
   bool
   
    value);
  uint32_t writeByte(
   
   const
   
    int8_t 
   
   byte
   
   );
  uint32_t writeI16(
   
   const
   
    int16_t i16);
  uint32_t writeStructBegin(
   
   const
   
    
   
   char
   
   *
   
    name);
  uint32_t writeStructEnd();  
  
   
   //
   
   此处省略若干行
   
   


   
     uint32_t readMessageBegin(std::
   
   string
   
   &
   
    name,
                            TMessageType
   
   &
   
    messageType,
                            int32_t
   
   &
   
    seqid);
  uint32_t readMessageEnd();
  uint32_t readFieldBegin(std::
   
   string
   
   &
   
    name,
                          TType
   
   &
   
    fieldType,
                          int16_t
   
   &
   
    fieldId);
  uint32_t readFieldEnd() ;
  
   
   //
   
   读各种类型的数据
   
   

   
     uint32_t readBool(
   
   bool
   
   &
   
    value);
  uint32_t readI16(int16_t
   
   &
   
    i16) ;
  uint32_t readStructBegin(std::
   
   string
   
   &
   
    name) ;
  uint32_t readStructEnd(); 
  
   
   //
   
   此处省略若干行
   
   


   
   };

每一种数据类型都有一对read/write方法,另外Message,Field也有read/write方法.

网友可能很奇怪,为啥抽象类的方法不是虚的.......其实我这里代码省略了很多,之前0.5.0版本的thrift里面,这些方法都是虚的,对TProtocol的实现都重写了这些方法;0.6.0里面,直接的read/write方法都不是虚的,但是添加了额外的虚函数:

  
  
   
     
   
   //
   
   比如说对list的读
   
   

   
     
   
   virtual
   
    uint32_t readListBegin_virt(TType
   
   &
   
    elemType,
                                      uint32_t
   
   &
   
    size) 
   
   =
   
    
   
   0
   
   ;
  
   
   virtual
   
    uint32_t readListEnd_virt() 
   
   =
   
    
   
   0
   
   ;
  
   
   //
   
   另外有
   
   

   
     uint32_t readListBegin(TType
   
   &
   
    elemType, uint32_t
   
   &
   
    size) {
    T_VIRTUAL_CALL();
    
   
   return
   
    readListBegin_virt(elemType, size);
  }
  uint32_t readListEnd() {
    T_VIRTUAL_CALL();
    
   
   return
   
    readListEnd_virt();
  }

其实和直接使用虚函数是一样的.

OK,接口看完了,这就去看对接口的实现,我们来看TBinaryProtocolT是怎么实现的.这里多说几句,Thrift里面实现了好多种序列化,如果你觉得这种序列化不好,可以去重新实现一个上面说的那个接口,就可以工作了:-D

TBinaryProtocolT里面我们看一两个具有代表性的,int32_t/map的读写:

  
  
   
   /**
    * Writes a 32-bit integer to the transport in network byte order.
    *
    * @param i32 value to serialize
    * @return number of bytes written (always 4)
    */
   template <class Transport_>
   uint32_t TBinaryProtocolT<Transport_>::writeI32(const int32_t i32) {
     const uint32_t kWireBytes = 4;

     // Convert to network byte order before handing the raw bytes to the
     // transport.
     int32_t bigEndian = static_cast<int32_t>(htonl(i32));
     this->trans_->write(reinterpret_cast<uint8_t*>(&bigEndian), kWireBytes);

     return kWireBytes;
   }
/**
 * Reads a 32-bit integer that was written in network byte order.
 *
 * @param i32 receives the decoded value in host byte order
 * @return number of bytes read (always 4)
 */
template <class Transport_>
uint32_t TBinaryProtocolT<Transport_>::readI32(int32_t& i32) {
  uint8_t b[4];
  this->trans_->readAll(b, 4);

  // Assemble the value from the big-endian wire bytes. This yields the same
  // result as ntohl() on the raw memory, but avoids reading the uint8_t
  // array through an int32_t pointer, which violates strict aliasing and
  // may be a misaligned access.
  const uint32_t hostValue = (static_cast<uint32_t>(b[0]) << 24) |
                             (static_cast<uint32_t>(b[1]) << 16) |
                             (static_cast<uint32_t>(b[2]) << 8) |
                             static_cast<uint32_t>(b[3]);
  i32 = static_cast<int32_t>(hostValue);

  // Report how many bytes were consumed.
  return 4;
}
/**
 * Writes a map header: one byte for the key type, one byte for the value
 * type, then the element count as a 32-bit integer.
 *
 * @param keyType TType of the map keys
 * @param valType TType of the map values
 * @param size    number of entries in the map
 * @return sum of the values returned by the three underlying writes
 */
template <class Transport_>
uint32_t TBinaryProtocolT<Transport_>::writeMapBegin(const TType keyType,
                                                     const TType valType,
                                                     const uint32_t size) {
  const uint32_t keyTagBytes = writeByte(static_cast<int8_t>(keyType));
  const uint32_t valTagBytes = writeByte(static_cast<int8_t>(valType));
  const uint32_t countBytes = writeI32(static_cast<int32_t>(size));
  return keyTagBytes + valTagBytes + countBytes;
}
/**
 * Reads a map header: key type byte, value type byte, then the element
 * count as a 32-bit integer (mirror image of writeMapBegin).
 *
 * @param keyType receives the TType of the map keys
 * @param valType receives the TType of the map values
 * @param size    receives the element count; left untouched if an
 *                exception is thrown
 * @return sum of the values returned by the three underlying reads
 * @throws TProtocolException NEGATIVE_SIZE if the decoded count is negative;
 *         SIZE_LIMIT if container_limit_ is non-zero and the count exceeds it
 */
template <class Transport_>
uint32_t TBinaryProtocolT<Transport_>::readMapBegin(TType& keyType,
                                                    TType& valType,
                                                    uint32_t& size) {
  int8_t keyTag;
  int8_t valTag;
  int32_t count;

  uint32_t consumed = readByte(keyTag);
  keyType = static_cast<TType>(keyTag);

  consumed += readByte(valTag);
  valType = static_cast<TType>(valTag);

  consumed += readI32(count);

  // Validate the count before exposing it to the caller.
  if (count < 0) {
    throw TProtocolException(TProtocolException::NEGATIVE_SIZE);
  }
  if (this->container_limit_ && count > this->container_limit_) {
    throw TProtocolException(TProtocolException::SIZE_LIMIT);
  }

  size = static_cast<uint32_t>(count);
  return consumed;
}

可以看到,代码内聚很强,也很易懂.float/double的序列化是通过强转成uint32_t/uint64_t来实现的,string么,先去写一个大小,然后才是内容.list和set都是类似的~~

对于field的read/write,都是直接读写该field的类型信息,而field name就忽略掉了,因为二进制序列化用不到那些东西,只有json这样的文本序列化才能用到:-D,有兴趣的可以去看看JSON Protocol的实现

3. 代码生成

如果让你手写对象的序列化,反序列化,你肯定要抱怨了,因为那样出错的机会非常大.Thrift和Protocol Buffer都给你提供了编译器,写好IDL之后,可以用编译器生成好代码~~这样可以保证不会出错.以UserProfile为例:

  
  
   
   // Example IDL struct used throughout this article.
   // The number before each colon is the field id.
   struct UserProfile {
     1: i32 uid,
     2: string name,
     3: string blurb
   }

生成代码: thrift-0.6.0.exe --gen cpp UserProfile.thrift

这样,thrift会在gen-cpp文件夹内生成好UserProfile的代码,我们只想看UserProfile类的read和write是怎么实现的:

  
  
   
   /**
    * Serializes this UserProfile through the given protocol.
    *
    * Fields are emitted in declaration order; each one is framed by
    * writeFieldBegin (name, type, field id) and writeFieldEnd, and the struct
    * is terminated with writeFieldStop.
    *
    * @param oprot protocol to write to
    * @return sum of the values returned by every protocol call
    */
   uint32_t UserProfile::write(::apache::thrift::protocol::TProtocol* oprot) const {
     namespace proto = ::apache::thrift::protocol;

     uint32_t written = oprot->writeStructBegin("UserProfile");

     // Field 1: uid
     written += oprot->writeFieldBegin("uid", proto::T_I32, 1);
     written += oprot->writeI32(uid);
     written += oprot->writeFieldEnd();

     // Field 2: name
     written += oprot->writeFieldBegin("name", proto::T_STRING, 2);
     written += oprot->writeString(name);
     written += oprot->writeFieldEnd();

     // Field 3: blurb
     written += oprot->writeFieldBegin("blurb", proto::T_STRING, 3);
     written += oprot->writeString(blurb);
     written += oprot->writeFieldEnd();

     written += oprot->writeFieldStop();
     written += oprot->writeStructEnd();
     return written;
   }


/**
 * Deserializes a UserProfile from the given protocol.
 *
 * Reads field headers until a T_STOP marker. A field is decoded only when
 * both its id and its wire type match what this struct expects; anything
 * else (unknown id, or known id with a different type) is skipped.
 *
 * @param iprot protocol to read from
 * @return sum of the values returned by every protocol call
 */
uint32_t UserProfile::read(::apache::thrift::protocol::TProtocol* iprot) {
  namespace proto = ::apache::thrift::protocol;
  using ::apache::thrift::protocol::TProtocolException;

  std::string fieldName;
  proto::TType wireType;
  int16_t fieldId;

  uint32_t consumed = iprot->readStructBegin(fieldName);

  for (;;) {
    // Every field starts with its type and field id.
    consumed += iprot->readFieldBegin(fieldName, wireType, fieldId);
    if (wireType == proto::T_STOP) {
      break;
    }

    if (fieldId == 1 && wireType == proto::T_I32) {
      consumed += iprot->readI32(this->uid);
      this->__isset.uid = true;
    } else if (fieldId == 2 && wireType == proto::T_STRING) {
      consumed += iprot->readString(this->name);
      this->__isset.name = true;
    } else if (fieldId == 3 && wireType == proto::T_STRING) {
      consumed += iprot->readString(this->blurb);
      this->__isset.blurb = true;
    } else {
      // Unknown field id, or a known id carrying an unexpected type.
      consumed += iprot->skip(wireType);
    }

    consumed += iprot->readFieldEnd();
  }

  consumed += iprot->readStructEnd();
  return consumed;
}

使用这个类也是比较简单的,Thrift的transport对读写操作做了一定的抽象,你可以读写网络端口,文件,内存等,我们这里用内存:

  
  
   
   typedef unsigned 
   
   long
   
       uint32_t;
typedef unsigned 
   
   char
   
       uint8_t;
uint32_t bufferSize 
   
   =
   
    
   
   64
   
   *
   
   1024
   
   ;
uint8_t 
   
   *
   
   buffer 
   
   =
   
    (uint8_t
   
   *
   
   )malloc(bufferSize);
boost::shared_ptr
   
   <
   
   TMemoryBuffer
   
   >
   
    _write(
   
   new
   
    TMemoryBuffer(buffer,bufferSize,TMemoryBuffer::TAKE_OWNERSHIP));
TProtocol 
   
   *
   
   protowrite 
   
   =
   
    
   
   new
   
    TBinaryProtocol(_write);

UserProfile _userProfile;

   
   //
   
   在这里修改_userProfile的属性

   
   //
   
   ....

   
   //
   
   序列化
   
   

   
   uint32_t writeSize 
   
   =
   
    _userProfile.write(protowrite);

boost::shared_ptr
   
   <
   
   TMemoryBuffer
   
   >
   
    _read(
   
   new
   
    TMemoryBuffer(buffer,bufferSize));
TProtocol 
   
   *
   
   protoread 
   
   =
   
    
   
   new
   
    TBinaryProtocol(_read);
UserProfile _userProfile2;
_userProfile2.read(protoread);

asser(_userProfile 
   
   ==
   
    _userProfile2);

很ez吧

4. Thrift向后兼容性的实现

看生成好的read代码可以看到有一个skip的方法(当字段类型不一样的时候会调用它),这个就是向后兼容性实现的关键.一个对象,难免被改来改去,改不要紧,但field id要跟着变化,不要一个field id用到死..当然你继续用也没问题,比如你之前field id 1的类型是int32_t,之后还是这样,读是读出来了,但有可能逻辑不对了.....来看看skip的实现.

  
  
   
   //
   
   策略就是,读到什么抛弃什么
   
   

   
   template 
   
   <
   
   class
   
    Protocol_
   
   >
   
   
uint32_t skip(Protocol_
   
   &
   
    prot, TType type) 
{
  
   
   switch
   
    (type) {
  
   
   case
   
    T_BOOL:
    {
      
   
   bool
   
    boolv;
      
   
   return
   
    prot.readBool(boolv);
    }
  
   
   case
   
    T_BYTE:
    {
      int8_t bytev;
      
   
   return
   
    prot.readByte(bytev);
    }
   
   
   //
   
   此处省略若干行
   
   
   //
   
   因为代码都是类似的

    
   
   //
   
   对结构体,map/list/set之类的skip操作是比较复杂的
   
   

   
       
   
   case
   
    T_STRUCT:
    {
      uint32_t result 
   
   =
   
    
   
   0
   
   ;
      std::
   
   string
   
    name;
      int16_t fid;
      TType ftype;
      result 
   
   +=
   
    prot.readStructBegin(name);
      
   
   while
   
    (
   
   true
   
   ) {
        result 
   
   +=
   
    prot.readFieldBegin(name, ftype, fid);
        
   
   if
   
    (ftype 
   
   ==
   
    T_STOP) {
          
   
   break
   
   ;
        }
        result 
   
   +=
   
    skip(prot, ftype);
        result 
   
   +=
   
    prot.readFieldEnd();
      }
      result 
   
   +=
   
    prot.readStructEnd();
      
   
   return
   
    result;
    }
}

OK,至此,Thrift对象序列化的代码基本上就看的差不多了.因为我们不会去用Thrift的Service,所以那部分代码没看过....

Thrift千好万好,但是如果你数据写好了,schema丢失了.....那就不好玩了

PS:很早之前看过Thrift,但是没写篇文章总结一下.这篇也算是了了心愿.

砖头他爸

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
thrift框架序列化及反序列化解析

本文炒冷饭.说实话,一直挺看好Thrift,支持的语言又多,代码写的有很清晰,效率又不低,为啥研究Protocol Buffer的人那么多.不管那么多了....Thrift中的对象序列化是我很看好的东西,他用compiler+类库,让你高效的完成任务,而且可以少犯错误.试想,有谁可以保证自己设计的对象,不会再改变呢?数据库的schema改了,你可以改改查询语句,但是如果你对象改了,之前序列
复制链接

扫一扫