bencoding编码解析器

BT客户端开始一个下载首先要处理的就是torrent文件.
而torrent文件使用bencoding编码.
所以实现bencoding编码的解析器,就是第一步工作.

Bencoding is done as follows:
Strings are length-prefixed base ten followed by a colon and the string. For example '4:spam' corresponds to 'spam'.
Integers are represented by an 'i' followed by the number in base 10 followed by an 'e'. For example 'i3e' corresponds to 3 and 'i-3e' corresponds to -3. Integers have no size limitation. 'i-0e' is invalid. All encodings with a leading zero, such as 'i03e', are invalid, other than 'i0e', which of course corresponds to 0.
Lists are encoded as an 'l' followed by their elements (also bencoded) followed by an 'e'. For example 'l4:spam4:eggse' corresponds to ['spam', 'eggs'].
Dictionaries are encoded as a 'd' followed by a list of alternating keys and their corresponding values followed by an 'e'. For example, 'd3:cow3:moo4:spam4:eggse' corresponds to {'cow': 'moo', 'spam': 'eggs'} and 'd4:spaml1:a1:bee' corresponds to {'spam': ['a', 'b']}. Keys must be strings and appear in sorted order (sorted as raw strings, not alphanumerics).

下面是实现的bencoding解码器的VC++源代码:

// BEncode.h: interface for the CBEncode class.
//
//

#if !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
#define AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#pragma warning( disable : 4786 )
#pragma warning( disable : 4355 )
#include <climits>
#include <list>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Error state stored on a parsed bencode object.
enum BEncodeParserErrorCode
{
    enm_BEncodeErr_noerr     = 0, // no error
    enm_BEncodeErr_errString = 1, // malformed string
    enm_BEncodeErr_errInt    = 2, // malformed integer
    enm_BEncodeErr_errList   = 3, // malformed list
    enm_BEncodeErr_errDict   = 4, // malformed dictionary
    enm_BEncodeErr_End       = 5, // end of text
    enm_BEncodeErr_unknown   = 6  // unknown error
};
// Kind tag identifying which concrete bencode object a base pointer refers to.
enum BEncodeObjectType
{
    enum_BEncodeType_Objectbase = 0, // plain base object
    enum_BEncodeType_String     = 1, // byte string
    enum_BEncodeType_Int        = 2, // integer
    enum_BEncodeType_List       = 3, // list
    enum_BEncodeType_Dict       = 4  // dictionary
};
class CBEncodeObjectBase
{
public:
CBEncodeObjectBase(BEncodeObjectType type = enum_BEncodeType_Objectbase){m_type = type;clear();}
virtual ~CBEncodeObjectBase(){};
void clear(){szPos = NULL;m_error = enm_BEncodeErr_noerr;}
public:
BEncodeObjectType m_type; //对象类型
char * szPos; //对象在字符串中的位置
int ilen;//对象的数据长度
BEncodeParserErrorCode m_error;//错误值
};
// Parsed bencode integer.
class CBEncodeInt : public CBEncodeObjectBase
{
public:
    // m_iValue starts at 0 so a failed parse never exposes an indeterminate value.
    CBEncodeInt() : CBEncodeObjectBase(enum_BEncodeType_Int), m_iValue(0) {}
    virtual ~CBEncodeInt() {}
public:
    int m_iValue; // value of the integer object
};
// Parsed bencode string. The object does not copy the payload: m_szData
// points into the text that was parsed, so that text must outlive the object.
class CBEncodeString : public CBEncodeObjectBase
{
public:
    // Both payload fields start in a defined state (m_ilen used to be left
    // uninitialised).
    CBEncodeString()
        : CBEncodeObjectBase(enum_BEncodeType_String), m_szData(NULL), m_ilen(0)
    {
    }
    virtual ~CBEncodeString() {}
public:
    // Copies the payload into strValue.
    // Returns true on success; returns false and leaves strValue untouched
    // when the object carries an error, has no payload pointer, or has a
    // negative length.
    bool getstring(std::string & strValue)
    {
        if(m_error != enm_BEncodeErr_noerr || m_szData == NULL || m_ilen < 0)
            return false;
        strValue.assign(m_szData, m_ilen);
        return true;
    }
    char * m_szData; // first payload byte inside the parsed text (not owned)
    int m_ilen;      // payload length in bytes
};
class CBEncodeList : public CBEncodeObjectBase
{
public:
CBEncodeList() : CBEncodeObjectBase(enum_BEncodeType_List) {}
virtual ~CBEncodeList(){clear();}
void clear()
{
list<CBEncodeObjectBase *>::iterator it;
for(it = m_listObj.begin();it!=m_listObj.end();++it)
delete (*it);
m_listObj.clear();
}
public:
list<CBEncodeObjectBase*> m_listObj;
};
class CBEncodeDict : public CBEncodeObjectBase
{
public:
CBEncodeDict() : CBEncodeObjectBase(enum_BEncodeType_Dict) {}
virtual ~CBEncodeDict(){clear();}
CBEncodeObjectBase* getvalue(const char * szName)
{
map<string,CBEncodeObjectBase*>::iterator it = m_mapObj.find(szName);
if(it != m_mapObj.end())
return it->second;
return NULL;
}
void clear()
{
list<CBEncodeObjectBase *>::iterator it;
for(it = m_listObj.begin();it!=m_listObj.end();++it)
delete (*it);
m_listObj.clear();
m_mapObj.clear();
}
public:
map<string,CBEncodeObjectBase*> m_mapObj;//
list<CBEncodeObjectBase*> m_listObj;//真正的对象保存在list中,list是一个name对象一个value对象.map只是一个映射表,引用了指针而已
};
class CBEncode
{
public:
bool readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
bool readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
bool readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
bool readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj);
bool parse(const char * szData);
CBEncode();
virtual ~CBEncode();
void clear()
{
list<CBEncodeObjectBase *>::iterator it;
for(it = m_listObj.begin();it!=m_listObj.end();++it)
delete (*it);
m_listObj.clear();
}
public:
list<CBEncodeObjectBase*> m_listObj;
CBEncodeObjectBase* m_plastObj;//解析出来的最后一个对象
char * m_szTxt;
};

#endif // !defined(AFX_BENCODE_H__4D0BB462_2AE0_45B3_8BE8_19D51B2DBB2E__INCLUDED_)
// BEncode.cpp: implementation of the CBEncode class.
//
//

#include "stdafx.h"
#include "BEncode.h"

//
// Construction/Destruction
//

// Starts with no parsed object and no text attached.
CBEncode::CBEncode()
    : m_plastObj(NULL), m_szTxt(NULL)
{
}

// Frees every object produced by the last parse.
CBEncode::~CBEncode()
{
    this->clear();
}

// Decodes a complete bencoded text.
//
// szData: NUL-terminated text. It is not copied; parsed string objects point
//         into it, so it must outlive the parse result.
// Returns true when every top-level object decoded without error and the
// terminating NUL was reached. Returns false for NULL or empty input and as
// soon as any object fails. Objects parsed before a failure, and the failed
// object itself, remain in m_listObj.
//
// Previously parsed objects are released first.
bool CBEncode::parse(const char *szData)
{
    // Compare against the NUL character; comparing a char with NULL only
    // works where NULL happens to be an integer constant.
    if(szData == NULL || *szData == '\0')
        return false;
    clear();
    m_szTxt = const_cast<char*>(szData);
    char * szCurPos = m_szTxt;
    // The result is tracked locally instead of being read back through
    // m_plastObj afterwards.
    bool bAllOk = true;
    while(bAllOk && *szCurPos != '\0')
    {
        int iConsumed = 0;
        switch(*szCurPos)
        {
        case 'i':
            bAllOk = readint(szCurPos, iConsumed, m_listObj);
            break;
        case 'l':
            bAllOk = readlist(szCurPos, iConsumed, m_listObj);
            break;
        case 'd':
            bAllOk = readdict(szCurPos, iConsumed, m_listObj);
            break;
        default:
            // Anything else must be a length-prefixed string.
            bAllOk = readstring(szCurPos, iConsumed, m_listObj);
            break;
        }
        // Any error stops the whole parse; only advance past good objects.
        if(bAllOk)
            szCurPos += iConsumed;
    }
    return bAllOk;
}
//从当前位置读取一个字符串
bool CBEncode::readstring(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
char * szTmp = szCurPos;
CBEncodeString * pNewString = new CBEncodeString;
pNewString->szPos = szCurPos;

char szLen[20];
int i = 0;
while(*szTmp>='0'&&*szTmp<='9')
szLen[i++]=*(szTmp++);
szLen[i]=0;
if(*szTmp==':')
{
int ilen = atoi(szLen);
if(ilen>0)
{
pNewString->m_szData = ++szTmp;
pNewString->m_ilen = ilen;
szTmp+=ilen;
}
else
pNewString->m_error = enm_BEncodeErr_errString;
}
else
pNewString->m_error = enm_BEncodeErr_errString;
listObj.push_back(pNewString);
iendpos = szTmp-szCurPos;
m_plastObj = pNewString;
m_plastObj->ilen = iendpos;
return pNewString->m_error == enm_BEncodeErr_noerr?true:false;
}
//读取一个整型数据
bool CBEncode::readint(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
char * szTmp = szCurPos;
CBEncodeInt * pNewInt= new CBEncodeInt;
pNewInt->szPos = szCurPos;

if(*szTmp == 'i')
{
szTmp++;
char szLen[20];
int i = 0;
while(*szTmp>='0'&&*szTmp<='9')
szLen[i++]=*(szTmp++);
szLen[i]=0;
if(*szTmp=='e')
{
pNewInt->m_iValue = atoi(szLen);
++szTmp;
}
else
pNewInt->m_error = enm_BEncodeErr_errInt;
}
else
pNewInt->m_error = enm_BEncodeErr_errInt;
listObj.push_back(pNewInt);
iendpos = szTmp-szCurPos;
m_plastObj = pNewInt;
m_plastObj->ilen = iendpos;
return pNewInt->m_error == enm_BEncodeErr_noerr?true:false;
}
//读取一个列表
bool CBEncode::readlist(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
char * szTmp = szCurPos;
CBEncodeList * pNewList= new CBEncodeList;
pNewList->szPos = szCurPos;
if(*szTmp == 'l')
{
szTmp++;
int ilistendpos;
while(*szTmp!='e')
{
if(*szTmp== 'i')
{
if(!readint(szTmp,ilistendpos,pNewList->m_listObj))
break;//遇到任何错误都终止整个解析
szTmp+=ilistendpos;
}
else if(*szTmp== 'l')
{
if(!readlist(szTmp,ilistendpos,pNewList->m_listObj))
break;
szTmp+=ilistendpos;
}
else if(*szTmp== 'd')
{
if(!readdict(szTmp,ilistendpos,pNewList->m_listObj))
break;
szTmp+=ilistendpos;
}
else
{
if(!readstring(szTmp,ilistendpos,pNewList->m_listObj))
break;
szTmp+=ilistendpos;
}
}
if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr)
pNewList->m_error = enm_BEncodeErr_errList;
else
szTmp++;
}
else
pNewList->m_error = enm_BEncodeErr_errList;
listObj.push_back(pNewList);
iendpos = szTmp-szCurPos;
m_plastObj = pNewList;
m_plastObj->ilen = iendpos;
return pNewList->m_error == enm_BEncodeErr_noerr?true:false;
}
//读取一个字典
bool CBEncode::readdict(char *szCurPos,int & iendpos,list<CBEncodeObjectBase*> & listObj)
{
char * szTmp = szCurPos;
CBEncodeDict * pNewDict= new CBEncodeDict;
pNewDict->szPos = szCurPos;
if(*szTmp == 'd')
{
szTmp++;
int ilistendpos;
string strname;
while(*szTmp!='e')
{
if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))
break;
if(m_plastObj->m_type !=enum_BEncodeType_String)
break;
strname.assign(((CBEncodeString *)m_plastObj)->m_szData,((CBEncodeString *)m_plastObj)->m_ilen);
szTmp+=ilistendpos;
if(*szTmp== 'i')
{
if(!readint(szTmp,ilistendpos,pNewDict->m_listObj))
break;//遇到任何错误都终止整个解析
szTmp+=ilistendpos;
}
else if(*szTmp== 'l')
{
if(!readlist(szTmp,ilistendpos,pNewDict->m_listObj))
break;
szTmp+=ilistendpos;
}
else if(*szTmp== 'd')
{
if(!readdict(szTmp,ilistendpos,pNewDict->m_listObj))
break;
szTmp+=ilistendpos;
}
else
{
if(!readstring(szTmp,ilistendpos,pNewDict->m_listObj))
break;
szTmp+=ilistendpos;
}
pNewDict->m_mapObj.insert(pair<string,CBEncodeObjectBase*>(strname,m_plastObj));
}
if(*szTmp!='e'||m_plastObj->m_error != enm_BEncodeErr_noerr)
pNewDict->m_error = enm_BEncodeErr_errDict;
else
szTmp++;
}
else
pNewDict->m_error = enm_BEncodeErr_errDict;
listObj.push_back(pNewDict);
iendpos = szTmp-szCurPos;
m_plastObj = pNewDict;
m_plastObj->ilen = iendpos;
return pNewDict->m_error == enm_BEncodeErr_noerr?true:false;
}
 
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值