突然就想要解析种子文件了。。代码写得好乱。
想起来以前GIF图片使用的LZW的编码我还没写完解析呢。。近期大概也会写的吧。。
解析了一个7k的,在我这已经算小的种子文件了。
└─dictionary
├─string:announce
├─string:http://tracker.ktxp.com:6868/announce
├─string:announce-list
├─list
├─string:created by
├─string:BitComet/1.25
├─string:creation date
├─string:1301449953
├─string:encoding
├─string:UTF-8
├─string:info
├─dictionary
├─string:nodes
└─list
第一个list里的内容是这样的:
<list>
<list>
string:http://tracker.ktxp.com:7070/announce
</list>
<list>
string:http://tracker.ktxp.com:6868/announce
</list>
</list>
大概明白是怎么一回事了。
这东西好像跟json很像?不过我没注意过哪里有它和json的比较。
不过这东西有点太傻了。。它唯一的好处就是不像json那样,解析起来传统上需要分2步:词法分析和语法分析。
bencode的解码基本上完全可以把2步合起来。。
总之,种子文件的根节点是一个字典。
字典里面的第一个映射是由announce映射到一个主tracker
然后是由announce-list映射到一个列表,列表的内容是一个由tracker服务器组成的表。
字典里面剩下的内容是created by信息,creation date信息,encoding信息,info信息。
前3个的内容都是字符串,info信息是一个字典。
info字典的第一个内容就是files。
files的内容是一个列表,列表中每一个元素都是字典。
这些字典中的每一个当然都是对文件的描述。
有以下这些描述:
ed2k,filehash,length,path,path.utf-8
由于根字典下有:
string:encoding
string:UTF-8
所以这里的path.utf-8和path的值是一样的。
刚刚说到info字典的一个对象,files列表
info字典还有以下对象:
name,name.utf-8,piece length,pieces
说起这个pieces对象我就觉得囧。
所以说,这B编码的字符串根本不是字符串嘛。其实是字节流吧。
像filehash这样的东西都用字符串表示,不觉得很奇怪吗?理论上来说filehash应该是一个整数。
当然,更不济的情况下应该是一个由整数组成的列表。
算了,以后我懂了,不能把B编码中的字符串类型看成是C字符串,把它看做char型数组还差不多。
而pieces。。哎。。
说完info字典,根字典下还有最后一个对象是nodes列表。。这个列表的内容都是列表,里面是由一个其实标识着数字的字符串和一个整数0组成的。
你看,又把字符串和整数混着用了。。
刚刚看到一个好贴:
http://hi.baidu.com/52taoist/blog/item/7ada6a5ea0a6934ffaf2c0fa.html
常见内容:
|
另外参考的是:http://www.cnblogs.com/hnrainll/archive/2011/07/21/2112793.html
感谢这位博主。
代码:
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#ifndef MAX_PATH
#define MAX_PATH 260
#endif
namespace BCode
{
// One node of a parsed bencode tree.
//
// Nodes form a first-child / next-sibling tree. A node's kind is stored as a
// single character: 's' (string), 'i' (integer), 'l' (list) or
// 'd' (dictionary). String and integer nodes keep their content as a
// NUL-terminated text in `value`; list and dictionary nodes keep their
// content as child nodes (a dictionary stores key and value nodes
// alternately).
//
// A node owns its `value` buffer and all of its children and frees them in
// its destructor, so nodes must not be copied.
class CNode
{
public:
    CNode *parent;       // node this one was created under, NULL for a root
    CNode *firstChild;   // head of the child list, NULL when there is none
    CNode *nextSibling;  // next node under the same parent, NULL at the end
public:
    // Creates a childless node of kind `type` that will read from `fp`.
    CNode(FILE *fp,char type);
    // Frees the stored text and every descendant node.
    ~CNode();
    // Parses this node's content (and, recursively, its children) from the
    // file handle given at construction.
    void Search();
    // Appends a new child of kind `type` and returns it; the child is owned
    // by this node.
    CNode *CreateChild(char type);
    // Replaces the stored text with a private copy of the given C string.
    void SetValue(char *);
    // Peeks at the next value in this node's file handle; see Guess(FILE *).
    char Guess();
    // Reads the type marker of the next value in `fp` and returns
    // 's', 'i', 'l' or 'd', or 0 when the next byte is none of those.
    static char Guess(FILE *fp);
    // Dumps the tree rooted at this node to `file_name` as XML-like text.
    void WriteFile(char *file_name);
private:
    // Copying would make two nodes free the same `value` buffer and the same
    // children. Declared but intentionally never defined (pre-C++11 idiom),
    // so any attempt to copy fails at compile or link time.
    CNode(const CNode &);
    CNode &operator=(const CNode &);
private:
    FILE *m_handle;  // input stream; not owned, never closed by the node
    char type;       // 's', 'i', 'l' or 'd'
    char *value;     // owned NUL-terminated text, NULL until SetValue is called
private:
    // Deletes every child (and their descendants) and clears firstChild.
    void ReleaseSubTree();
    // Writes this node and its descendants to `fp`, indented by `nLevel`
    // spaces.
    void Write(FILE *fp,int nLevel);
};
}
namespace BCode
{
// Tells whether `a` is one of the ASCII decimal digits '0'..'9'.
// Returns 1 for a digit and 0 for anything else.
int isdigit(char a)
{
    if(a < '0' || a > '9')
    {
        return 0;
    }
    return 1;
}
// Peeks at the decimal length prefix of a bencoded string
// ("<length>:<bytes>") without consuming it: the run of digits at the
// current position is read and the stream is then moved back to where it
// was on entry.
//
// Returns the parsed length, 0 when the stream does not start with a digit,
// or -1 when the prefix is too large to be represented as an int.
int get_string_length(FILE *fp)
{
    const long start = ftell(fp);
    long long length = 0;
    bool too_large = false;
    int ch;  // int, not char, so EOF stays distinguishable from data bytes
    while((ch = fgetc(fp)) != EOF && isdigit(static_cast<char>(ch)))
    {
        // Accumulate directly instead of copying into a fixed-size buffer:
        // an arbitrarily long digit run can neither overflow memory nor the
        // accumulator, because accumulation stops once INT_MAX is exceeded.
        if(!too_large)
        {
            length = length * 10 + (ch - '0');
            if(length > INT_MAX)
            {
                too_large = true;
            }
        }
    }
    fseek(fp,start,SEEK_SET);
    return too_large ? -1 : static_cast<int>(length);
}
// Reads one bencoded string ("<length>:<bytes>") from `fp` into `buffer`
// and NUL-terminates it.
//
//   fp       stream positioned at the first digit of the length prefix
//   buffer   destination, at least `cch_buf` bytes large
//   cch_buf  capacity of `buffer` including the terminating NUL
//
// Returns the number of payload bytes stored. Malformed input (no length
// prefix, missing ':' separator, payload larger than the buffer) is treated
// as a fatal error through assert, like elsewhere in this file. A zero
// length ("0:") is valid bencode and yields an empty string.
//
// Note: the payload is stored as a C string, so data containing NUL bytes
// appears truncated to readers of `buffer`.
int get_string(FILE *fp,char *buffer,int cch_buf)
{
    long long str_len = 0;
    int digit_count = 0;
    int ch;  // int so that EOF is not confused with a data byte
    while((ch = fgetc(fp)) != EOF && isdigit(static_cast<char>(ch)))
    {
        // Stop accumulating once the value can no longer fit the buffer;
        // this keeps str_len from overflowing on absurdly long prefixes
        // while still failing the size check below.
        if(str_len <= cch_buf)
        {
            str_len = str_len * 10 + (ch - '0');
        }
        ++digit_count;
    }
    assert(digit_count > 0);
    assert(ch == ':' || (printf("ch=%d",ch),0));
    assert(str_len >= 0 && str_len < cch_buf);
    int i = 0;
    for(; i < str_len && i < cch_buf - 1; ++i)
    {
        const int byte = fgetc(fp);
        if(byte == EOF)
        {
            break;  // truncated file: keep what was read so far
        }
        buffer[i] = static_cast<char>(byte);
    }
    if(cch_buf > 0)
    {
        buffer[i] = 0;
    }
    return i;
}
// Reads the body of a bencoded integer ("<digits>e", the leading 'i' having
// been consumed by the caller) from `fp` and returns its value.
//
// The read is performed outside of assert so that it still happens when the
// program is built with NDEBUG. Values outside the range of int are clamped
// to INT_MIN / INT_MAX instead of overflowing; the return type cannot carry
// the full 64-bit range bencode allows.
// A missing number or a missing 'e' terminator trips an assert.
int get_integer(FILE *fp)
{
    long long parsed = 0;
    const int matched = fscanf(fp,"%lld",&parsed);
    assert(1 == matched);
    if(matched == 1)
    {
        const int terminator = fgetc(fp);
        if(terminator != 'e' && terminator != EOF)
        {
            // Leave an unexpected byte in the stream, as "%de" would.
            ungetc(terminator,fp);
        }
        assert('e' == terminator);
    }
    if(parsed > INT_MAX)
    {
        return INT_MAX;
    }
    if(parsed < INT_MIN)
    {
        return INT_MIN;
    }
    return static_cast<int>(parsed);
}
// Escapes the XML special characters in a C string, in place:
//   '<' -> "&lt;"    '>' -> "&gt;"    '&' -> "&amp;"
// Every other byte is copied unchanged.
//
//   buffer     NUL-terminated text to escape; receives the result
//   cchBuffer  capacity of `buffer` in bytes, including the terminating NUL
//
// The result never exceeds `cchBuffer` bytes. If the escaped text does not
// fit, it is cut at the last character (or whole entity) that does, so a
// partial entity is never emitted.
void get_encoded_string(char *buffer,int cchBuffer)
{
    if(buffer == NULL || cchBuffer <= 0)
    {
        return;
    }
    const size_t capacity = static_cast<size_t>(cchBuffer) - 1;
    std::string escaped;
    for(const char *src = buffer; *src != 0; ++src)
    {
        const char *piece = src;
        size_t piece_len = 1;
        switch(*src)
        {
        case '<':
            piece = "&lt;";
            piece_len = 4;
            break;
        case '>':
            piece = "&gt;";
            piece_len = 4;
            break;
        case '&':
            piece = "&amp;";
            piece_len = 5;
            break;
        default:
            break;
        }
        if(escaped.size() + piece_len > capacity)
        {
            break;  // no room left for this character
        }
        escaped.append(piece,piece_len);
    }
    // escaped.size() <= capacity, so this copy (including the NUL) fits.
    memcpy(buffer,escaped.c_str(),escaped.size() + 1);
}
// Creates a node of kind `type` that reads from `fp`. The node starts out
// detached (no parent, children or siblings) and without a stored value.
// Initialisers are listed in member declaration order.
CNode::CNode(FILE *fp, char type)
    :parent(NULL),
     firstChild(NULL),
     nextSibling(NULL),
     m_handle(fp),
     type(type),
     value(NULL)
{
}
// Frees the stored text and every descendant node.
CNode::~CNode()
{
    // `value` is allocated with new char[] in SetValue, so it must be
    // released with the array form of delete.
    delete[] this ->value;
    ReleaseSubTree();
}
// Replaces the node's stored text with a private copy of `s`.
// Passing NULL clears the stored text.
void CNode::SetValue(char *s)
{
    char *copy = NULL;
    if(s != NULL)
    {
        copy = new char[strlen(s) + 1];
        strcpy(copy,s);
    }
    // Release the old buffer only after the copy has been made, so passing
    // the node's own current value is safe. The buffer comes from
    // new char[], hence delete[].
    delete[] this ->value;
    this ->value = copy;
}
char CNode::Guess()
{
return Guess(m_handle);
}
// Reads one byte from `fp` and classifies the bencoded value that starts
// there.
//   'i', 'l', 'd'  the marker byte is consumed and returned as is
//   digit          the byte is put back by seeking one position backwards
//                  and 's' (string) is returned
//   anything else  the byte stays consumed and 0 is returned
char CNode::Guess(FILE *fp)
{
    const char marker = fgetc(fp);
    switch(marker)
    {
    case 'i':
    case 'l':
    case 'd':
        return marker;
    default:
        break;
    }
    if(!isdigit(marker))
    {
        return 0;
    }
    // A string starts with its length prefix; leave it for the string reader.
    fseek(fp,-1,SEEK_CUR);
    return 's';
}
void CNode::Search()
{
char ch;
if(this ->type == 's')
{
int len = get_string_length(m_handle) + 1;
char *buffer = new char[len];
get_string(m_handle,buffer,len);
SetValue(buffer);
delete buffer;
}
else if(this ->type == 'i')
{
int ret;
ret = get_integer(m_handle);
char buffer[MAX_PATH];
itoa(ret,buffer,10);
SetValue(buffer);
}
else if(this ->type == 'l')
{
char ch;
while('e' != (ch = fgetc(m_handle)))
{
if(isdigit(ch))
{
fseek(m_handle,-1,SEEK_CUR);
CNode *n = CreateChild('s');
n ->Search();
}
else if(ch == 'i' || ch == 'l' || ch == 'd')
{
CNode *n = CreateChild(ch);
n ->Search();
}
else
{
fprintf(stderr,"unexpected data at 0x%X\n",ftell(m_handle));
exit(-1);
}
}
}
else if(this ->type == 'd')
{
while(fgetc(m_handle) != 'e')
{
fseek(m_handle,-1,SEEK_CUR);
assert(Guess() == 's');
CNode *n = CreateChild('s');
n ->Search();
n = CreateChild(Guess());
n ->Search();
}
}
}
void CNode::ReleaseSubTree()
{
if(this ->firstChild == NULL)return;
CNode *ptr = this ->firstChild;
for(CNode *nextSibling = NULL; ptr != NULL; ptr = nextSibling)
{
if(ptr ->firstChild == NULL)
{
nextSibling = ptr ->nextSibling;
delete ptr;
}
else
{
ptr ->ReleaseSubTree();
nextSibling = ptr ->nextSibling;
delete ptr;
}
}
this ->firstChild = NULL;
}
// Allocates a node of kind `type` reading from the same file handle,
// appends it after the current last child and returns it.
// The new node is owned by this node.
CNode *CNode::CreateChild(char type)
{
    CNode *child = new CNode(m_handle,type);

    // Walk to the first empty link: either firstChild itself or the
    // nextSibling field of the current last child.
    CNode **slot = &this ->firstChild;
    while(*slot != NULL)
    {
        slot = &(*slot) ->nextSibling;
    }
    *slot = child;

    child ->parent = this;
    return child;
}
// Writes the tree rooted at this node to the file `file_name`, wrapped in
// an XML declaration and a <root> element. An existing file is overwritten.
// If the file cannot be opened, the reason is printed to stderr and the
// program terminates; this check stays active in NDEBUG builds, unlike an
// assert.
void CNode::WriteFile(char *file_name)
{
    FILE *fp = fopen(file_name,"w+");
    if(fp == NULL)
    {
        perror(file_name);
        exit(-1);
    }
    fprintf(fp,"<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
    fprintf(fp,"<root>\n");
    Write(fp,0);
    fprintf(fp,"</root>\n");
    fclose(fp);
}
// Writes this node, and recursively its children, to `fp`.
//
//   fp      output stream
//   nLevel  nesting depth; each line is indented by this many spaces
//
// Output per kind:
//   'i'  "number:<text>"
//   's'  "string:<text>" with '<', '>' and '&' escaped
//   'd'  "<dictionary>" ... "</dictionary>" around the children
//   'l'  "<list>" ... "</list>" around the children
// Nodes of any other kind produce only the indentation.
void CNode::Write(FILE *fp,int nLevel)
{
    int nSpace;
    for(nSpace = 0; nSpace < nLevel; ++ nSpace)
    {
        fputc(' ',fp);
    }
    // A node that was never given a value is written as empty text instead
    // of dereferencing NULL.
    const char *text = (this ->value != NULL) ? this ->value : "";
    switch(this ->type)
    {
    case 'i':
        fprintf(fp,"number:%s\n",text);
        break;
    case 'd':
    case 'l':
    {
        // Dictionaries and lists differ only in the tag name.
        const char *tag = (this ->type == 'd') ? "dictionary" : "list";
        fprintf(fp,"<%s>\n",tag);
        for(CNode *ptr = this ->firstChild; ptr != NULL; ptr = ptr ->nextSibling)
        {
            ptr ->Write(fp,1 + nLevel);
        }
        for(nSpace = 0; nSpace < nLevel; ++ nSpace)
        {
            fputc(' ',fp);
        }
        fprintf(fp,"</%s>\n",tag);
        break;
    }
    case 's':
    {
        // Worst case every byte becomes a 5-byte entity ("&amp;").
        const int len = 5 * (1 + static_cast<int>(strlen(text)));
        char *t = new char[len];
        strcpy(t,text);
        get_encoded_string(t,len);
        fprintf(fp,"string:%s\n",t);
        delete[] t;  // allocated with new[]
        break;
    }
    }
}
}
// Parses the bencoded file "t.torrent" from the current directory and dumps
// its structure to "out.xml".
// Returns 0 on success and -1 when the input cannot be opened or does not
// start with a bencoded value.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;
    FILE *fp = fopen("t.torrent","rb");
    if(!fp)
    {
        perror("");
        return -1;
    }
    // Guess(FILE *) is static: call it on the class rather than through a
    // null object pointer.
    const char type = BCode::CNode::Guess(fp);
    if(type == 0)
    {
        fprintf(stderr,"t.torrent does not start with a bencoded value\n");
        fclose(fp);
        return -1;
    }
    BCode::CNode *root = new BCode::CNode(fp,type);
    root ->Search();
    fclose(fp);
    // WriteFile takes a non-const char *, which a string literal must not
    // be converted to; pass a writable copy instead.
    char out_name[] = "out.xml";
    root ->WriteFile(out_name);
    delete root;
    return 0;
}