网络上的IP数据库以纯真版最为流行。本文不研究其格式,只给出一段将其转换为文本格式的代码。
关于纯真IP数据库的格式,本文参考了
Luma大虾的文章和LumaQQ的IPseeker类的代码,用C语言编写。本来是想写个专门读纯真IP数据库的类,所以本文的代码原本只是个测试代码,后来懒了就没继续做下去,所以代码看上去比较乱,嘿嘿。

(注:下面 10 行 #include 的头文件名在转载时被当作 HTML 标签滤掉了;从可见的代码看,至少需要 <stdio.h>、<stdlib.h>、<assert.h>、<fcntl.h>、<unistd.h>、<sys/mman.h>。)

//file:getlist.c
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define IP_DBPATH "/usr/local/LumaQQ/QQWry.dat"
#define HEAD_SIZE 8
char* record_mmap;
#define IP_QUAD_FORMAT_STRING "%03u.%03u.%03u.%03u"
#define IP_QUAD_LE(x) \
    *((unsigned char*)x + 3),\
    *((unsigned char*)x + 2),\
    *((unsigned char*)x + 1),\
    *((unsigned char*)x + 0)

void oops(const char* msg)
{
    perror(msg);
    exit(1);
}

//Proto   : unsigned long int_of_4byte_le(const char* data)
//Function: read 4 byte from data, convert it to unsigned
//          integer in little endian order.
//Argument: data in little endian(LE) order
//Return  : value of 4 byte data in LE
unsigned long int_of_4byte_LE(const char* data)
{
    const unsigned char* p = (const unsigned char*)data;
    return ((p[0]) + (p[1] <

(注:原文从这里开始有一段代码在转载时丢失,包括 int_of_4byte_LE 的余下部分、read_record 的定义,以及下面这个循环所在函数的开头(从 main 中的调用看,应为 iterate_index)。以下是残留的结尾部分。)

        ", IP_QUAD_LE(record), offset );*/
        read_record(offset);
    }while(1);
}

int main(int argc, char** argv)
{
    char buf[HEAD_SIZE];
    unsigned int index_beg,index_end;
    int db;
    if( (db = open(IP_DBPATH,O_RDONLY)) == -1 )oops("opendb error!");
    if(read(db,buf,HEAD_SIZE) != HEAD_SIZE)oops("read data header error!");
    index_beg=int_of_4byte_LE(buf);
    index_end=int_of_4byte_LE(buf+4);
    assert((index_end - index_beg)%7 == 0);
    //printf("index start from:%#08X\n"
    //       " end at:%#08X\n",
    //       index_beg,index_end);
    record_mmap = mmap(NULL,index_beg,PROT_READ,MAP_SHARED,db,0);
    if(record_mmap == (void *) -1)oops("mmap error");
    lseek(db,index_beg - 8,SEEK_CUR);
    // printf("indexes:\n");
    iterate_index(db);
}

使用的话,就执行:

./getlist > chunzhenip.GB
./getlist | iconv -f GB -t UTF-8 > chunzhenip.UTF-8

输出格式是:IP段\t一级地名\t二级地名\n
然后就可以用awk/sed/perl这些文本分析工具来分析了。