将抓到的pcap文件中Http包转换为可读的txt格式

最新推荐文章于 2021-08-09 14:15:49 发布

VirusCode2016

最新推荐文章于 2021-08-09 14:15:49 发布

阅读量1.4k

点赞数

文章标签： json 网络 java

原文链接：http://www.cnblogs.com/2017Crown/p/7171822.html

版权

编写思路：

　　　　1.首先将要读取、写入信息的两个文件分别进行打开，接下来先进行文件读操作,获得指向该文件头的指针fp

　　　　2.从文件中读取pcap文件头大小的信息存储在pcap_head_buf里，再将偏移量offset设置为pcap头之后

　　　　3.将文件pkt(数据报)头信息存储在pkt_head_buf里，将偏移量置offset设置到pkt头+pkt数据之后，即为下个数据报的偏移量，将fp移动到数据报头之后。

　　　　4.从pcap文件头中读出linktype从而获得帧头种类，再从帧头中读出该数据帧是ipv4还是ipv6，本文只处理ipv4。同时指针移动到帧头部后面

　　　　5.当帧为ipv4时，首先将ip头部取出存储在ip_head_buf中，指针后移到ip头后面

　　　　6.从ip头中读出Protocol,从而获取是否为TCP链接，本文只处理TCP链接。当前是TCP连接时，直接将TCP头信息存储在tcp_head_buf里，fp移动至TCP头后

　　　　7.接下来offset-fp大小即为数据域大小。首先将数据域最开始的四个字符存储在tempBuf中，指针随之后移。

　　　　8.将随后部分的数据域全部存储在tcp_data_buf中，并从中读取出method、url、host、user-Agent信息。

　　　　9.将method、url、host、user-Agent信息进行格式化存储。

数据域结构：1.开头四个字节为请求方式，POST、GET或HEAD。

用到的一些函数：

　　1.ftell

　　　　函数原型：long ftell(FILE *fp);

　　　　作用：获取当前文件位置指针相对于文件开头偏移的字节数

　　2.memset

　　　　函数原型：void *memset(void *s, int ch, size_t n);

　　　　作用：将s中当前位置后面的n个字节用 ch 替换并返回 s 。

main.cpp

#include <memory.h>

#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "pcap.h"
#include "method.hpp"
using namespace std;

int main()
{
    pcap_header pcap_head_buf;
    pkt_header pkt_head_buf;
    ip_header ip_head_buf;
    tcp_header tcp_head_buf;
    FILE *fp = fopen("Login.pcap" , "rw");
    ofstream outfile("HttpMsg.txt",ios::app);

    getPcapFileHead(fp , pcap_head_buf);//将文件中一个pkt_header大小的内容存入pcap_head_buf的内存地址

    fseek(fp, 0, SEEK_END);
    long fileSize=ftell(fp);
    long fpOffset=sizeof( pcap_header ) ;

    while( (fseek(fp, fpOffset, SEEK_SET) == 0)  && ( fpOffset < fileSize ) )     //在循环中处理每一个网络帧
    {
        getPktHead(fp , pkt_head_buf);
        fpOffset += ( sizeof(pkt_header) + pkt_head_buf.capture_len );
                                            //fpOffset 当前位置 +sizeof( pkt_header) +sizeof (pkt_data) ,得到下一网络帧的 offset

        u_int16 framType=getFramType(fp , pcap_head_buf.linktype);         //framType 标识了该帧是否为 IPV6链接
        if ( framType == 0xdd86 )                                          //IPV6链接 , 跳过该网络帧
        {
            continue ;
        }

        else
        {
            getIpHead(fp , ip_head_buf);

            if ( ip_head_buf.Protocol != 0x06 )            // Protocol != 0x06  表示非ＴＣＰ链接 , 跳过该网络帧
            {
                continue ;
            }
            else                                           //TCP 链接类型
            {
                getTcpHead(fp ,tcp_head_buf) ;
                int tcp_data_size = fpOffset - ftell(fp);
                  // 当前位置在一个 tcp_header 后  ，fpOffset - 当前位置 得到 tcp_data 的长度
                if ( tcp_data_size !=0)
                {
                    u_int8 tempBuf[4];
                    string methodBuf;
                    string urlBuf;
                    string hostBuf;
                    string uaBuf;
                    fread(tempBuf ,4 ,1 ,fp);
                    fseek(fp , -4 ,SEEK_CUR);
                    char tcp_data_buf[1024];
                    memset(tcp_data_buf,0,sizeof(tcp_data_buf)/sizeof(char));//将整个tcp_data_buf空间都置为0

                    if (   ( tempBuf[0]==0x50 && tempBuf[1]==0x4f && tempBuf[2]==0x53  && tempBuf[3]==0x54 )  ||
                           ( tempBuf[0]==0x47 && tempBuf[1]==0x45 && tempBuf[2]==0x54 )
                       )  //两个条件分别表示  "POST " 和 " GET "  ，判断成功表明 该网络帧包含了一个 ＨＴＴＰ　 get 或者 post 链接

                    {


                        fread(tcp_data_buf , tcp_data_size ,1 ,fp );
                        matchHttp(tcp_data_buf , methodBuf , urlBuf , hostBuf , uaBuf );
                        outfile<<"method :"<<methodBuf<<endl;
                        outfile<<"url :" <<urlBuf<<endl;
                        outfile<<"host :"<<hostBuf<<sendl;
                        outfile<<"ua :"<<uaBuf<<endl;
                        outfile<<"=======*===========*=========*==========="<<std::endl;
            //将内容存在output.txt中
            //
                    }

                }
            }

        }
    }
outfile.close();
}

pcap.h

#ifndef DEFINEPCAP_H
#define DEFINEPCAP_H

/* pcap文件构成：

  1： pcap = pcap_head + pkt_head + pkt_data +  next->pkt_head  +  next->pkt_data :  ……
  2:  pkt_data=frame_head + ip_head + tcp_head +tcp_data

  //其中 pcap_head 中的 linktype 又决定了 frame_head 的类型


*/

/*
 * Integer aliases used by the header structs in this file.
 * NOTE(review): the names imply fixed widths, but the aliases rely on the
 * platform having a 32-bit int, 16-bit short and 8-bit char - confirm for
 * the target compiler.
 */
typedef unsigned int  u_int32;
typedef unsigned short  u_int16;
typedef unsigned char u_int8;
typedef int int32;
/*
 Pcap文件头24B各字段说明：
 Magic：4B：0xA1B2C3D4（小端主机写出的文件中字节序为 D4 C3 B2 A1）:用来标示文件的开始
 Major：2B，0x02 00:当前文件主要的版本号
 Minor：2B，0x04 00当前文件次要的版本号
 ThisZone：4B当地的标准时间；全零
 SigFigs：4B时间戳的精度；全零
 SnapLen：4B最大的存储长度
 LinkType：4B链路类型
 常用类型：
 　0            BSD loopback devices, except for later OpenBSD
 1            Ethernet, and Linux loopback devices
 6            802.5 Token Ring
 7            ARCnet
 8            SLIP
 9            PPP
 */
/*
 * Global pcap file header (24 bytes), present once at the start of the file.
 * It is filled by reading sizeof(pcap_header) bytes straight from the file,
 * so the field order and widths must mirror the on-disk layout.
 */
typedef struct pcap_header {
    u_int32 magic;          // marks the start of the file
    u_int16 version_major;  // major version number of the file format
    u_int16 version_minor;  // minor version number of the file format
    int32 thiszone;         // local standard time; described as all zero in this file's format notes
    u_int32 sigfigs;        // timestamp accuracy; described as all zero in this file's format notes
    u_int32 snaplen;        // maximum stored length
    u_int32 linktype;       // link type; decides which frame header layout follows each record header
}pcap_header;

/*
 Packet 包头和Packet数据组成
 字段说明：
 Timestamp：时间戳高位，精确到seconds
 Timestamp：时间戳低位，精确到microseconds
 Caplen：当前数据区的长度，即抓取到的数据帧长度，由此可以得到下一个数据帧的位置。
 Len：离线数据长度：网络中实际数据帧的长度，一般不小于caplen，多数情况下和Caplen数值相等。
 Packet 数据：即 Packet（通常就是链路层的数据帧）具体内容，长度就是Caplen，这个长度的后面，就是当前PCAP文件中存放的下一个Packet数据包，也就 是说：PCAP文件里面并没有规定捕获的Packet数据包之间有什么间隔字符串，下一组数据在文件中的起始位置。我们需要靠第一个Packet包确定。
 */

/* Capture time of one packet, stored as two 32-bit fields. */
typedef struct  timestamp{
    u_int32 timestamp_s;    // high part of the timestamp, in seconds
    u_int32 timestamp_ms;   // low part; this file's format notes describe it as microseconds despite the _ms suffix
}timestamp;

/*
 * Per-packet record header that precedes every captured frame in the file.
 * capture_len bytes of packet data follow it, and the next record header
 * starts immediately after that data.
 */
typedef struct pkt_header{
    timestamp ts;           // capture time of this packet
    u_int32 capture_len;    // number of bytes of this packet actually stored in the file
    u_int32 len;            // length of the frame as it appeared on the network

}pkt_header;

/**以太网帧头格式**/
/* Ethernet frame header; getFramType reads it when linktype is 0x01. */
typedef struct Ethernet
{
    u_int8  DstMAC[6];   // destination MAC address
    u_int8  SrcMAC[6];   // source MAC address
    u_int16 FrameType;    // frame type (compared by main in the byte order fread stored it)
} Ethernet;

/**另一种帧头格式**/
/* Alternative frame header layout; getFramType reads it when linktype is 0x71. */
typedef struct Linux_cooked_capture
{
    u_int16 package_type;
    u_int16 address_type;
    u_int16 address_length;
    u_int16 un_used[4];      // 8 bytes that this program never inspects
    u_int16 FrameType; // frame type
}Linux_cooked_capture;


/*
 * Fixed part of an IP datagram header, read with a single fread of
 * sizeof(ip_header) bytes. Multi-byte fields keep the byte order found in
 * the file.
 */
typedef struct ip_header
{    // IP datagram header
    u_int8   Ver_HLen;       // version + header length
    u_int8   TOS;            // type of service
    u_int16  TotalLen;       // total length
    u_int16  ID;     // identification
    u_int16  Flag_Segment;   // flags + fragment offset
    u_int8   TTL;            // time to live
    u_int8   Protocol;       // protocol type (main treats 0x06 as TCP)
    u_int16  Checksum;       // header checksum
    u_int32  SrcIP;  // source IP address
    u_int32  DstIP;  // destination IP address
}ip_header ;


/*
 * Fixed part of a TCP segment header, read with a single fread of
 * sizeof(tcp_header) bytes. Multi-byte fields keep the byte order found in
 * the file.
 */
typedef struct tcp_header
{    // TCP segment header
u_int16  SrcPort;    // source port
u_int16  DstPort;    // destination port
u_int32  SeqNO;  // sequence number
u_int32  AckNO;  // acknowledgement number
u_int8   HeaderLen;  // header length (4 bits) + reserved (4 bits)
u_int8   Flags;  // flags identifying the different TCP control messages
u_int16  Window;     // window size
u_int16  Checksum;   // checksum
u_int16  UrgentPointer;  // urgent pointer
}tcp_header ;

#endif // DEFINEPCAP_H

method.hpp

#ifndef METHOD_H
#define METHOD_H

#include "pcap.h"
#include <cstdio>
#include <iostream>
#include <vector>

/**
 * Reads one pcap_header from the current position of fp into pcap_head.
 *
 * When fp is NULL or fewer than sizeof(pcap_header) bytes can be read,
 * pcap_head is reset to all zeros so callers never see indeterminate data.
 * Declared inline because the definition lives in a header.
 *
 * @param fp         open capture file, positioned at the global header
 * @param pcap_head  receives the header bytes exactly as stored in the file
 */
 inline void getPcapFileHead( FILE *fp  ,  pcap_header &pcap_head )
 {
     if ( fp == NULL || fread( &pcap_head , sizeof( pcap_header ) , 1 , fp) != 1 )
     {
         pcap_head = pcap_header();
     }
 }
Linux_cooked_capture
/**将文件中1个pkt_header大小的内容存放在内存中pkt_head的的地址中**/
 void getPktHead(FILE *fp , pkt_header &pkt_head)
 {
     fread( &pkt_head  , sizeof( pkt_header ) , 1 , fp);
 }


 /**
  * Reads the link-layer frame header at the current position of fp and
  * returns its FrameType field.
  *
  * linktype (from the pcap file header) selects the header layout:
  *   0x71 -> Linux_cooked_capture, 0x01 -> Ethernet.
  * The value is returned exactly as stored in the file, without byte-order
  * conversion.
  *
  * @param fp        open capture file, positioned at the frame header
  * @param linktype  link type taken from the pcap file header
  * @return the FrameType field, or 0 when fp is NULL, the link type is not
  *         supported, or the header could not be read completely
  */
 inline u_int32 getFramType(FILE *fp , u_int32 linktype)
 {
     if ( fp == NULL )
     {
         return 0;
     }

     if ( linktype == 0x71 )   // the alternative (Linux cooked capture) frame header
     {
         Linux_cooked_capture cookedHead;
         if ( fread(&cookedHead , sizeof(cookedHead) , 1 , fp) != 1 )
         {
             return 0;
         }
         return cookedHead.FrameType;
     }

     if ( linktype == 0x01 )   // Ethernet frame header
     {
         Ethernet etherHead;
         if ( fread(&etherHead , sizeof(etherHead) , 1 , fp) != 1 )
         {
             return 0;
         }
         return etherHead.FrameType;
     }

     // Unsupported link type: report "no frame type" instead of running off
     // the end of a non-void function.
     return 0;
 }

 void getIpHead(FILE *fp , ip_header & ip_head_buf)
 {
     fread( &ip_head_buf , sizeof( ip_header ) , 1 , fp);
 }

 /**
  * Reads the fixed part of a TCP header from fp into tcp_head_buf and
  * leaves the stream positioned at the first payload byte.
  *
  * The TCP header has a variable size: the high four bits of HeaderLen give
  * its length in 32-bit words. Only the fixed part is kept; option bytes
  * beyond sizeof(tcp_header) are skipped. The arithmetic is done in signed
  * long so a malformed length smaller than the fixed part never turns into
  * a wrapped, huge seek offset.
  *
  * When fp is NULL or the header cannot be read completely, tcp_head_buf is
  * reset to all zeros.
  *
  * @param fp            open capture file, positioned at the TCP header
  * @param tcp_head_buf  receives the header bytes exactly as stored in the file
  */
 inline void getTcpHead(FILE *fp , tcp_header &tcp_head_buf)
 {
     if ( fp == NULL || fread( &tcp_head_buf , sizeof( tcp_header ) , 1 , fp) != 1 )
     {
         tcp_head_buf = tcp_header();
         return;
     }

     const long headerBytes = static_cast<long>( tcp_head_buf.HeaderLen >> 4 ) * 4;
     const long fixedBytes = static_cast<long>( sizeof( tcp_header ) );
     if ( headerBytes > fixedBytes )
     {
         fseek(fp , headerBytes - fixedBytes , SEEK_CUR );
     }
 }

 /** Returns text without leading and trailing spaces, tabs, CR and LF. */
 inline std::string trimHttpWhitespace(const std::string &text)
 {
     const char *const blanks = " \t\r\n";
     const std::string::size_type first = text.find_first_not_of(blanks);
     if ( first == std::string::npos )
     {
         return std::string();
     }
     const std::string::size_type last = text.find_last_not_of(blanks);
     return text.substr(first , last - first + 1);
 }

 /**
  * Extracts the method, URL, Host and User-Agent from the head of an HTTP
  * request.
  *
  * The request line (first line) supplies methodBuf and urlBuf when it
  * starts with "GET " or "POST "; the URL is the text between the method and
  * the trailing " HTTP/<version>" token, or the rest of the line when that
  * token is missing. Header lines are scanned up to the first empty line,
  * so text in the request body can never overwrite a result. Header names
  * are matched case-sensitively at the start of a line ("Host:",
  * "User-Agent:"), and values are returned without surrounding whitespace or
  * the trailing '\r'.
  *
  * Output arguments that have no match in the request are left untouched.
  *
  * @param tcp_data_buf  NUL-terminated TCP payload; a NULL pointer is ignored
  * @param methodBuf     receives "GET" or "POST"
  * @param urlBuf        receives the request target
  * @param hostBuf       receives the value of the Host header
  * @param uaBuf         receives the value of the User-Agent header
  */
 inline void matchHttp(char tcp_data_buf[] , std::string & methodBuf ,std::string & urlBuf , std::string & hostBuf ,std::string &uaBuf)
 {
     if ( tcp_data_buf == NULL )
     {
         return;
     }

     const std::string payload(tcp_data_buf);
     const std::string hostName("Host:");
     const std::string uaName("User-Agent:");

     bool onRequestLine = true;
     std::string::size_type lineStart = 0;
     while ( lineStart < payload.size() )
     {
         // Cut out one '\n'-terminated line and drop the '\r' of a CRLF ending.
         std::string::size_type lineEnd = payload.find('\n' , lineStart);
         if ( lineEnd == std::string::npos )
         {
             lineEnd = payload.size();
         }
         std::string line = payload.substr(lineStart , lineEnd - lineStart);
         lineStart = lineEnd + 1;
         if ( !line.empty() && line[line.size() - 1] == '\r' )
         {
             line.erase(line.size() - 1);
         }

         if ( onRequestLine )
         {
             onRequestLine = false;

             std::string method;
             if ( line.compare(0 , 4 , "GET ") == 0 )
             {
                 method = "GET";
             }
             else if ( line.compare(0 , 5 , "POST ") == 0 )
             {
                 method = "POST";
             }

             if ( !method.empty() )
             {
                 methodBuf = method;
                 const std::string::size_type targetStart = method.size();
                 std::string::size_type versionPos = line.rfind(" HTTP/");
                 if ( versionPos == std::string::npos || versionPos < targetStart )
                 {
                     versionPos = line.size();   // no version token: take the rest of the line
                 }
                 urlBuf = trimHttpWhitespace( line.substr(targetStart , versionPos - targetStart) );
             }
             continue;
         }

         if ( line.empty() )
         {
             break;   // blank line ends the header section; the body follows
         }

         if ( line.compare(0 , hostName.size() , hostName) == 0 )
         {
             hostBuf = trimHttpWhitespace( line.substr(hostName.size()) );
         }
         else if ( line.compare(0 , uaName.size() , uaName) == 0 )
         {
             uaBuf = trimHttpWhitespace( line.substr(uaName.size()) );
         }
     }
 }



#endif // METHOD_H