libpcap steps (3) pcap内存映射分析

4 篇文章 0 订阅
3 篇文章 0 订阅

结论: 在Linux上,libpcap(1.0.0及以后的版本)在内核支持的情况下默认使用mmap方式(PACKET_MMAP环形缓冲区)读取包,内核不支持时才退回到普通的socket读取方式。所以一般不需要过多地去考虑抓包的效率问题,库内部已经处理得很好了。


ref: http://www.diybl.com/course/3_program/c++/cppxl/20100408/202002_3.html

在libpcap-1.0.0中引入了zerocopy BPF,那么这个zerocopy BPF又是什么呢?PACKET_MMAP

查看两个版本libpcap编译的程序的strace的差异,除了poll之外,对于setsockopt还有一个差异:
setsockopt(4, SOL_PACKET, PACKET_RX_RING, "\0@\0\0\376\0\0\0@ \0\0\376\0\0\0", 16) = 0
mmap2(NULL, 4161536, PROT_READ|PROT_WRITE, MAP_SHARED, 4, 0) = 0xb7a54000
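从字面上来看:setsockopt设置socket的PACKET_RX_RING选项,也就是让内核为这个socket建立一个接收环形缓冲区。那16字节的参数是一个struct tpacket_req,按小端解码依次为 tp_block_size=16384、tp_block_nr=254、tp_frame_size=8256、tp_frame_nr=254;16384×254=4161536,正好等于下面mmap2映射的长度。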
mmap2将一段内核空间地址映射到用户空间,这样用户空间就可以直接操作内核缓冲区中的数据了,至于内核缓冲区中的数据如何来的,就是所谓的zerocopy BPF底层实现的了。

这个zerocopy叫做PACKET_MMAP,之前也叫做PACKET_RING,查看kernel的config文件的话是:
CONFIG_PACKET_MMAP=y【viktor:3.1版本内核里面没有这个选项,它已经被下面这个内核提交移除了。】

packet: Kill CONFIG_PACKET_MMAP.
    
    Early on this was an experimental facility that few
    people other than Alexey Kuznetsov played with.
    
    Now it's a pretty fundamental thing and as people add
    more features to AF_PACKET sockets this config options
    creates ifdef spaghetti.
    
    So kill it off.


以前的时候有一个专门的PACKET_MMAP版本的libpcap,但是在libpcap-1.0.0中已经增加了部分平台的PACKET_MMAP/PACKET_RING支持。


以上为转载。

源码分析:libpcap 1.3.0-pre-git

pcap_open_live经由pcap_activate最终调用到 pcap-linux.c: pcap_activate_linux

static int
pcap_activate_linux(pcap_t *handle)
{
        const char      *device;
        int             status = 0;
//viktor: 首先设置成普通的文件读取方式。
        device = handle->opt.source;

        handle->inject_op = pcap_inject_linux;
        handle->setfilter_op = pcap_setfilter_linux;
        handle->setdirection_op = pcap_setdirection_linux;
        handle->set_datalink_op = NULL; /* can't change data link type */
        handle->getnonblock_op = pcap_getnonblock_fd;
        handle->setnonblock_op = pcap_setnonblock_fd;
        handle->cleanup_op = pcap_cleanup_linux;
        handle->read_op = pcap_read_linux;
        handle->stats_op = pcap_stats_linux;
......
        /*
         * viktor:activate_new激活 PF_PACKET 连接。老版本内核是 SOCK_PACKET,维持兼容性.把判错代码都删掉了
         */
        status = activate_new(handle);
        if (status < 0) {
                goto fail;
        }
        if (status == 1) {
                /*
                 * Success.
                 * Try to use memory-mapped access.
                 */
                switch (activate_mmap(handle, &status)) {            //  <<-----------------------------这里

                case 1:
                        /*
                         * We succeeded.  status has been
                         * set to the status to return,
                         * which might be 0, or might be
                         * a PCAP_WARNING_ value.
                         */
                        return status;

                case 0:
                        /*
                         * Kernel doesn't support it - just continue
                         * with non-memory-mapped access.
                         */
                        break;
......


pcap-linux.c: activate_mmap

#ifdef HAVE_PACKET_RING
/*
 * Try to switch a handle over to memory-mapped (ring buffer) capture.
 *
 * viktor: returns 1 on success, 0 if memory-mapped capture is not
 * supported, and -1 on error.  All of the error-checking code has been
 * removed from the excerpt below, so as shown here the results of
 * malloc(), prepare_tpacket_socket() and create_ring() are never tested
 * and only the success path (return 1) remains.
 */
static int
activate_mmap(pcap_t *handle, int *status)
{
        int ret;

        /* buffer of handle->snapshot bytes; allocation check omitted in this excerpt */
        handle->md.oneshot_buffer = malloc(handle->snapshot);

        if (handle->opt.buffer_size == 0) {
                /* by default request 2M for the ring buffer */
                handle->opt.buffer_size = 2*1024*1024;
        }
        /* return value is overwritten below without being checked (check omitted) */
        ret = prepare_tpacket_socket(handle);

        ret = create_ring(handle, status);                         //  <<----------------------------- viktor: look here


        /*
         * Success.  Switch all of the handle's operations over to
         * their mmap versions......
         */
        handle->read_op = pcap_read_linux_mmap;
        handle->cleanup_op = pcap_cleanup_linux_mmap;
        handle->setfilter_op = pcap_setfilter_linux_mmap;
        handle->setnonblock_op = pcap_setnonblock_mmap;
        handle->getnonblock_op = pcap_getnonblock_mmap;
        handle->oneshot_callback = pcap_oneshot_mmap;
        handle->selectable_fd = handle->fd;
        return 1;
}
#else /* HAVE_PACKET_RING */
/*
 * Stub used when HAVE_PACKET_RING is not defined: always returns 0,
 * which the caller (pcap_activate_linux) treats as "not supported" and
 * continues with non-memory-mapped access.  Both parameters are unused.
 */
static int
activate_mmap(pcap_t *handle _U_, int *status _U_)
{
        return 0;
}
#endif /* HAVE_PACKET_RING */

pcap-linux.c: create_ring

/*
 * Attempt to set up memory-mapped access.
 *......
 */
static int
create_ring(pcap_t *handle, int *status)
{
        unsigned i, j, frames_per_block;
        struct tpacket_req req;
        socklen_t len;
        unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
        unsigned int frame_size;

        /*
         * Start out assuming no warnings or errors.
         */
        *status = 0;
.....................
Line #3471
        req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
        req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size;
        /* compute the minumum block size that will handle this frame. 
         * The block has to be page size aligned. 
         * The max block size allowed by the kernel is arch-dependent and 
         * it's not explicitly checked here. */
        req.tp_block_size = getpagesize();
        while (req.tp_block_size < req.tp_frame_size)
                req.tp_block_size <<= 1;

        frames_per_block = req.tp_block_size/req.tp_frame_size;
.....................
Line #3588
retry:
        req.tp_block_nr = req.tp_frame_nr / frames_per_block;


        /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
        req.tp_frame_nr = req.tp_block_nr * frames_per_block;


        if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,              //<<------------------------这里
                                        (void *) &req, sizeof(req))) {
                if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
                        /*
                         * Memory failure; try to reduce the requested ring
                         * size.
                         *
                         * We used to reduce this by half -- do 5% instead.
                         * That may result in more iterations and a longer
                         * startup, but the user will be much happier with
                         * the resulting buffer size.
                         */
                        if (req.tp_frame_nr < 20)
                                req.tp_frame_nr -= 1;
                        else
                                req.tp_frame_nr -= req.tp_frame_nr/20;
                        goto retry;
                }
                if (errno == ENOPROTOOPT) {
                        /*
                         * We don't have ring buffer support in this kernel.
                         */
                        return 0;
                }
                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
                    "can't create rx ring on packet socket: %s",
                    pcap_strerror(errno));
                *status = PCAP_ERROR;
                return -1;
        }
        /* memory map the rx ring */
        handle->md.mmapbuflen = req.tp_block_nr * req.tp_block_size;
        handle->md.mmapbuf = mmap(0, handle->md.mmapbuflen,              //  <<-----------------------------这里

            PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0);
        if (handle->md.mmapbuf == MAP_FAILED) {
                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
                    "can't mmap rx ring: %s", pcap_strerror(errno));

                /* clear the allocated ring on error*/
                destroy_ring(handle);
                *status = PCAP_ERROR;
                return -1;
        }

        /* allocate a ring for each frame header pointer*/
        handle->cc = req.tp_frame_nr;
        handle->buffer = malloc(handle->cc * sizeof(union thdr *));
        if (!handle->buffer) {
                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
                    "can't allocate ring of frame headers: %s",
                    pcap_strerror(errno));

                destroy_ring(handle);
                *status = PCAP_ERROR;
                return -1;
        }
        /* fill the header ring with proper frame ptr*/
        handle->offset = 0;
        for (i=0; i<req.tp_block_nr; ++i) {
                void *base = &handle->md.mmapbuf[i*req.tp_block_size];
                for (j=0; j<frames_per_block; ++j, ++handle->offset) {
                        RING_GET_FRAME(handle) = base;
                        base += req.tp_frame_size;
                }
        }

        handle->bufsize = req.tp_frame_size;
        handle->offset = 0;
        return 1;
}


   
   
  
  


我自己的strace结果:

bind(3, {sa_family=AF_PACKET, proto=0x03, if3, pkttype=PACKET_HOST, addr(0)={0, }, 20) = 0
getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
setsockopt(3, SOL_PACKET, PACKET_AUXDATA, [1], 4) = 0
getsockopt(3, SOL_PACKET, PACKET_HDRLEN, [28], [4]) = 0
setsockopt(3, SOL_PACKET, PACKET_VERSION, [1], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RESERVE, [4], 4) = 0
setsockopt(3, SOL_PACKET, PACKET_RX_RING, {block_size=131072, block_nr=31, frame_size=65600, frame_nr=31}, 16) = 0
mmap2(NULL, 4063232, PROT_READ|PROT_WRITE, MAP_SHARED, 3, 0) = 0xb714d000             //<<----------------这里
write(2, "capture_short.cpp:42: Start capt"..., 47capture_short.cpp:42: Start capture on ��...
) = 47


之后就是抓包-读时间-输出的循环:


poll([{fd=3, events=POLLIN}], 1, 1000)  = 1 ([{fd=3, revents=POLLIN}])
stat64("/etc/localtime", {st_mode=S_IFREG|0644, st_size=834, ...}) = 0
write(2, "capture_short.cpp:46: 10:16:29,4"..., 49capture_short.cpp:46: 10:16:29,483605 us, len:79
) = 49


  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值