Linux C 抓取网页

#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <arpa/inet.h>

/* "BSIZE" is the size of the buffer we use to read from the socket. */

#define BSIZE 0x1000

/*
 * Send an HTTP/1.0 GET request for "page" on "host" over the connected
 * stream socket "s", then copy everything the server sends back (status
 * line, headers and body) to standard output until the peer closes the
 * connection.
 *
 * "page" is the path without its leading slash; the request line is
 * built as "GET /<page> HTTP/1.0".
 *
 * The socket is not closed here; that remains the caller's job.  On an
 * allocation, send, receive or output failure a message is written to
 * standard error and the process exits with status 1.
 */

static void get_page (int s, const char * host, const char * page)
{
    /* "format" is the format of the HTTP request we send to the web
       server. */

    const char * format =
        "GET /%s HTTP/1.0\r\nHost: %s\r\nUser-Agent: fetch.c\r\n\r\n";
    char buf[BSIZE];
    char * msg;
    size_t len;
    size_t sent;
    int needed;

    /* Measure the request first so the buffer is exactly large enough;
       this avoids the non-standard asprintf. */
    needed = snprintf (NULL, 0, format, page, host);
    if (needed < 0) {
        fprintf (stderr, "snprintf: cannot format request\n");
        exit (1);
    }
    len = (size_t) needed;
    msg = malloc (len + 1);
    if (! msg) {
        fprintf (stderr, "malloc: %s\n", strerror (errno));
        exit (1);
    }
    snprintf (msg, len + 1, format, page, host);

    /* send may accept fewer bytes than requested, so loop until the
       whole request has been handed to the kernel. */
    sent = 0;
    while (sent < len) {
        ssize_t w = send (s, msg + sent, len - sent, 0);
        if (w == -1) {
            if (errno == EINTR) {
                continue;
            }
            fprintf (stderr, "send: %s\n", strerror (errno));
            free (msg);
            exit (1);
        }
        sent += (size_t) w;
    }
    free (msg);

    for (;;) {
        ssize_t bytes = recv (s, buf, sizeof buf, 0);
        if (bytes == -1) {
            if (errno == EINTR) {
                continue;
            }
            fprintf (stderr, "%s\n", strerror (errno));
            exit (1);
        }
        if (bytes == 0) {
            /* Orderly shutdown by the peer: the reply is complete. */
            break;
        }
        /* fwrite rather than printf("%s"): the body may contain NUL
           bytes, which would otherwise truncate the output. */
        if (fwrite (buf, 1, (size_t) bytes, stdout) != (size_t) bytes) {
            fprintf (stderr, "fwrite: %s\n", strerror (errno));
            exit (1);
        }
    }
}

/*
 * Resolve the hard-coded host, connect to its "http" service using the
 * first address that accepts a connection, fetch one page with
 * get_page, and print it to standard output.
 *
 * Returns 0 on success.  Exits with status 1 if name resolution fails,
 * and returns 1 if none of the resolved addresses could be connected to.
 */
int main (void)
{
    /* Get one of the web pages here. */
    const char * host = "www.lemoda.net";
    struct addrinfo hints, *res, *res0;
    int error;
    /* "s" is the file descriptor of the socket; -1 means "not connected". */
    int s = -1;
    /* Name and errno of the most recent failing call, reported only if
       every candidate address fails. */
    const char * failed_call = NULL;
    int saved_errno = 0;

    memset (&hints, 0, sizeof hints);
    hints.ai_family = AF_UNSPEC;
    /* Stream (TCP) sockets only. */
    hints.ai_socktype = SOCK_STREAM;

    /* Look up the address information for the host. */
    error = getaddrinfo (host, "http", & hints, & res0);
    if (error) {
        fprintf (stderr, "%s\n", gai_strerror(error));
        exit (1);
    }

    /* Walk the addrinfo list and keep the first address that connects;
       a failure on one address must not prevent trying the next. */
    for (res = res0; res; res = res->ai_next) {
        s = socket (res->ai_family, res->ai_socktype, res->ai_protocol);
        if (s < 0) {
            saved_errno = errno;
            failed_call = "socket";
            s = -1;
            continue;
        }
        if (connect (s, res->ai_addr, res->ai_addrlen) == 0) {
            break;
        }
        /* Save errno before close can overwrite it. */
        saved_errno = errno;
        failed_call = "connect";
        close (s);
        s = -1;
    }
    freeaddrinfo (res0);

    if (s == -1) {
        if (failed_call) {
            fprintf (stderr, "%s: %s\n", failed_call, strerror (saved_errno));
        }
        else {
            fprintf (stderr, "%s: no usable address\n", host);
        }
        return 1;
    }

    get_page (s, host, "momoe/");
    close (s);
    return 0;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值