简易HTTP代理服务器

这是一个简易的HTTP代理服务器:

功能:

1.解析HTTP请求包,在客户浏览器和目标地址之间转发数据

2.屏蔽非HTTP报文转发,只支持网页浏览代理

3.启用多进程并发套接字模型,同时代理多个客户端浏览网页

说明:本代码转载自网络,并非原创。

 

#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/wait.h>

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>

/* Log severity levels; a smaller number means a more severe message. */
#define LOG_ERROR   0
#define LOG_WARNING 1
#define LOG_NOTICE  2
#define LOG_TRACE   3

/* Messages whose level is numerically greater than this are suppressed. */
#define ACTIVE_LEVEL 3

/*
 * LOG(LEVEL, MSG, ...) prints "LOG(<level>): " followed by the printf-style
 * message MSG to stdout when LEVEL <= ACTIVE_LEVEL.
 *
 * The expansion is wrapped in do { ... } while (0) so that the macro acts as
 * exactly one statement: `if (x) LOG(...); else ...` now parses as intended
 * instead of failing to compile or capturing the wrong `else`.
 * `##__VA_ARGS__` (GNU extension) drops the comma when no arguments follow MSG.
 */
#define LOG(LEVEL, MSG, ...)              \
  do {                                    \
    if ((LEVEL) <= ACTIVE_LEVEL) {        \
      printf("LOG(%d): ", (LEVEL));       \
      printf(MSG, ##__VA_ARGS__);         \
    }                                     \
  } while (0)


/* Forward declaration; read_line() is defined further down in this file. */
char *read_line(int sockfd); 

/*
 * HTTP request methods.
 * The numeric value of each constant is used as an index into the
 * http_methods[] string table, so the order here must match that table.
 * The last constant (UNKNOWN) lines up with the table's "INVALID" entry.
 */
enum http_methods_enum {
  OPTIONS,
  GET,
  HEAD,
  POST,
  PUT, 
  DELETE, 
  TRACE,
  CONNECT, 
  UNKNOWN
};

/*
 * HTTP protocol versions recognised when parsing the request line.
 * Any version token other than "HTTP/1.0" or "HTTP/1.1" is recorded as
 * HTTP_VERSION_INVALID.
 */
enum http_versions_enum {
  HTTP_VERSION_1_0,
  HTTP_VERSION_1_1,
  HTTP_VERSION_INVALID
};

/*
 * A parsed HTTP request header: the request line plus a list of
 * "key: value" header fields.
 */
typedef struct http_request
{
  /* Method taken from the request line; also indexes http_methods[]. */
  enum http_methods_enum method; 
  /* Protocol version taken from the request line. */
  enum http_versions_enum version;
  /* Request target from the request line; heap string (strdup) or NULL. */
  const char *search_path; 

  /* Tail queue of http_metadata_item nodes, one per header field. */
  TAILQ_HEAD(METADATA_HEAD, http_metadata_item) metadata_head; 
} http_request;

/*
 * One header field of a request, stored as a node of the
 * http_request.metadata_head tail queue.
 * http_request_destroy() calls free() on both key and value.
 */
typedef struct http_metadata_item
{ 
  /* Header field name, e.g. "Host". */
  const char *key; 
  /* Header field value with leading spaces removed. */
  const char *value; 

  /* Tail-queue linkage (see <sys/queue.h>). */
  TAILQ_ENTRY(http_metadata_item) entries; 
} http_metadata_item; 


/*
 * Printable names of the HTTP methods, indexed by enum http_methods_enum.
 * The final entry, "INVALID", is the slot used for unrecognised methods.
 */
const char *http_methods[] = {
    "OPTIONS", "GET", "HEAD", "POST",
    "PUT", "DELETE", "TRACE", "CONNECT",
    "INVALID"
};

/* Number of entries in http_methods[] (nine, including "INVALID"). */
int http_methods_len = (int)(sizeof(http_methods) / sizeof(http_methods[0]));

/*
 * Allocate a new, empty http_request and store it in *req.
 *
 * On success *req points to a request with no search path, an empty
 * metadata list, method UNKNOWN and version HTTP_VERSION_INVALID, so that
 * no field is ever read uninitialised if parsing stops early.
 * On allocation failure *req is set to NULL.
 * Release the request with http_request_destroy().
 */
void http_request_init(http_request **req)
{
    if (req == NULL) {
        return;
    }

    http_request *request = malloc(sizeof(*request));
    *req = request;
    if (request == NULL) {
        return;
    }

    request->method = UNKNOWN;
    request->version = HTTP_VERSION_INVALID;
    request->search_path = NULL;

    TAILQ_INIT(&request->metadata_head);
}

/*
 * Release a request created by http_request_init(): the search path, every
 * metadata item (key, value and node) and finally the request itself.
 * The pointer must not be used after this call. Passing NULL is a no-op.
 */
void http_request_destroy(http_request *req)
{
    if (req == NULL) {
        return;
    }

    free((char *)req->search_path);

    /*
     * Pop items from the head one at a time. Iterating with TAILQ_FOREACH
     * while freeing would read the "next" link out of memory that has
     * already been freed.
     */
    struct http_metadata_item *item;
    while ((item = TAILQ_FIRST(&req->metadata_head)) != NULL) {
        TAILQ_REMOVE(&req->metadata_head, item, entries);
        free((char *)item->key);
        free((char *)item->value);
        free(item);
    }

    /* The request was malloc'ed by http_request_init(); free it as well. */
    free(req);
}

/*
 * Dump a request to stdout for debugging: protocol version, method name,
 * request path and then every metadata item as "key: value".
 * Nothing is printed for the version if it holds none of the known values.
 */
void http_request_print(http_request *req)
{
    const char *version_line = NULL;
    struct http_metadata_item *entry;

    fputs("[HTTP_REQUEST] \n", stdout);

    if (req->version == HTTP_VERSION_1_0) {
        version_line = "version:\tHTTP/1.0\n";
    } else if (req->version == HTTP_VERSION_1_1) {
        version_line = "version:\tHTTP/1.1\n";
    } else if (req->version == HTTP_VERSION_INVALID) {
        version_line = "version:\tInvalid\n";
    }
    if (version_line != NULL) {
        fputs(version_line, stdout);
    }

    printf("method:\t\t%s\n", http_methods[req->method]);
    printf("path:\t\t%s\n", req->search_path);

    fputs("[Metadata] \n", stdout);
    for (entry = TAILQ_FIRST(&req->metadata_head);
         entry != NULL;
         entry = TAILQ_NEXT(entry, entries)) {
        printf("%s: %s\n", entry->key, entry->value);
    }

    fputs("\n", stdout);
}

/*
 * Parse an HTTP request line ("METHOD target HTTP/x.y\r\n") into result.
 *
 * result->method is set to the index of the method in http_methods[], or to
 * the last index ("INVALID") when the method is missing or unknown; in that
 * case parsing stops immediately.
 * result->search_path receives a heap copy of the request target.
 * result->version is HTTP_VERSION_INVALID unless the line carries exactly
 * "HTTP/1.0" or "HTTP/1.1".
 *
 * Both method and version are given their "invalid" values up front, so a
 * short or malformed line never leaves them uninitialised.
 */
void http_parse_method(http_request* result, const char* line)
{
    enum parser_states {
        METHOD,
        URL,
        VERSION,
        DONE
    };

    if (result == NULL) {
        return;
    }

    result->method = http_methods_len - 1;
    result->version = HTTP_VERSION_INVALID;

    if (line == NULL) {
        return;
    }

    char *copy = strdup(line);
    if (copy == NULL) {
        return;
    }

    char *cursor = copy;
    char *token = NULL;
    int state = METHOD;

    while (state != DONE && (token = strsep(&cursor, " \r\n")) != NULL) {
        /* strsep yields an empty token for every repeated delimiter. */
        if (*token == '\0') {
            continue;
        }

        switch (state) {
            case METHOD: {
                int found = 0;
                for (int i = 0; i < http_methods_len; i++) {
                    if (strcmp(token, http_methods[i]) == 0) {
                        found = 1;
                        result->method = i;
                        break;
                    }
                }
                if (!found) {
                    /* method already holds the "INVALID" index */
                    free(copy);
                    return;
                }
                state = URL;
                break;
            }
            case URL:
                result->search_path = strdup(token);
                state = VERSION;
                break;
            case VERSION:
                if (strcmp(token, "HTTP/1.0") == 0) {
                    result->version = HTTP_VERSION_1_0;
                } else if (strcmp(token, "HTTP/1.1") == 0) {
                    result->version = HTTP_VERSION_1_1;
                } else {
                    result->version = HTTP_VERSION_INVALID;
                }
                state = DONE;
                break;
            default:
                break;
        }
    }

    free(copy);
}

/* Return a NUL-terminated heap copy of the len bytes at start, or NULL. */
static char *http_metadata_copy_span(const char *start, size_t len)
{
    char *out = malloc(len + 1);
    if (out != NULL) {
        memcpy(out, start, len);
        out[len] = '\0';
    }
    return out;
}

/*
 * Parse one header line such as "Content-Byte: 101\r\n" and append it to
 * result's metadata list.
 *
 * The key is everything before the first ':'. The value is the rest of the
 * line up to the first CR or LF, with surrounding spaces and tabs removed.
 * Lines without a ':' or with an empty key are malformed and are ignored
 * instead of dereferencing a NULL token. Nothing is added if memory runs
 * out. The input line is not modified.
 */
void http_parse_metadata(http_request *result, char *line)
{
    if (result == NULL || line == NULL) {
        return;
    }

    const char *colon = strchr(line, ':');
    if (colon == NULL || colon == line) {
        return;
    }

    /* Skip the whitespace that follows the colon. */
    const char *value_start = colon + 1;
    while (*value_start == ' ' || *value_start == '\t') {
        value_start++;
    }

    /* The value ends at the line terminator; drop trailing blanks too. */
    size_t value_len = strcspn(value_start, "\r\n");
    while (value_len > 0 &&
           (value_start[value_len - 1] == ' ' ||
            value_start[value_len - 1] == '\t')) {
        value_len--;
    }

    char *key = http_metadata_copy_span(line, (size_t)(colon - line));
    char *value = http_metadata_copy_span(value_start, value_len);
    http_metadata_item *item = malloc(sizeof(*item));
    if (key == NULL || value == NULL || item == NULL) {
        free(key);
        free(value);
        free(item);
        return;
    }

    /* The list node takes ownership of both strings. */
    item->key = key;
    item->value = value;
    TAILQ_INSERT_TAIL(&result->metadata_head, item, entries);
}



char *http_build_request(http_request *req)
{
    const char *search_path = req->search_path; 

    // construct the http request 
    int size = strlen("GET ") + 1; 
    //char *request_buffer = calloc(sizeof(char)*size);
    char *request_buffer = calloc(size, sizeof(char));
    strncat(request_buffer, "GET ", 4);

    size += strlen(search_path) + 1; 
    request_buffer = realloc(request_buffer, size);
    strncat(request_buffer, search_path, strlen(search_path));

    // TODO: Check the actual HTTP version that is used, and if 
    // 1.1 is used we should append:
    // 	Connection: close 
    // to the header. 
    switch(req->version)
    {
        case HTTP_VERSION_1_0:
            size += strlen(" HTTP/1.0\r\n\r\n");
            request_buffer = realloc(request_buffer, size); 
            strncat(request_buffer, " HTTP/1.0\r\n", strlen(" HTTP/1.0\r\n"));
            break; 
        case HTTP_VERSION_1_1:
            size += strlen(" HTTP/1.1\r\n\r\n");
            request_buffer = realloc(request_buffer, size); 
            strncat(request_buffer, " HTTP/1.1\r\n", strlen(" HTTP/1.1\r\n"));
            break; 
        default: 
            LOG(LOG_ERROR, "Failed to retrieve the http version\n");
            return NULL; 
    }

    http_metadata_item *item; 
    TAILQ_FOREACH(item, &req->metadata_head, entries) {
        // Remove Connection properties in header in case
        // there are any
        if(strcmp(item->key, "Connection") == 0 || 
            strcmp(item->key, "Proxy-Connection") == 0)
        {
            continue; 
        }

        size += strlen(item->key) + strlen(": ") + strlen(item->value) + strlen("\r\n");  
        request_buffer = realloc(request_buffer, size);
        strncat(request_buffer, item->key, strlen(item->key)); 
        strncat(request_buffer, ": ", 2);
        strncat(request_buffer, item->value, strlen(item->value));
        strncat(request_buffer, "\r\n", 2);
    }

    if(req->version == HTTP_VERSION_1_1)
    {
        size += strlen("Connection: close\r\n");
        request_buffer = realloc(request_buffer, size);
        strncat(request_buffer, "Connection: close\r\n", strlen("Connection: close\r\n"));
    }


    size += strlen("\r\n");
    request_buffer = realloc(request_buffer, size);
    strncat(request_buffer, "\r\n", 2);

    return request_buffer; 
}


/*
 * Look up a metadata value by key.
 * Walks the list from the head and returns the value of the first item
 * whose key equals `key` exactly (strcmp), or NULL when no item matches.
 * The returned pointer refers to the string stored in the list.
 */
const char *list_get_key(struct METADATA_HEAD *list, const char *key)
{
	http_metadata_item *cur = TAILQ_FIRST(list);

	while (cur != NULL) {
		if (!strcmp(cur->key, key))
			return cur->value;
		cur = TAILQ_NEXT(cur, entries);
	}

	return NULL;
}

/*
 * Append a key/value pair to the end of a metadata list.
 *
 * The pointers are stored as given, not copied. http_request_destroy()
 * calls free() on both, so only pass heap-allocated strings that the list
 * may take ownership of.
 * Nothing is added if list is NULL or the node cannot be allocated.
 */
void list_add_key(struct METADATA_HEAD *list, const char *key, const char *value)
{
	if (list == NULL) {
		return;
	}

	http_metadata_item *item = malloc(sizeof(*item));
	if (item == NULL) {
		return;
	}

	item->key = key;
	item->value = value;

	TAILQ_INSERT_TAIL(list, item, entries);
}


/*
 * Read one line from a socket, one byte at a time.
 *
 * Returns a malloc'ed, NUL-terminated string that includes the trailing
 * '\n' when one was received. If the peer closes the connection in the
 * middle of a line, the bytes read so far are returned without a '\n'.
 *
 * Returns NULL when
 *   - the connection is closed before any byte arrives,
 *   - recv() fails (other than being interrupted by a signal),
 *   - the line would exceed MAX_LINE_LENGTH bytes, or
 *   - memory runs out.
 * The caller owns the returned buffer and must free() it.
 */
char *read_line(int sockfd)
{
    enum { INITIAL_CAPACITY = 128, MAX_LINE_LENGTH = 65536 };

    size_t capacity = INITIAL_CAPACITY;
    size_t used = 0;

    /* One extra byte is always reserved for the NUL terminator. */
    char *line = malloc(capacity + 1);
    if (line == NULL) {
        return NULL;
    }

    for (;;) {
        char c;
        ssize_t got = recv(sockfd, &c, 1, 0);

        if (got < 0) {
            if (errno == EINTR) {
                continue;
            }
            free(line);
            return NULL;
        }
        if (got == 0) {
            /* Peer closed the connection. */
            if (used == 0) {
                free(line);
                return NULL;
            }
            break;
        }

        if (used == capacity) {
            /* Refuse to grow without bound on untrusted input. */
            if (capacity >= MAX_LINE_LENGTH) {
                free(line);
                return NULL;
            }
            size_t new_capacity = capacity * 2;
            char *grown = realloc(line, new_capacity + 1);
            if (grown == NULL) {
                free(line);
                return NULL;
            }
            line = grown;
            capacity = new_capacity;
        }

        line[used++] = c;
        if (c == '\n') {
            break;
        }
    }

    line[used] = '\0';
    return line;
}

/*
 * Open a TCP connection to the server named by the request's "Host" header.
 *
 * The header value may be "host" or "host:port"; port 80 is used when no
 * port is given. The header is split on a private copy, so the value stored
 * in the request is left untouched.
 *
 * Returns the connected socket descriptor, or -1 when the Host header is
 * missing, the name cannot be resolved, or no resolved address accepts the
 * connection. The caller must close() the returned socket.
 */
int http_connect(http_request *req)
{
	const char *host_header = list_get_key(&req->metadata_head, "Host");
	if (host_header == NULL) {
		LOG(LOG_ERROR, "Could not find the Host property in the metadata\n");
		return -1;
	}

	char *host = strdup(host_header);
	if (host == NULL) {
		LOG(LOG_ERROR, "Out of memory while copying the Host property\n");
		return -1;
	}

	const char *port = "80";
	char *colon = strchr(host, ':');
	if (colon != NULL && colon[1] != '\0') {
		/* Cut the copy at ':' so that host holds the name only. */
		*colon = '\0';
		port = colon + 1;
		LOG(LOG_TRACE, "Using port: %s\n", port);
	} else {
		if (colon != NULL) {
			*colon = '\0';	/* "host:" with an empty port */
		}
		LOG(LOG_TRACE, "Using default port\n");
	}

	LOG(LOG_TRACE, "Connecting to HTTP server: %s\n", host);

	struct addrinfo hints;
	struct addrinfo *servinfo = NULL;
	struct addrinfo *p;
	int sockfd = -1;

	memset(&hints, 0, sizeof hints);
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;

	if (getaddrinfo(host, port, &hints, &servinfo) != 0) {
		LOG(LOG_ERROR, "Failed to lookup hostname\n");
		free(host);
		return -1;
	}

	/* Walk the results and keep the first address that connects. */
	for (p = servinfo; p != NULL; p = p->ai_next) {
		sockfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
		if (sockfd == -1) {
			perror("client: socket");
			continue;
		}

		if (connect(sockfd, p->ai_addr, p->ai_addrlen) == -1) {
			perror("client: connect");
			close(sockfd);
			sockfd = -1;
			continue;
		}

		break;
	}

	if (p == NULL) {
		LOG(LOG_ERROR, "Failed to connect to HTTP server\n");
		sockfd = -1;
	}

	freeaddrinfo(servinfo);
	free(host);

	return sockfd;
}

/*
Read a HTTP header from the given socket and
returns a http_request*. 
*/
http_request *http_read_header(int sockfd)
{
	LOG(LOG_TRACE, "Reading header\n");
	http_request *req;
	http_request_init(&req); 

	char *line; 
	line = read_line(sockfd); 
	http_parse_method(req, line); 

	while(1) 
	{
		line = read_line(sockfd); 
		if(line[0] == '\r' && line[1] == '\n')
		{
			// We received the end of the HTTP header 
            LOG(LOG_TRACE, "Received header\n");
               
			break; 

		}

		http_parse_metadata(req, line); 

		free(line); 
	}

	return req;
}

/*
 * Read as much data as possible from the given socket.
 *
 * Reading stops when the peer closes the connection, recv() fails, memory
 * runs out, or more than 5 seconds have passed since the call started. The
 * clock is only checked between recv() calls, so one blocking recv() can
 * still outlast the limit.
 *
 * On success the number of bytes read is stored in *length and a malloc'ed
 * buffer is returned. The buffer always carries a '\0' directly after the
 * data, so it can be treated as a C string when the payload is text. The
 * caller must free() it.
 *
 * Returns NULL if length is NULL, sockfd is -1, or the initial allocation
 * fails (in the last case *length is set to 0).
 */
char *http_read_chunk(int sockfd, ssize_t *length)
{
    enum { READ_BLOCK = 4096 };

    if(length == NULL)
    {
        LOG(LOG_ERROR, "The length pointer supplied to http_read_chunk is NULL\n");
        return NULL;
    }

    if(sockfd == -1)
    {
        LOG(LOG_ERROR, "The socket given to http_read_chunk is invalid\n");
        return NULL;
    }

    size_t capacity = READ_BLOCK;
    size_t total_bytes = 0;

    /* One byte beyond capacity is reserved for the terminator. */
    char *buf = malloc(capacity + 1);
    if(buf == NULL)
    {
        LOG(LOG_ERROR, "Out of memory in http_read_chunk\n");
        *length = 0;
        return NULL;
    }

    const time_t timeout = 5;
    const time_t start = time(NULL);

    for(;;)
    {
        // check if we should timeout
        if(time(NULL) - start > timeout)
        {
            LOG(LOG_WARNING, "Request timed out\n");
            break;
        }

        // grow the buffer geometrically once it is full
        if(total_bytes == capacity)
        {
            size_t new_capacity = capacity * 2;
            char *grown = realloc(buf, new_capacity + 1);
            if(grown == NULL)
            {
                LOG(LOG_ERROR, "Out of memory in http_read_chunk\n");
                break;  // return what has been received so far
            }
            buf = grown;
            capacity = new_capacity;
        }

        ssize_t num_bytes = recv(sockfd, buf + total_bytes,
                                 capacity - total_bytes, 0);
        if(num_bytes < 0 && errno == EINTR)
        {
            continue;
        }
        if(num_bytes <= 0)
        {
            // error or orderly shutdown by the peer
            break;
        }

        total_bytes += (size_t)num_bytes;
    }

    buf[total_bytes] = '\0';

    LOG(LOG_TRACE, "Received: %d\n", (int)total_bytes);

    *length = (ssize_t)total_bytes;

    return buf;
}

/*
 * Report whether str contains any of the forbidden words.
 *
 * The comparison ignores ASCII case. Every start position in str is tried
 * against every word, so occurrences that begin inside an earlier partial
 * match are found as well.
 *
 * Returns 1 if a forbidden word occurs in str, otherwise 0.
 * A NULL or empty str contains nothing and yields 0.
 */
int containing_forbidden_words(char str[]){

    // Forbidden words
    static const char *const words[] = {"SpongeBob", "Britney Spears", "Paris Hilton", "Norrkӧping", "Norrk&ouml;ping", "Norrk%C3%B6ping"};
    const size_t numb_words = sizeof(words) / sizeof(words[0]);

    if (str == NULL)
        return 0;

    for (const char *start = str; *start != '\0'; start++)
    {
        for (size_t w = 0; w < numb_words; w++)
        {
            const char *word = words[w];
            size_t i = 0;

            // <ctype.h> functions need values in the unsigned char range.
            while (word[i] != '\0' && start[i] != '\0' &&
                   tolower((unsigned char)word[i]) ==
                   tolower((unsigned char)start[i]))
            {
                i++;
            }

            // Reaching the end of the word means all of it matched.
            if (word[i] == '\0')
                return 1;
        }
    }

    return 0;
}

/*
 * Send exactly length bytes, repeating send() after short writes and after
 * interruption by a signal. Returns 0 on success, -1 on error.
 */
static int send_to_client_all(int sockfd, const char *data, size_t length)
{
    size_t sent = 0;

    while (sent < length)
    {
        ssize_t n = send(sockfd, data + sent, length - sent, 0);
        if (n < 0)
        {
            if (errno == EINTR)
                continue;
            return -1;
        }
        sent += (size_t)n;
    }

    return 0;
}

/*
 * Send length bytes of data to the client socket.
 *
 * If packages_size is less than 1 the data is handed to the socket in one
 * piece; otherwise it is sent in pieces of at most packages_size bytes.
 * send() may accept fewer bytes than requested, so every piece is re-sent
 * from where it stopped until it has gone out completely.
 *
 * Returns 0 on success (including when length is 0 or negative) and -1 if
 * data is NULL or sending fails.
 */
int send_to_client(int client_sockfd, char data[], int packages_size, ssize_t length)
{
    if (length <= 0)
        return 0;
    if (data == NULL)
        return -1;

    size_t remaining = (size_t)length;
    const size_t piece = (packages_size < 1) ? remaining : (size_t)packages_size;
    const char *cursor = data;

    while (remaining > 0)
    {
        size_t now = (remaining < piece) ? remaining : piece;

        if (send_to_client_all(client_sockfd, cursor, now) == -1)
        {
            perror("Couldn't send data to the client");
            return -1;
        }

        cursor += now;
        remaining -= now;
    }

    return 0;
}

/*
 * Build the HTTP request for req and send it to the web server on sockfd.
 *
 * The request text comes from http_build_request(). send() may accept only
 * part of the buffer, so it is called again until every byte has been
 * handed over.
 *
 * Returns 0 on success and 1 if the request could not be built or sent.
 */
int http_request_send(int sockfd, http_request *req)
{
    LOG(LOG_TRACE, "Requesting: %s\n", req->search_path);

    char *request_buffer = http_build_request(req);
    if(request_buffer == NULL)
    {
        LOG(LOG_ERROR, "Could not build the HTTP request\n");
        return 1;
    }

    // send the http request to the web server
    const size_t total = strlen(request_buffer);
    size_t sent = 0;
    while(sent < total)
    {
        ssize_t n = send(sockfd, request_buffer + sent, total - sent, 0);
        if(n < 0)
        {
            if(errno == EINTR)
            {
                continue;
            }
            perror("send");
            free(request_buffer);
            return 1;
        }
        sent += (size_t)n;
    }
    free(request_buffer);

    LOG(LOG_TRACE, "Sent HTTP header to web server\n");

    return 0;
}

/*
 * Serve one proxied request on an accepted client connection.
 *
 * Steps:
 *   1. Read and parse the request header from the client.
 *   2. If the request path or Host value contains a forbidden word, answer
 *      with error page 1 and stop.
 *   3. Connect to the requested host and send the rebuilt request.
 *   4. Relay the response header to the client line by line, noting whether
 *      the body is text and whether a Content-Encoding is present.
 *   5. Read the response body. A text body without a content encoding that
 *      contains a forbidden word is replaced by error page 3; anything else
 *      is relayed unchanged.
 *
 * The server socket is closed on every path. The client socket is left open
 * for the caller to close.
 */
void handle_client(int client_sockfd)
{
    int server_sockfd;
    http_request *req;

    req = http_read_header(client_sockfd);
    if(req == NULL)
    {
        LOG(LOG_ERROR, "Failed to parse the header\n");
        return;
    }

    // Both values are needed below; without them the request is unusable.
    const char *host = list_get_key(&req->metadata_head, "Host");
    if(req->search_path == NULL || host == NULL)
    {
        LOG(LOG_ERROR, "The request has no path or no Host property\n");
        http_request_destroy(req);
        return;
    }

    if (containing_forbidden_words((char*)req->search_path) || containing_forbidden_words((char*)host)){
        char *error1 = "HTTP/1.1 200 OK\r\nServer: Net Ninny\r\nContent-Type: text/html\r\n\r\n<html>\n\n<title>\nNet Ninny Error Page 1 for CPSC 441 Assignment 1\n</title>\n\n<body>\n<p>\nSorry, but the Web page that you were trying to access\nis inappropriate for you, based on the URL.\nThe page has been blocked to avoid insulting your intelligence.\n</p>\n\n<p>\nNet Ninny\n</p>\n\n</body>\n\n</html>\n";
        http_request_destroy(req);
        send_to_client(client_sockfd, error1, 0, strlen(error1));
        return;
    }

    server_sockfd = http_connect(req);
    if(server_sockfd == -1)
    {
        LOG(LOG_ERROR, "Failed to connect to host\n");
        http_request_destroy(req);
        return;
    }

    LOG(LOG_TRACE, "Connected to host\n");

    int send_failed = http_request_send(server_sockfd, req);
    http_request_destroy(req);
    if(send_failed)
    {
        LOG(LOG_ERROR, "Failed to send the request to the host\n");
        close(server_sockfd);
        return;
    }

    LOG(LOG_TRACE, "Beginning to retrieve the response header\n");
    int is_bad_encoding = 0;
    int is_text_content = 0;
    for(;;)
    {
        char *line = read_line(server_sockfd);
        if(line == NULL)
        {
            LOG(LOG_ERROR, "The response header ended unexpectedly\n");
            close(server_sockfd);
            return;
        }

        send_to_client(client_sockfd, line, 0, (ssize_t)strlen(line));

        int end_of_header = (line[0] == '\r' && line[1] == '\n') || line[0] == '\n';
        if(!end_of_header)
        {
            // Both prefixes are 18 characters long; the line is only
            // inspected, never modified. Header names are case-insensitive.
            if (strncasecmp(line, "Content-Type: text", 18) == 0)
                is_text_content = 1;
            else if (strncasecmp(line, "Content-Encoding: ", 18) == 0)
                is_bad_encoding = 1;
        }

        free(line);

        if(end_of_header)
        {
            // We received the end of the HTTP header
            LOG(LOG_TRACE, "Received the end of the HTTP response header\n");
            break;
        }
    }

    LOG(LOG_TRACE, "Beginning to retrieve content\n");
    ssize_t chunk_length = 0;
    char *temp = http_read_chunk(server_sockfd, &chunk_length);
    if(temp == NULL)
    {
        LOG(LOG_ERROR, "Failed to read the content\n");
        close(server_sockfd);
        return;
    }
    LOG(LOG_TRACE, "Received the content, %d bytes\n", (int)chunk_length);

    if (is_text_content && !is_bad_encoding && containing_forbidden_words(temp))
    {
        LOG(LOG_TRACE, "Received data contains forbidden words!\n");
        char *error2 = "<html>\n<title>\nNet Ninny Error Page 3 for CPSC 441 Assignment 1\n</title>\n\n<body>\n<p>\nSorry, but the Web page that you were trying to access\nis inappropriate for you, based on some of the words it contains.\nThe page has been blocked to avoid insulting your intelligence.\n</p>\n\n<p>\nNet Ninny\n</p>\n\n</body>\n\n</html>\n";

        send_to_client(client_sockfd, error2, 0, strlen(error2));
    }
    else
    {
        send_to_client(client_sockfd, temp, 0, chunk_length);
    }
    free(temp);
    close(server_sockfd);
}


/*
 * Proxy entry point.
 *
 * Usage: program [bind-address [port]]
 * Without arguments the server listens on 192.168.1.189, port 8888.
 *
 * Accepts connections forever and forks one child per client. The child
 * runs handle_client() and exits; the parent goes straight back to
 * accept(). SIGCHLD is ignored so finished children are not left behind as
 * zombies.
 */
int main(int argc, char *argv[])
{
    const char *bind_address = (argc > 1) ? argv[1] : "192.168.1.189";
    long port = 8888;

    if(argc > 2)
    {
        char *end = NULL;
        errno = 0;
        port = strtol(argv[2], &end, 10);
        if(errno != 0 || end == argv[2] || *end != '\0' || port < 1 || port > 65535)
        {
            fprintf(stderr, "usage: %s [bind-address [port]]\n", argv[0]);
            exit(1);
        }
    }

    printf("Starting server\n");

    int sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if(sockfd < 0)
    {
        perror("fail to socket");
        exit(1);
    }

    // Allow a quick restart while old connections are still in TIME_WAIT.
    int reuse = 1;
    if(setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0)
    {
        perror("setsockopt");   // not fatal
    }

    struct sockaddr_in serveraddr;
    memset(&serveraddr, 0, sizeof(serveraddr));
    serveraddr.sin_family = AF_INET;
    serveraddr.sin_port = htons((unsigned short)port);
    if(inet_pton(AF_INET, bind_address, &serveraddr.sin_addr) != 1)
    {
        fprintf(stderr, "invalid bind address: %s\n", bind_address);
        exit(1);
    }

    if(bind(sockfd, (struct sockaddr *)&serveraddr, sizeof(serveraddr)) < 0)
    {
        perror("fail to bind");
        exit(1);
    }
    if(listen(sockfd, 5) < 0)
    {
        perror("fail to listen");
        exit(1);
    }

    // Children are never waited for; ignoring SIGCHLD reaps them.
    signal(SIGCHLD, SIG_IGN);

    printf("server: waiting for connections..\n");
    while(1)
    {
        struct sockaddr_in clientaddr;
        // accept() overwrites the length, so reset it for every call.
        socklen_t clientlen = sizeof(clientaddr);

        int new_fd = accept(sockfd, (struct sockaddr *)&clientaddr, &clientlen);
        if(new_fd == -1)
        {
            perror("accept");
            continue;
        }

        printf("Receieved connection\n");

        pid_t child_pid = fork();
        if(child_pid == -1)
        {
            perror("fork");
        }
        else if(child_pid == 0)
        {
            // The child only talks to its client, not the listening socket.
            close(sockfd);

            handle_client(new_fd);

            close(new_fd);
            exit(0);
        }

        // Parent: the child holds its own copy of the descriptor.
        close(new_fd);
    }

    return 0;
}

 

 

 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值