《Intro to Computer Systems》(csapp)LAB7(proxylab)

15 篇文章 1 订阅
8 篇文章 0 订阅

网络编程与web服务器与并发

网络编程和并发可以说是计算机系统最难搞的问题了,socket很复杂,并发难度又很大。《csapp》11章和12章的内容对这两个问题做了介绍,做lab是肯定需要仔细看看的。

无关闲话

前几天看南大 jyy 的操作系统课,其中提到了自旋锁和互斥锁的实现。自旋锁需要关中断(保证公平性),因此不适合用来保护不紧迫的、较长的代码段,有时甚至会出问题——比如关中断以后,可能永远收不到 IO 完成时产生的中断。为此引入了互斥锁。
如果用自旋的方式实现互斥锁,可以在自旋失败时让出 CPU 去运行其他线程,但这样在锁争抢激烈时,所有等待的线程都会被调度起来自旋一遍甚至多遍。
于是考虑用自旋锁协助实现互斥锁,如下图所示:请求锁时,先通过自旋锁进入临界区,判断拿不到锁则进入等待队列;释放锁时,如果等待队列中有线程就唤醒其中一个。这样就避免了 CPU 去调度那些不可能拿到锁的线程而浪费资源。互斥同步、条件变量如下图:
在这里插入图片描述

另外信号量能不用就不用,试图使用信号量解决并发竞争问题的结果最终往往都是错的,更应该多用条件变量。

lab

首先需要阅读本次lab的writeup以及《CSAPP》的网络编程和并发两章,本次lab给我们提供了一个《CSAPP》 上实现的一个tiny(一个web服务器),我们需要做的是写一个代理服务器,用来接受请求,并且将请求发送给tiny启动的服务器。在PART 2和PART 3中添加线程并发访问以及缓存,来完善我们写的代理服务器。实验完成以后可以直接将代理服务器应用到浏览器上。实现主要参考了七 PROXY LAB
在实验之前,最好调明白tiny.c,同时使用telnet模拟试试http请求发送了哪些东西。也看看http请求获得的到底是个什么东西。
使用到了 telnet、curl、lsof 这几个工具。

环境问题

可能由于环境问题,在执行测试时候遇到如下问题,翻了翻driver.sh才发现问题所在。

*** Concurrency ***
Starting tiny on port 3461
Starting proxy on port 32726
Starting the blocking NOP server on port 23220
Timeout waiting for the server to grab the port reserved for it
Terminated

在这里插入图片描述
修改一下脚本文件如下图将python 改成python3。
在这里插入图片描述

PART 1

代码如下,只考虑了url是http://hostname:port/path的情况。
proxy.c

#include "csapp.h"
/* Debug helper: prints a fixed marker string to stdout. */
#define DEBUG() { printf("wwwwqqqqq\n");}
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400

/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
/* Fixed headers that build_requesthdrs appends to every forwarded request. */
static const char *conn_hdr = "Connection: close\r\n";
static const char *prox_hdr = "Proxy-Connection: close\r\n";
/* Forward declarations of the functions defined below. */
void doit(int connfd);
void parse_uri(char *uri, char *hostname, char *path, int *port);
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg);
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char* port);


int main(int argc, char **argv) {
    int listenfd, connfd;   //监听描述符、建立连接描述符
    char hostname[MAXLINE], port[MAXLINE];  //主机名、端口
    struct sockaddr_storage clientaddr;
    socklen_t clientlen; 
    /* 检查命令行参数 */
    if(argc != 2){
        fprintf(stderr,"usage :%s <port> \n",argv[0]);
        exit(1);
    }
    //打开一个监听套接字
    listenfd = Open_listenfd(argv[1]);
    while(1) {
        clientlen = sizeof(clientaddr);
        /* Accept不断接受连接请求 */
        connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        /* 打印Accept相关信息 */
        Getnameinfo((SA*)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s %s).\n", hostname, port);
        /* 执行事务 */
        doit(connfd);
        /* 关闭连接 */
        Close(connfd);
    }
    return 0;
}

/*
 * doit - handle one client HTTP transaction.
 *
 * Reads the request line from connfd, accepts only GET, splits the URI with
 * parse_uri, rebuilds the request as HTTP/1.0 with build_requesthdrs, sends
 * it to the end server and relays the response back to the client.
 * The caller keeps ownership of connfd and closes it.
 *
 * Malformed or over-long requests and unreachable end servers are answered
 * with an error page instead of being processed.
 *
 * NOTE(review): the lowercase open_clientfd/rio_* calls are used on purpose.
 * In the stock CS:APP csapp.c the capitalised wrappers call exit() on any
 * error, which would take the whole proxy down when a host cannot be reached
 * or a peer resets the connection -- confirm against the csapp.c you link.
 */
void doit(int connfd) {
    int endserver_fd;   /* descriptor of the connection to the end server */
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    /* components extracted from the request URI */
    char hostname[MAXLINE], path[MAXLINE];
    char newreq[MAXLINE];   /* request forwarded to the end server */
    char port_str[16];      /* large enough for any int */
    char cause[256];
    int port, len;
    size_t req_len;
    ssize_t n;
    rio_t from_client, to_endserver;

    /* Read and validate the request line. */
    Rio_readinitb(&from_client, connfd);
    if (rio_readlineb(&from_client, buf, MAXLINE) <= 0)
        return;     /* client closed the connection or the read failed */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        clienterror(connfd, "request line", "400", "Bad Request",
                    "Proxy Server could not parse the request");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(connfd, method, "501", "Not Implemented",
                    "Proxy Server does not implement this method");
        return;
    }

    /* Split the URI into hostname, path and port (port defaults to 80). */
    parse_uri(uri, hostname, path, &port);
    snprintf(port_str, sizeof port_str, "%d", port);

    /* Build the forwarded request; refuse it if the request line is cut off. */
    len = snprintf(newreq, sizeof newreq, "GET %s HTTP/1.0\r\n", path);
    if (len < 0 || (size_t)len >= sizeof newreq) {
        clienterror(connfd, "request URI", "414", "URI Too Long",
                    "Proxy Server cannot forward a request line this long");
        return;
    }
    build_requesthdrs(&from_client, newreq, hostname, port_str);

    /* Connect to the end server; report failure to the client. */
    endserver_fd = open_clientfd(hostname, port_str);
    if (endserver_fd < 0) {
        snprintf(cause, sizeof cause, "%s:%s", hostname, port_str);
        clienterror(connfd, cause, "502", "Bad Gateway",
                    "Proxy Server could not connect to the end server");
        return;
    }
    Rio_readinitb(&to_endserver, endserver_fd);

    /* Send the request, then relay the response until EOF or an I/O error. */
    req_len = strlen(newreq);
    if (rio_writen(endserver_fd, newreq, req_len) == (ssize_t)req_len) {
        while ((n = rio_readlineb(&to_endserver, buf, MAXLINE)) > 0) {
            if (rio_writen(connfd, buf, (size_t)n) != n)
                break;  /* client went away */
        }
    }
    Close(endserver_fd);
}

/*
 * parse_uri - split a request URI into hostname, path and port.
 *
 * uri      : "[scheme://]host[:port][/path][?query]"; it is only read.
 * hostname : out, buffer of at least MAXLINE bytes; receives the host name.
 * path     : out, buffer of at least MAXLINE bytes; receives the path plus
 *            query. Always starts with '/'; "/" when the URI has no path.
 * port     : out; the decimal port from the URI, or 80 when it is absent,
 *            not numeric, or outside 1..65535.
 *
 * Only the authority part (everything before the first '/', '?' or '#') is
 * searched for the ':' port separator, so colons inside the path or query
 * (for example a nested "http://..." parameter) do not confuse the parser.
 * Bracketed IPv6 literals are not supported.
 */
void parse_uri(char *uri, char *hostname, char *path, int *port) {
    const char *host = uri;
    const char *sep, *colon, *rest;
    size_t auth_len, host_len;

    *port = 80;

    /* Accept "//" as the scheme separator only when it is the first
     * delimiter in the URI, so "host/a//b" is not mistaken for a scheme. */
    sep = strstr(uri, "//");
    if (sep != NULL && (size_t)(sep - uri) == strcspn(uri, "/?#"))
        host = sep + 2;

    /* The authority runs up to the first '/', '?', '#' or end of string. */
    auth_len = strcspn(host, "/?#");
    rest = host + auth_len;

    /* Host name: everything before the optional ':' inside the authority. */
    colon = memchr(host, ':', auth_len);
    host_len = (colon != NULL) ? (size_t)(colon - host) : auth_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, host, host_len);
    hostname[host_len] = '\0';

    /* Port: leading decimal digits after the ':'; keep 80 if invalid. */
    if (colon != NULL) {
        const char *p = colon + 1;
        long value = 0;

        while (p < rest && *p >= '0' && *p <= '9' && value <= 65535) {
            value = value * 10 + (*p - '0');
            p++;
        }
        if (p > colon + 1 && value >= 1 && value <= 65535)
            *port = (int)value;
    }

    /* Path (with query). An absent path becomes "/" so that the forwarded
     * request line stays well-formed. */
    if (*rest == '/')
        snprintf(path, MAXLINE, "%s", rest);
    else if (*rest == '?')
        snprintf(path, MAXLINE, "/%s", rest);
    else
        snprintf(path, MAXLINE, "/");
}


/*
 * clienterror - send an HTTP/1.0 error response with a small HTML body.
 *
 * fd       : descriptor of the client connection.
 * cause    : text shown after longmsg (may be client-supplied).
 * errnum   : status code as text, e.g. "501".
 * shortmsg : reason phrase for the status line.
 * longmsg  : explanation shown in the body.
 *
 * The body and the headers are each formatted with a single bounded
 * snprintf. Appending with sprintf(body, "%s...", body) passes the
 * destination as a source argument, which is undefined behaviour, and it
 * could overrun body when cause is long. Over-long text is truncated.
 */
void clienterror(int fd, char *cause, char *errnum,
         char *shortmsg, char *longmsg) {
    char buf[MAXLINE], body[MAXBUF];
    int body_len, hdr_len;

    /* Build the HTTP response body */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Proxy Error</title>"
                        "<body bgcolor=ffffff>\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Proxy Web server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0)
        body_len = 0;
    else if ((size_t)body_len >= sizeof body)
        body_len = (int)sizeof body - 1;    /* snprintf truncated the text */

    /* Build the status line and headers */
    hdr_len = snprintf(buf, sizeof buf,
                       "HTTP/1.0 %s %s\r\n"
                       "Content-type: text/html\r\n"
                       "Content-length: %d\r\n\r\n",
                       errnum, shortmsg, body_len);
    if (hdr_len < 0)
        hdr_len = 0;
    else if ((size_t)hdr_len >= sizeof buf)
        hdr_len = (int)sizeof buf - 1;

    /* Print the HTTP response */
    Rio_writen(fd, buf, (size_t)hdr_len);
    Rio_writen(fd, body, (size_t)body_len);
}

/* Return 1 when line starts with a header name (compared case-insensitively)
 * that the proxy replaces with its own value, 0 otherwise. */
static int is_replaced_header(const char *line) {
    static const char *const names[] = {
        "Host:", "User-Agent:", "Connection:", "Proxy-Connection:"
    };
    size_t i;

    for (i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (strncasecmp(line, names[i], strlen(names[i])) == 0)
            return 1;
    }
    return 0;
}

/* Append src to the string in dst (current length *len) only when the result
 * is no longer than limit characters. Returns 1 if appended, 0 if skipped. */
static int append_if_fits(char *dst, size_t *len, size_t limit, const char *src) {
    size_t n = strlen(src);

    if (*len + n > limit)
        return 0;
    memcpy(dst + *len, src, n + 1);
    *len += n;
    return 1;
}

/*
 * build_requesthdrs - complete the request that is forwarded to the server.
 *
 * rp       : buffered reader positioned just after the client's request line.
 * newreq   : in/out; already holds "GET <path> HTTP/1.0\r\n". Treated as a
 *            buffer of MAXLINE bytes, which is what both callers in this
 *            file pass.
 * hostname : host used for the generated Host header.
 * port     : port (as text) used for the generated Host header.
 *
 * Client headers are copied through up to the blank line, except Host,
 * User-Agent, Connection and Proxy-Connection, which are replaced by the
 * proxy's own values. Header names are matched at the start of the line and
 * without regard to case. Room for the mandatory headers is reserved first;
 * a client header that would not fit is dropped whole.
 */
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char* port) {
    char buf[MAXLINE], tail[MAXLINE];
    size_t len = strlen(newreq);
    size_t tail_len, limit;
    int n;

    /* Format the mandatory trailer first so its size is known. */
    n = snprintf(tail, sizeof tail, "Host: %s:%s\r\n%s%s%s\r\n",
                 hostname, port, user_agent_hdr, conn_hdr, prox_hdr);
    if (n < 0)
        tail_len = 0;
    else if ((size_t)n >= sizeof tail)
        tail_len = sizeof tail - 1;
    else
        tail_len = (size_t)n;

    /* Maximum length newreq may reach before the trailer is appended. */
    limit = (len + tail_len < MAXLINE) ? MAXLINE - 1 - tail_len : len;

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        if (!strcmp(buf, "\r\n")) break;        /* end of client headers */
        if (is_replaced_header(buf)) continue;
        append_if_fits(newreq, &len, limit, buf);
    }

    if (len < MAXLINE)
        snprintf(newreq + len, MAXLINE - len, "%s", tail);
}


/* test
    telnet 127.0.0.1 4500
    GET http://www.cmu.edu/hub/index.html HTTP/1.1
    curl -v --proxy http://localhost:4501 http://localhost:4502/home.html
    GET http://www.cmu.edu:80/hub/index.html HTTP/1.1
    GET /hub/index.html HTTP/1.0
    
*/

PART 2

参考《csapp》,该部分可以用多进程或多线程实现并发。我使用多线程来实现。

int main(int argc, char **argv) {
    int listenfd, *connfd;   //监听描述符、建立连接描述符
    pthread_t tid;
    char hostname[MAXLINE], port[MAXLINE];  //主机名、端口
    struct sockaddr_storage clientaddr;
    socklen_t clientlen; 
    /* 检查命令行参数 */
    if(argc != 2){
        fprintf(stderr,"usage :%s <port> \n",argv[0]);
        exit(1);
    }
    //打开一个监听套接字
    listenfd = Open_listenfd(argv[1]);
    while(1) {
        clientlen = sizeof(clientaddr);
        /* Accept不断接受连接请求 */
        connfd = (int *)Malloc(sizeof(int));
        *connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        /* 打印Accept相关信息 */
        Getnameinfo((SA*)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s %s).\n", hostname, port);
        Pthread_create(&tid, NULL, thread, connfd);
    }
    return 0;
}

/*
 * thread - start routine serving one client connection.
 *
 * vargp points to a heap-allocated int holding the connected descriptor.
 * The routine copies the descriptor, detaches itself, frees the cell, runs
 * the transaction and closes the descriptor. Always returns NULL.
 */
void *thread(void *vargp) {
    int *fd_cell = (int *)vargp;
    int client_fd = *fd_cell;

    /* Detached: nobody has to join this thread. */
    Pthread_detach(pthread_self());
    Free(fd_cell);

    /* Run the transaction, then drop the connection. */
    doit(client_fd);
    Close(client_fd);
    return NULL;
}

分析:

  • 和 part1 的代码相比,connfd 改成了指针(int *connfd),并且为每个连接单独 Malloc 一块内存。《csapp》中提到,Pthread_create 传给线程的参数是一个指针,如果直接传主线程里 connfd 变量的地址,主线程下一次 Accept 可能在子线程读取之前就把它覆盖掉,从而在主线程和子线程之间产生竞争。
  • Pthread_detach(pthread_self())使得子线程独立,线程结束自动释放内存,否则可能造成内存泄漏。

FireFox浏览器设置

设置代理

在这里插入图片描述

在这里插入图片描述

清除缓存

在这里插入图片描述

测试

在这里插入图片描述
在这里插入图片描述
可以看到,尽管网页加载出来了,但是代理服务器已经挂掉了。跟踪了几步发现 parse_uri 函数的逻辑不够完善:它只考虑了 http://hostname:port/path 这一种情况,很多其他情况都没有处理(例如上图中那种复杂的 url 就无法解析)。下图打印出的是实际从浏览器中收到的请求。
在这里插入图片描述

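这里我有一个不明白的点:既然我已经用 Pthread_detach(pthread_self()); 将子线程分离出去了,为什么子线程挂掉依然会让整个进程都挂掉?(补充:detach 只改变线程资源的回收方式,即不再需要被 join;线程仍然和其他线程共享同一个进程。任何线程调用 exit,或者触发 SIGSEGV、SIGPIPE 这类默认动作为终止的信号,结束的都是整个进程。)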

PART 3

添加一个缓存进去,我缓存的key是完整url。这里有一个读写者并发的问题,可以用《csapp》提供的读写者信号量pv解决。但是还有一个问题就是多个读者时LRU中的cnt会产生竞争。课程文档里说该问题可以一定程度忽略,我就没有处理竞争。

#include "csapp.h"

/* Debug helper: prints a fixed marker string to stdout. */
#define DEBUG() { printf("wwwwqqqqq\n");}
/* Recommended max cache and object sizes */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400

/* You won't lose style points for including this long line in your code */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
/* Fixed headers that build_requesthdrs appends to every forwarded request. */
static const char *conn_hdr = "Connection: close\r\n";
static const char *prox_hdr = "Proxy-Connection: close\r\n";

/* One cache slot. init_cache allocates every field, including the two
 * integers, as a separate heap object. */
typedef struct {
    char *name; // cache key: the request URL
    int *flag;  // 1 when the slot holds an object, 0 when it is free
    int *cnt;   // age counter for LRU: reset to 0 on use, incremented on every cache access
    char *object;   // cached response content
} CacheLine;

CacheLine *cache;   // array of cache slots, allocated by init_cache
int readcnt; // number of readers currently inside the cache
sem_t mutex, w; // mutex protects readcnt; w gives writers exclusive access to the cache

/* Forward declarations of the functions defined below. */
void doit(int connfd);
void parse_uri(char *uri, char *hostname, char *path, int *port);
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg);
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char* port);
void *thread(void *vargp);
/* Cache interface: set up the slots, look an URL up, store an object. */
void init_cache();
int reader(int fd, char *url);
void writer(char *url, char *buf);

int main(int argc, char **argv) {
    init_cache();
    int listenfd, *connfd;   //监听描述符、建立连接描述符
    pthread_t tid;
    char hostname[MAXLINE], port[MAXLINE];  //主机名、端口
    struct sockaddr_storage clientaddr;
    socklen_t clientlen; 
    /* 检查命令行参数 */
    if(argc != 2){
        fprintf(stderr,"usage :%s <port> \n",argv[0]);
        exit(1);
    }
    //打开一个监听套接字
    listenfd = Open_listenfd(argv[1]);
    while(1) {
        clientlen = sizeof(clientaddr);
        /* Accept不断接受连接请求 */
        connfd = (int *)Malloc(sizeof(int));
        *connfd = Accept(listenfd, (SA *)&clientaddr, &clientlen);
        /* 打印Accept相关信息 */
        Getnameinfo((SA*)&clientaddr, clientlen, hostname, MAXLINE, port, MAXLINE, 0);
        printf("Accepted connection from (%s %s).\n", hostname, port);
        Pthread_create(&tid, NULL, thread, connfd);
    }
    return 0;
}

/*
 * doit - handle one client HTTP transaction, using the cache.
 *
 * Reads the request line from connfd and accepts only GET. If the full URL
 * is cached, reader() sends the cached object and the transaction ends.
 * Otherwise the URI is split with parse_uri, the request is rebuilt as
 * HTTP/1.0 with build_requesthdrs and sent to the end server, and the
 * response is relayed to the client. A response is stored with writer()
 * only when it was relayed completely, is smaller than MAX_OBJECT_SIZE and
 * contains no NUL byte, because the cache stores objects as C strings.
 * The caller keeps ownership of connfd and closes it.
 *
 * NOTE(review): the lowercase open_clientfd/rio_* calls are used on purpose.
 * In the stock CS:APP csapp.c the capitalised wrappers call exit() on any
 * error, which would take the whole proxy down when a host cannot be reached
 * or a peer resets the connection -- confirm against the csapp.c you link.
 */
void doit(int connfd) {
    int endserver_fd;   /* descriptor of the connection to the end server */
    char object_buf[MAX_OBJECT_SIZE];   /* response copy kept for the cache */
    char buf[MAXLINE], method[MAXLINE], url[MAXLINE], uri[MAXLINE], version[MAXLINE];
    /* components extracted from the request URI */
    char hostname[MAXLINE], path[MAXLINE];
    char newreq[MAXLINE];   /* request forwarded to the end server */
    char port_str[16];      /* large enough for any int */
    char cause[256];
    int port, len;
    int cacheable = 1;
    size_t req_len, total_size = 0;
    ssize_t n = 0;
    rio_t from_client, to_endserver;

    /* Read and validate the request line. */
    Rio_readinitb(&from_client, connfd);
    if (rio_readlineb(&from_client, buf, MAXLINE) <= 0)
        return;     /* client closed the connection or the read failed */
    if (sscanf(buf, "%s %s %s", method, uri, version) != 3) {
        clienterror(connfd, "request line", "400", "Bad Request",
                    "Proxy Server could not parse the request");
        return;
    }
    strcpy(url, uri);   /* cache key: the URL exactly as requested */
    if (strcasecmp(method, "GET")) {
        clienterror(connfd, method, "501", "Not Implemented",
                    "Proxy Server does not implement this method");
        return;
    }

    /* Serve from the cache when possible. */
    if (reader(connfd, url)) {
        fprintf(stdout, "%s from cache\n", url);
        return;
    }

    /* Split the URI into hostname, path and port (port defaults to 80). */
    parse_uri(uri, hostname, path, &port);
    snprintf(port_str, sizeof port_str, "%d", port);

    /* Build the forwarded request; refuse it if the request line is cut off. */
    len = snprintf(newreq, sizeof newreq, "GET %s HTTP/1.0\r\n", path);
    if (len < 0 || (size_t)len >= sizeof newreq) {
        clienterror(connfd, "request URI", "414", "URI Too Long",
                    "Proxy Server cannot forward a request line this long");
        return;
    }
    build_requesthdrs(&from_client, newreq, hostname, port_str);

    /* Connect to the end server; report failure to the client. */
    endserver_fd = open_clientfd(hostname, port_str);
    if (endserver_fd < 0) {
        snprintf(cause, sizeof cause, "%s:%s", hostname, port_str);
        clienterror(connfd, cause, "502", "Bad Gateway",
                    "Proxy Server could not connect to the end server");
        return;
    }
    Rio_readinitb(&to_endserver, endserver_fd);

    /* Send the request, then relay the response while copying it. */
    object_buf[0] = '\0';
    req_len = strlen(newreq);
    if (rio_writen(endserver_fd, newreq, req_len) != (ssize_t)req_len) {
        cacheable = 0;
    } else {
        while ((n = rio_readlineb(&to_endserver, buf, MAXLINE)) > 0) {
            if (rio_writen(connfd, buf, (size_t)n) != n) {
                cacheable = 0;  /* client went away: the copy is incomplete */
                break;
            }
            if (!cacheable)
                continue;
            if (total_size + (size_t)n < MAX_OBJECT_SIZE &&
                memchr(buf, '\0', (size_t)n) == NULL) {
                memcpy(object_buf + total_size, buf, (size_t)n);
                total_size += (size_t)n;
                object_buf[total_size] = '\0';
            } else {
                cacheable = 0;  /* too large, or not representable as a string */
            }
        }
        if (n < 0)
            cacheable = 0;      /* read error: the response is incomplete */
    }

    if (cacheable && total_size > 0)
        writer(url, object_buf);

    Close(endserver_fd);
}

/*
 * parse_uri - split a request URI into hostname, path and port.
 *
 * uri      : "[scheme://]host[:port][/path][?query]"; it is only read.
 * hostname : out, buffer of at least MAXLINE bytes; receives the host name.
 * path     : out, buffer of at least MAXLINE bytes; receives the path plus
 *            query. Always starts with '/'; "/" when the URI has no path.
 * port     : out; the decimal port from the URI, or 80 when it is absent,
 *            not numeric, or outside 1..65535.
 *
 * Only the authority part (everything before the first '/', '?' or '#') is
 * searched for the ':' port separator, so URIs whose query string contains
 * another URL are handled, e.g.
 *     http://znsv.baidu.com/customer_search/api/ping?logid=3148635800&plate_url=http://home.baidu.com/home
 * Bracketed IPv6 literals are not supported.
 */
void parse_uri(char *uri, char *hostname, char *path, int *port) {
    const char *host = uri;
    const char *sep, *colon, *rest;
    size_t auth_len, host_len;

    *port = 80;

    /* Accept "//" as the scheme separator only when it is the first
     * delimiter in the URI, so "host/a//b" is not mistaken for a scheme. */
    sep = strstr(uri, "//");
    if (sep != NULL && (size_t)(sep - uri) == strcspn(uri, "/?#"))
        host = sep + 2;

    /* The authority runs up to the first '/', '?', '#' or end of string. */
    auth_len = strcspn(host, "/?#");
    rest = host + auth_len;

    /* Host name: everything before the optional ':' inside the authority. */
    colon = memchr(host, ':', auth_len);
    host_len = (colon != NULL) ? (size_t)(colon - host) : auth_len;
    if (host_len >= MAXLINE)
        host_len = MAXLINE - 1;
    memcpy(hostname, host, host_len);
    hostname[host_len] = '\0';

    /* Port: leading decimal digits after the ':'; keep 80 if invalid. */
    if (colon != NULL) {
        const char *p = colon + 1;
        long value = 0;

        while (p < rest && *p >= '0' && *p <= '9' && value <= 65535) {
            value = value * 10 + (*p - '0');
            p++;
        }
        if (p > colon + 1 && value >= 1 && value <= 65535)
            *port = (int)value;
    }

    /* Path (with query). An absent path becomes "/" so that the forwarded
     * request line stays well-formed. */
    if (*rest == '/')
        snprintf(path, MAXLINE, "%s", rest);
    else if (*rest == '?')
        snprintf(path, MAXLINE, "/%s", rest);
    else
        snprintf(path, MAXLINE, "/");
}


/*
 * clienterror - send an HTTP/1.0 error response with a small HTML body.
 *
 * fd       : descriptor of the client connection.
 * cause    : text shown after longmsg (may be client-supplied).
 * errnum   : status code as text, e.g. "501".
 * shortmsg : reason phrase for the status line.
 * longmsg  : explanation shown in the body.
 *
 * The body and the headers are each formatted with a single bounded
 * snprintf. Appending with sprintf(body, "%s...", body) passes the
 * destination as a source argument, which is undefined behaviour, and it
 * could overrun body when cause is long. Over-long text is truncated.
 */
void clienterror(int fd, char *cause, char *errnum,
         char *shortmsg, char *longmsg) {
    char buf[MAXLINE], body[MAXBUF];
    int body_len, hdr_len;

    /* Build the HTTP response body */
    body_len = snprintf(body, sizeof body,
                        "<html><title>Proxy Error</title>"
                        "<body bgcolor=ffffff>\r\n"
                        "%s: %s\r\n"
                        "<p>%s: %s\r\n"
                        "<hr><em>The Proxy Web server</em>\r\n",
                        errnum, shortmsg, longmsg, cause);
    if (body_len < 0)
        body_len = 0;
    else if ((size_t)body_len >= sizeof body)
        body_len = (int)sizeof body - 1;    /* snprintf truncated the text */

    /* Build the status line and headers */
    hdr_len = snprintf(buf, sizeof buf,
                       "HTTP/1.0 %s %s\r\n"
                       "Content-type: text/html\r\n"
                       "Content-length: %d\r\n\r\n",
                       errnum, shortmsg, body_len);
    if (hdr_len < 0)
        hdr_len = 0;
    else if ((size_t)hdr_len >= sizeof buf)
        hdr_len = (int)sizeof buf - 1;

    /* Print the HTTP response */
    Rio_writen(fd, buf, (size_t)hdr_len);
    Rio_writen(fd, body, (size_t)body_len);
}

/* Return 1 when line starts with a header name (compared case-insensitively)
 * that the proxy replaces with its own value, 0 otherwise. */
static int is_replaced_header(const char *line) {
    static const char *const names[] = {
        "Host:", "User-Agent:", "Connection:", "Proxy-Connection:"
    };
    size_t i;

    for (i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (strncasecmp(line, names[i], strlen(names[i])) == 0)
            return 1;
    }
    return 0;
}

/* Append src to the string in dst (current length *len) only when the result
 * is no longer than limit characters. Returns 1 if appended, 0 if skipped. */
static int append_if_fits(char *dst, size_t *len, size_t limit, const char *src) {
    size_t n = strlen(src);

    if (*len + n > limit)
        return 0;
    memcpy(dst + *len, src, n + 1);
    *len += n;
    return 1;
}

/*
 * build_requesthdrs - complete the request that is forwarded to the server.
 *
 * rp       : buffered reader positioned just after the client's request line.
 * newreq   : in/out; already holds "GET <path> HTTP/1.0\r\n". Treated as a
 *            buffer of MAXLINE bytes, which is what the caller in this file
 *            passes.
 * hostname : host used for the generated Host header.
 * port     : port (as text) used for the generated Host header.
 *
 * Client headers are copied through up to the blank line, except Host,
 * User-Agent, Connection and Proxy-Connection, which are replaced by the
 * proxy's own values. Header names are matched at the start of the line and
 * without regard to case. Room for the mandatory headers is reserved first;
 * a client header that would not fit is dropped whole.
 */
void build_requesthdrs(rio_t *rp, char *newreq, char *hostname, char* port) {
    char buf[MAXLINE], tail[MAXLINE];
    size_t len = strlen(newreq);
    size_t tail_len, limit;
    int n;

    /* Format the mandatory trailer first so its size is known. */
    n = snprintf(tail, sizeof tail, "Host: %s:%s\r\n%s%s%s\r\n",
                 hostname, port, user_agent_hdr, conn_hdr, prox_hdr);
    if (n < 0)
        tail_len = 0;
    else if ((size_t)n >= sizeof tail)
        tail_len = sizeof tail - 1;
    else
        tail_len = (size_t)n;

    /* Maximum length newreq may reach before the trailer is appended. */
    limit = (len + tail_len < MAXLINE) ? MAXLINE - 1 - tail_len : len;

    while (Rio_readlineb(rp, buf, MAXLINE) > 0) {
        if (!strcmp(buf, "\r\n")) break;        /* end of client headers */
        if (is_replaced_header(buf)) continue;
        append_if_fits(newreq, &len, limit, buf);
    }

    if (len < MAXLINE)
        snprintf(newreq + len, MAXLINE - len, "%s", tail);
}

/*
 * thread - start routine serving one client connection.
 *
 * vargp points to a heap-allocated int holding the connected descriptor.
 * The routine copies the descriptor, detaches itself, frees the cell, runs
 * the transaction and closes the descriptor. Always returns NULL.
 */
void *thread(void *vargp) {
    int *fd_cell = (int *)vargp;
    int client_fd = *fd_cell;

    /* Detached: nobody has to join this thread. */
    Pthread_detach(pthread_self());
    Free(fd_cell);

    /* Run the transaction, then drop the connection. */
    doit(client_fd);
    Close(client_fd);
    return NULL;
}

void init_cache() {
    Sem_init(&mutex, 0, 1);
    Sem_init(&w, 0, 1);
    readcnt = 0;
    cache = (CacheLine *)Malloc(sizeof(CacheLine) * 10);
    for (int i = 0; i < 10; i++) {
        cache[i].name = (char *)Malloc(sizeof(char) * 256);
        cache[i].flag = (int *)Malloc(sizeof(int));
        cache[i].cnt = (int *)Malloc(sizeof(int));
        cache[i].object = (char *)Malloc(sizeof(char) * MAX_OBJECT_SIZE);
        *(cache[i].flag) = 0;
        *(cache[i].cnt) = 0;
    }
}

int reader(int fd, char *url) {
    int in_cache = 0;
    P(&mutex);
    readcnt++;
    if (readcnt == 1) P(&w);
    V(&mutex);
    
    for (int i = 0; i < 10; ++i) {
        if (*(cache[i].flag) == 1 && !strcmp(cache[i].name, url)) { //命中
            Rio_writen(fd, cache[i].object, MAX_OBJECT_SIZE);
            in_cache = 1;
            *(cache[i].cnt) = 0;
            break;
        }
    }
    for (int i = 0; i < 10; i++)
        (*(cache[i].cnt))++;
    P(&mutex);
    readcnt--;
    if (readcnt == 0)
        V(&w);
    V(&mutex);
    return in_cache;
}

void writer(char *url, char *buf) {
    int in_cache = 0;
    P(&w);
    for (int i = 0; i < 10; ++i) {
        if (*(cache[i].flag) == 1 && !strcmp(cache[i].name, url)) { //命中
            in_cache = 1;
            *(cache[i].cnt) = 0;
            break;
        }
    }
    //未命中替换或者插入
    if (in_cache == 0) {
        int ind = 0;
        int max_cnt = 0;
        for (int i = 0; i < 10; ++i) {
            if (*(cache[i].flag) == 0) {    //unused
                ind = i;
                break;
            }
            if (*(cache[i].cnt) > max_cnt) {
                ind = i;
                max_cnt = *(cache[i].cnt);
            }
        }
        *(cache[ind].flag) = 1;
        strcpy(cache[ind].name, url);
        strcpy(cache[ind].object, buf);
        *(cache[ind].cnt) = 0;
    }
    for (int i = 0; i < 10; i++)
        (*(cache[i].cnt))++;
    V(&w);
}


/* test
    telnet 127.0.0.1 4500
    GET http://www.cmu.edu/hub/index.html HTTP/1.1
    curl -v --proxy http://localhost:4501 http://localhost:4502/home.html
    GET http://www.cmu.edu:80/hub/index.html HTTP/1.1
    GET /hub/index.html HTTP/1.0
 
*/

最终结果

在这里插入图片描述

  • 6
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 6
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 6
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值