CSAPP的proxy lab是和shelllab比较类似的情况,有大量的书中实例代码作为参考,所以整体撰写难度不高,照猫画虎即可。part1对应着第十一章网络部分,part2对应着12章并发部分,part3对应着之前的cache章。
part1 单线程代理
第一部分为单线程代理,只需要照着书本上的tiny服务器的代码稍微修改一下即可。
首先分析一下需求,整体需求可分为三部分:
- 接受客户的连接请求,接受客户的GET信息并储存。
- 分析客户的GET信息,获得客户需要连接的目标地址和目标文件,然后将自身作为客户去连接目标地址并获取目标文件。
- 将获得的目标文件回传给客户。
整体可按上述步骤编写代码如下:
// #include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes (not referenced by the code in this listing) */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* You won't lose style points for including this long line in your code */
/* Fixed User-Agent header line sent upstream; stored without a trailing CRLF. */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3";
/* Fixed connection-control header lines sent upstream; stored without a trailing CRLF. */
static const char *connection_hdr = "Connection: close";
static const char *proxy_connection_hdr = "Proxy-Connection: close";
/* Serve one accepted client connection; main() closes the descriptor afterwards. */
void handleClient(int connectFd);
/* Stub that currently does nothing with the client's request headers. */
void readHeaders(rio_t *rp);
/* Write an HTTP/1.0 error response with a small HTML body to fd. */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg);
/*
 * main - entry point of the sequential proxy.
 *
 * Expects exactly one command-line argument: the port to listen on.
 * Connections are accepted one at a time; each one is served to completion
 * by handleClient() and then closed before the next accept.
 *
 * Never returns in normal operation (the accept loop is infinite).
 */
int main(int argc, char **argv)
{
    int listenFd, connectFd;
    socklen_t clientLen;
    struct sockaddr_storage clientAddr;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    listenFd = Open_listenfd(argv[1]);
    while (1) {
        clientLen = sizeof(clientAddr);
        connectFd = Accept(listenFd, (SA *)&clientAddr, &clientLen);
        /*
         * The peer's host/service names are deliberately not looked up:
         * nothing used the result, the lookup can block the accept loop on
         * DNS, and the csapp Getnameinfo wrapper terminates the whole
         * process when the lookup fails.
         */
        handleClient(connectFd);
        Close(connectFd);
    }
    return 0; /* not reached */
}
/*
 * parseUri - split a proxy request target into host, port and path.
 *
 * Accepts "http://host[:port][/path]" and the scheme-less form
 * "host[:port][/path]". The port defaults to "80" when none is given.
 * *path is set to point at the path inside uri, or at "/" when the URI
 * ends right after the host[:port] part.
 *
 * Returns 0 on success, -1 when the host or port is empty or does not fit
 * into the caller's buffer.
 */
static int parseUri(const char *uri, char *host, size_t hostSize,
                    char *port, size_t portSize, const char **path)
{
    static const char scheme[] = "http://";
    const size_t schemeLen = sizeof(scheme) - 1;
    const char *authority = uri;
    const char *colon;
    size_t authorityLen, hostLen, portLen;

    /* Only strip the scheme when it is a real prefix, not a substring. */
    if (strncasecmp(uri, scheme, schemeLen) == 0)
        authority += schemeLen;

    /* host[:port] runs up to the first '/' or to the end of the string. */
    authorityLen = strcspn(authority, "/");
    colon = memchr(authority, ':', authorityLen);
    hostLen = colon ? (size_t)(colon - authority) : authorityLen;
    if (hostLen == 0 || hostLen >= hostSize)
        return -1;
    memcpy(host, authority, hostLen);
    host[hostLen] = '\0';

    if (colon) {
        portLen = authorityLen - hostLen - 1;
        if (portLen == 0 || portLen >= portSize)
            return -1;
        memcpy(port, colon + 1, portLen);
        port[portLen] = '\0';
    } else {
        /* No port given: use the default HTTP port. */
        if (portSize < sizeof("80"))
            return -1;
        memcpy(port, "80", sizeof("80"));
    }

    *path = authority[authorityLen] != '\0' ? authority + authorityLen : "/";
    return 0;
}

/*
 * handleClient - serve one proxied HTTP request on connectFd.
 *
 * Reads the request line from the client, accepts only GET, connects to the
 * origin server named in the URI, sends it an HTTP/1.0 request carrying the
 * fixed User-Agent / Connection / Proxy-Connection headers, and relays the
 * server's reply to the client until the server closes the connection.
 *
 * Malformed or unsupported requests and connection failures are reported to
 * the client through clienterror(). connectFd is NOT closed here; the caller
 * owns it.
 */
void handleClient(int connectFd){
    char request[MAXLINE], outbound[MAXLINE], buf[MAXLINE];
    /* uri is as large as request, so an unbounded %s from request cannot overflow it. */
    char method[32], uri[MAXLINE], host[256], port[16];
    const char *path;
    rio_t clientRio, targetRio;
    ssize_t n;
    int len, targetFd, defaultPort;

    Rio_readinitb(&clientRio, connectFd);
    if (!Rio_readlineb(&clientRio, request, MAXLINE))
        return; /* client closed the connection without sending anything */

    if (sscanf(request, "%31s %s", method, uri) != 2) {
        clienterror(connectFd, "request", "400", "Bad Request",
                    "Proxy could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(connectFd, method, "501", "Not Implemented",
                    "Proxy does not implement the method");
        return;
    }
    if (parseUri(uri, host, sizeof(host), port, sizeof(port), &path) < 0) {
        clienterror(connectFd, method, "400", "Bad Request",
                    "Proxy could not parse the request URI");
        return;
    }

    /* Build the whole upstream request in one bounded call. */
    defaultPort = (strcmp(port, "80") == 0);
    len = snprintf(outbound, sizeof(outbound),
                   "GET %s HTTP/1.0\r\n"
                   "Host: %s%s%s\r\n"
                   "%s\r\n"
                   "%s\r\n"
                   "%s\r\n\r\n",
                   path,
                   host, defaultPort ? "" : ":", defaultPort ? "" : port,
                   user_agent_hdr, connection_hdr, proxy_connection_hdr);
    if (len < 0 || (size_t)len >= sizeof(outbound)) {
        clienterror(connectFd, method, "400", "Bad Request",
                    "Proxy request is too long");
        return;
    }

    /*
     * Use the lower-case open_clientfd: it returns a negative value on
     * failure, whereas the capitalised wrapper terminates the process, which
     * would make the error branch below unreachable.
     */
    targetFd = open_clientfd(host, port);
    if (targetFd < 0) {
        clienterror(connectFd, method, "404", "connect failure",
                    "the proxy server connect to target server failure");
        return;
    }

    Rio_writen(targetFd, outbound, (size_t)len);
    Rio_readinitb(&targetRio, targetFd);
    /* Forward exactly n bytes per read so binary payloads survive intact. */
    while ((n = Rio_readlineb(&targetRio, buf, MAXLINE)) > 0) {
        Rio_writen(connectFd, buf, (size_t)n);
    }
    Close(targetFd);
}
/*
 * readHeaders - placeholder for consuming the client's request headers.
 *
 * Currently a no-op: nothing is read from rp and nothing is returned.
 * A real implementation would read lines until the blank "\r\n" line and
 * would also have to stop when the read reports end-of-file.
 */
void readHeaders(rio_t *rp){
    (void)rp; /* intentionally unused */
}
/*
 * clienterror - send an HTTP/1.0 error response with an HTML body to fd.
 *
 * fd       - descriptor of the client connection to write to
 * cause    - text shown after longmsg in the body (e.g. the offending method)
 * errnum   - status code as a string, e.g. "404"
 * shortmsg - reason phrase used in the status line and the body
 * longmsg  - longer human-readable explanation
 *
 * The body is formatted with a single bounded snprintf. The previous
 * sprintf(body, "%s...", body) pattern passed the destination as its own
 * source, which is undefined behaviour, and had no length limit.
 * Over-long input is truncated rather than overflowing the buffers.
 */
void clienterror(int fd, char *cause, char *errnum,
                 char *shortmsg, char *longmsg)
{
    char buf[MAXLINE], body[MAXBUF];
    int bodyLen;

    /* Build the HTTP response body */
    bodyLen = snprintf(body, sizeof(body),
                       "<html><title>Tiny Error</title>"
                       "<body bgcolor=ffffff>\r\n"
                       "%s: %s\r\n"
                       "<p>%s: %s\r\n"
                       "<hr><em>The Tiny Web server</em>\r\n",
                       errnum, shortmsg, longmsg, cause);
    if (bodyLen < 0) {
        body[0] = '\0';
        bodyLen = 0;
    } else if ((size_t)bodyLen >= sizeof(body)) {
        bodyLen = (int)sizeof(body) - 1; /* output was truncated */
    }

    /* Print the HTTP response */
    snprintf(buf, sizeof(buf), "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof(buf), "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof(buf), "Content-length: %d\r\n\r\n", bodyLen);
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, (size_t)bodyLen);
}
整体按照tiny服务器代码改写,最大的区别是tiny服务器只写了Server端的代码,而代理程序中还需要有作为客户端去连接原先客户要连接的目标服务器的代码。
在这里有一个小坑,我之前是这么写的回写到客户fd的代码:
char contentToClient[1024];
strcpy(contentToClient, "");
while( (n = Rio_readlineb(&targetRio, buf, MAXLINE)) > 0 ){
strcat(contentToClient, buf);
}
Rio_writen(connectFd, contentToClient, n);
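乍看没啥问题,但其实这种写法顶多只能处理文本文件(如.html .txt),对于.jpeg格式和.gif格式的图片等会出现问题。原因是图片是二进制数据,其中可能出现值为0的字节,而strcat把'\0'当作字符串的结尾,之后的数据就被丢掉了。另外还要注意两点:contentToClient只有1024字节,响应稍大就会溢出;循环结束时n已经变成0,写回时应该使用累计的总长度而不是n。所以更稳妥的做法是像上面的代码那样,每读到n个字节就立刻调用Rio_writen(connectFd, buf, n)写回客户端。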
part2 并发代理
根据12章的内容,我们选择多线程并发,因为代码书写较为简洁、改造方便。
代码如下,只需要稍加改造,将线程的行为函数改写成单线程版本中handleClient的内容即可:
// #include <stdio.h>
#include "csapp.h"
/* Recommended max cache and object sizes (not referenced by the code in this listing) */
#define MAX_CACHE_SIZE 1049000
#define MAX_OBJECT_SIZE 102400
/* You won't lose style points for including this long line in your code */
/* Fixed User-Agent header line sent upstream; stored without a trailing CRLF. */
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3";
/* Fixed connection-control header lines sent upstream; stored without a trailing CRLF. */
static const char *connection_hdr = "Connection: close";
static const char *proxy_connection_hdr = "Proxy-Connection: close";
/* NOTE(review): declared here, but no definition of handleClient appears in this listing. */
void handleClient(int connectFd);
/* Stub that currently does nothing with the client's request headers. */
void readHeaders(rio_t *rp);
/* Write an HTTP/1.0 error response with a small HTML body to fd. */
void clienterror(int fd, char *cause, char *errnum, char *shortmsg, char *longmsg);
/* Thread start routine; receives a pointer to a heap-allocated connected descriptor. */
void *threadHandle(void *vargp);
/*
 * main - entry point of the thread-per-connection proxy.
 *
 * Expects exactly one command-line argument: the port to listen on.
 * For every accepted connection the descriptor is stored in its own heap
 * cell and handed to a new thread running threadHandle(), which is
 * responsible for closing the descriptor and freeing the cell.
 *
 * Never returns in normal operation (the accept loop is infinite).
 */
int main(int argc, char **argv)
{
    int listenFd, *connectFdp;
    pthread_t tid;
    socklen_t clientLen;
    struct sockaddr_storage clientAddr;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    listenFd = Open_listenfd(argv[1]);
    while (1) {
        clientLen = sizeof(clientAddr);
        /*
         * One heap cell per connection, so the next Accept cannot overwrite
         * the descriptor before the new thread has read it. The checked
         * Malloc wrapper is used so a failed allocation is never dereferenced.
         */
        connectFdp = Malloc(sizeof(*connectFdp));
        *connectFdp = Accept(listenFd, (SA *)&clientAddr, &clientLen);
        /*
         * The peer's host/service names are deliberately not looked up:
         * nothing used the result, the lookup can block the accept loop on
         * DNS, and the csapp Getnameinfo wrapper terminates the whole
         * process when the lookup fails (e.g. when a service name does not
         * fit a small buffer).
         */
        Pthread_create(&tid, NULL, threadHandle, connectFdp);
    }
    return 0; /* not reached */
}
/*
 * parseUri - split a proxy request target into host, port and path.
 *
 * Accepts "http://host[:port][/path]" and the scheme-less form
 * "host[:port][/path]". The port defaults to "80" when none is given.
 * *path is set to point at the path inside uri, or at "/" when the URI
 * ends right after the host[:port] part.
 *
 * Returns 0 on success, -1 when the host or port is empty or does not fit
 * into the caller's buffer.
 */
static int parseUri(const char *uri, char *host, size_t hostSize,
                    char *port, size_t portSize, const char **path)
{
    static const char scheme[] = "http://";
    const size_t schemeLen = sizeof(scheme) - 1;
    const char *authority = uri;
    const char *colon;
    size_t authorityLen, hostLen, portLen;

    /* Only strip the scheme when it is a real prefix, not a substring. */
    if (strncasecmp(uri, scheme, schemeLen) == 0)
        authority += schemeLen;

    /* host[:port] runs up to the first '/' or to the end of the string. */
    authorityLen = strcspn(authority, "/");
    colon = memchr(authority, ':', authorityLen);
    hostLen = colon ? (size_t)(colon - authority) : authorityLen;
    if (hostLen == 0 || hostLen >= hostSize)
        return -1;
    memcpy(host, authority, hostLen);
    host[hostLen] = '\0';

    if (colon) {
        portLen = authorityLen - hostLen - 1;
        if (portLen == 0 || portLen >= portSize)
            return -1;
        memcpy(port, colon + 1, portLen);
        port[portLen] = '\0';
    } else {
        /* No port given: use the default HTTP port. */
        if (portSize < sizeof("80"))
            return -1;
        memcpy(port, "80", sizeof("80"));
    }

    *path = authority[authorityLen] != '\0' ? authority + authorityLen : "/";
    return 0;
}

/*
 * serveClient - serve one proxied HTTP request on connectFd.
 *
 * Reads the request line, accepts only GET, connects to the origin server
 * named in the URI, sends it an HTTP/1.0 request carrying the fixed
 * User-Agent / Connection / Proxy-Connection headers, and relays the reply
 * to the client until the server closes the connection.
 *
 * Every exit path leaves connectFd open; the caller closes it.
 */
static void serveClient(int connectFd)
{
    char request[MAXLINE], outbound[MAXLINE], buf[MAXLINE];
    /* uri is as large as request, so an unbounded %s from request cannot overflow it. */
    char method[32], uri[MAXLINE], host[256], port[16];
    const char *path;
    rio_t clientRio, targetRio;
    ssize_t n;
    int len, targetFd, defaultPort;

    Rio_readinitb(&clientRio, connectFd);
    if (!Rio_readlineb(&clientRio, request, MAXLINE))
        return; /* client closed the connection without sending anything */
    readHeaders(&clientRio);

    if (sscanf(request, "%31s %s", method, uri) != 2) {
        clienterror(connectFd, "request", "400", "Bad Request",
                    "Proxy could not parse the request line");
        return;
    }
    if (strcasecmp(method, "GET")) {
        clienterror(connectFd, method, "501", "Not Implemented",
                    "Proxy does not implement the method");
        return;
    }
    if (parseUri(uri, host, sizeof(host), port, sizeof(port), &path) < 0) {
        clienterror(connectFd, method, "400", "Bad Request",
                    "Proxy could not parse the request URI");
        return;
    }

    /* Build the whole upstream request in one bounded call. */
    defaultPort = (strcmp(port, "80") == 0);
    len = snprintf(outbound, sizeof(outbound),
                   "GET %s HTTP/1.0\r\n"
                   "Host: %s%s%s\r\n"
                   "%s\r\n"
                   "%s\r\n"
                   "%s\r\n\r\n",
                   path,
                   host, defaultPort ? "" : ":", defaultPort ? "" : port,
                   user_agent_hdr, connection_hdr, proxy_connection_hdr);
    if (len < 0 || (size_t)len >= sizeof(outbound)) {
        clienterror(connectFd, method, "400", "Bad Request",
                    "Proxy request is too long");
        return;
    }

    /*
     * Use the lower-case open_clientfd: it returns a negative value on
     * failure, whereas the capitalised wrapper terminates the process, which
     * would take every other thread down and make the branch below
     * unreachable.
     */
    targetFd = open_clientfd(host, port);
    if (targetFd < 0) {
        clienterror(connectFd, method, "404", "connect failure",
                    "the proxy server connect to target server failure");
        return;
    }

    Rio_writen(targetFd, outbound, (size_t)len);
    Rio_readinitb(&targetRio, targetFd);
    /* Forward exactly n bytes per read so binary payloads survive intact. */
    while ((n = Rio_readlineb(&targetRio, buf, MAXLINE)) > 0) {
        Rio_writen(connectFd, buf, (size_t)n);
    }
    Close(targetFd);
}

/*
 * threadHandle - thread start routine for one client connection.
 *
 * vargp points to a heap-allocated int holding the connected descriptor.
 * The thread detaches itself, serves the request, then closes the
 * descriptor and frees the cell on every path, including malformed or
 * unsupported requests, which previously returned early and leaked both.
 *
 * Always returns NULL.
 */
void *threadHandle(void *vargp){
    int connectFd = *(int *)vargp;

    Pthread_detach(pthread_self());
    serveClient(connectFd);
    Close(connectFd);
    Free(vargp);
    return NULL;
}
/*
 * readHeaders - placeholder for consuming the client's request headers.
 *
 * Currently a no-op: nothing is read from rp and nothing is returned.
 * A real implementation would read lines until the blank "\r\n" line and
 * would also have to stop when the read reports end-of-file.
 */
void readHeaders(rio_t *rp){
    (void)rp; /* intentionally unused */
}
/*
 * clienterror - send an HTTP/1.0 error response with an HTML body to fd.
 *
 * fd       - descriptor of the client connection to write to
 * cause    - text shown after longmsg in the body (e.g. the offending method)
 * errnum   - status code as a string, e.g. "404"
 * shortmsg - reason phrase used in the status line and the body
 * longmsg  - longer human-readable explanation
 *
 * The body is formatted with a single bounded snprintf. The previous
 * sprintf(body, "%s...", body) pattern passed the destination as its own
 * source, which is undefined behaviour, and had no length limit.
 * Over-long input is truncated rather than overflowing the buffers.
 */
void clienterror(int fd, char *cause, char *errnum,
                 char *shortmsg, char *longmsg)
{
    char buf[MAXLINE], body[MAXBUF];
    int bodyLen;

    /* Build the HTTP response body */
    bodyLen = snprintf(body, sizeof(body),
                       "<html><title>Tiny Error</title>"
                       "<body bgcolor=ffffff>\r\n"
                       "%s: %s\r\n"
                       "<p>%s: %s\r\n"
                       "<hr><em>The Tiny Web server</em>\r\n",
                       errnum, shortmsg, longmsg, cause);
    if (bodyLen < 0) {
        body[0] = '\0';
        bodyLen = 0;
    } else if ((size_t)bodyLen >= sizeof(body)) {
        bodyLen = (int)sizeof(body) - 1; /* output was truncated */
    }

    /* Print the HTTP response */
    snprintf(buf, sizeof(buf), "HTTP/1.0 %s %s\r\n", errnum, shortmsg);
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof(buf), "Content-type: text/html\r\n");
    Rio_writen(fd, buf, strlen(buf));
    snprintf(buf, sizeof(buf), "Content-length: %d\r\n\r\n", bodyLen);
    Rio_writen(fd, buf, strlen(buf));
    Rio_writen(fd, body, (size_t)bodyLen);
}
需要注意的是,对每个新的连接请求都要malloc出来一个fd的位置,然后将该fd的指针传给线程函数作为参数,在线程函数中注意将线程detach。然后在最后结束时free掉为储存该客户fd malloc的位置。然后在线程中关闭对客户和目标的fd。
part3 加入缓存的并发代理
待我做完cache lab再来填坑~