微型Web服务器

本文介绍了作者为了工作需求实现的一个微型Web服务器,该服务器能接收并处理Get和Post请求。对于Get请求,服务器会转发给原始服务器,解析响应的HTML并插入一段脚本,用于在页面加载后分析结构并Post给服务器。文章详细描述了处理过程,包括URL处理、数据解码、注入脚本等步骤,并提供了部分源码。
摘要由CSDN通过智能技术生成

      工作需要,最近写了一个小的Web服务器,在指定端口监听,接收客户的Get和Post请求。收到客户的Get请求后将请求直接转发给原始服务器,然后将原始服务器返回的html文档解析,插入一段脚本后再返回给客户。这段脚本会在页面加载完毕时分析网页结构,将结构信息Post给Web服务器,然后再转发给处理程序。

      转发Get时,需要获取主机名和主机名下的网页地址,然后根据主机名获取主机IP地址,然后连接主机,如果URL上没有端口号则以默认端口号80连接。主机名需要复制下来,因为注入脚本时需要在head中加base标签。Get请求需要修改两处:一是Get节的URL,一是Host节的主机名。将此请求发给服务器后,首先尝试接收一个回应包,寻找Content-Length这个节,这个节保存了HTML正文的长度,然后寻找http协议头末尾标识符\r\n\r\n,一直接收完指定长度数据后关闭连接。如果没有长度则一直读到对方关闭连接为止。

      接收完正文后,首先需要检测Content-Type是否为text/html,这表示正文是一个html文档,不是图片等其他数据。如果是text/html则需要注入脚本。注入脚本前需要检测Transfer-Encoding是否为chunked,如果是则表示正文使用了chunked编码,需要先进行chunked解码。然后检测Content-Encoding是否为gzip,如果是则需要进行gzip解码。最后将脚本注入html文本的最前面,将<base href=http://hostname/ />注入head标记的最前面。必要的话需要对修改的文本进行gzip编码和chunked编码以及修正文本长度节Content-Length。

      chunked编码格式为: 文本长度+\r\n+文本+\r\n,以0\r\n\r\n结尾,文本长度为16进制数据。某些服务器返回的数据包在0\r\n\r\n的末尾可能还有数据,如http://www.ahut.edu.cn 。这些都是无效数据,需要滤除。gzip编解码可以使用开源项目Zlib的deflate和inflate,初始化时分别用windowBits为47的inflateInit2和windowBits为-15的deflateInit2。解码可以直接进行,编码时需要先加"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff"这十个字符数据作为开始,然后加入编码后的数据,最后在末尾加上4个字节的crc校验值和4个字节的数据真实长度作为结尾,以大端模式编码。

      注入脚本的base标签是为了防止页面中的相对地址无法解析,其他脚本的工作就是遍历DOM树,然后将数据编码后Post给Web服务器。DOM树中可能会出现树枝交叉现象,如http://www.google.com.hk ,所以当栈空时就要提前退出。脚本最好全部注入到head标记的最前面,如果注入到html标记的外面可能改变页面的显示。Post数据是直接附在http协议头的后面的,定位到协议头结尾标记\r\n\r\n后就可以获取数据了。某些浏览器在提交数据前会发送一个OPTIONS请求,如Google浏览器。

 

源码:

// PostDataReceiver.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"
#include <Winsock2.h>
#pragma comment(lib,"ws2_32.lib")

#include "GZip.h"

// Fallback ASSERT for builds where no other header defines it: expands to
// nothing, so the argument expression is NOT evaluated.
#ifndef ASSERT
#define ASSERT(x)
#endif

// Fallback VERIFY for builds where no other header defines it: the argument is
// still evaluated (its side effects run) but the result is discarded.
#ifndef VERIFY
#define VERIFY(x) ((void)(x))
#endif

// Single shared I/O buffer: holds the client request, then the rewritten
// request, then the upstream response (all edited in place).
CHAR cBuffer[1024000];
// Contents of the script file loaded in _tmain and injected into HTML pages.
CHAR cScript[10240];

// Case-insensitive substring search on NUL-terminated strings.
//
// pStr : haystack.
// pSub : needle.
// Returns a pointer to the first occurrence of pSub inside pStr (compared with
// _strnicmp), or NULL when there is none. An empty needle matches at pStr.
static PCHAR strstrnocase(IN PCHAR pStr,
        IN PCHAR pSub)
{
 size_t iStrLen = strlen(pStr);
 size_t iSubLen = strlen(pSub);

 // A needle longer than the haystack can never match. Checking this first
 // also keeps the end pointer below from being computed in front of pStr.
 if(iSubLen > iStrLen)
 {
  return NULL;
 }

 // Last position (inclusive) at which a full-length match can still start.
 // The scan must include it, otherwise a match at the very end of the
 // haystack is missed.
 PCHAR pLast = pStr + (iStrLen - iSubLen);

 for(PCHAR pScan=pStr; pScan<=pLast; ++pScan)
 {
  if(!_strnicmp(pScan, pSub, iSubLen))
  {
   return pScan;
  }
 }

 return NULL;
}

// Returns the number of decimal digits needed to print iData.
// Zero is printed as "0" and therefore counts as one digit.
static UINT32 GetDecLen(IN UINT32 iData)
{
 UINT32 iLen = 0;

 // do/while so that the value 0 still yields a length of 1.
 do
 {
  ++iLen;
  iData /= 10;
 } while(iData > 0);

 return iLen;
}

// Inserts lpszScript followed by lpszBaseUrl into an HTML document in place.
//
// pHttpDataStart : start of the HTML text; the buffer must have room for
//                  iHttpDataLen + strlen(lpszScript) + strlen(lpszBaseUrl) + 1
//                  bytes (the extra byte receives a NUL terminator).
// iHttpDataLen   : number of valid bytes at pHttpDataStart.
// lpszScript     : NUL-terminated text to insert first.
// lpszBaseUrl    : NUL-terminated text to insert right after the script.
//
// The insertion point is just behind the '>' that closes the first <head ...>
// tag (matched case-insensitively); when no such tag exists the text is
// inserted at the very beginning of the document.
// Returns the number of bytes inserted.
static UINT32 InjectScript(IN OUT PCHAR pHttpDataStart,
         IN UINT32 iHttpDataLen,
         IN LPCSTR lpszScript,
         IN LPCSTR lpszBaseUrl)
{
 static const CHAR cHeadTag[] = "<head";
 const UINT32 iHeadTagLen = (UINT32)(sizeof(cHeadTag) - 1);

 UINT32 iScriptLen = (UINT32)strlen(lpszScript);
 UINT32 iBaseUrlLen = (UINT32)strlen(lpszBaseUrl);
 UINT32 iInjectLen = iScriptLen + iBaseUrlLen;

 PCHAR pDataEnd = pHttpDataStart + iHttpDataLen;
 PCHAR pInjectPos = pHttpDataStart;

 // Search only inside the iHttpDataLen valid bytes: the data is not
 // guaranteed to be NUL-terminated (e.g. a freshly inflated buffer).
 for(PCHAR pScan=pHttpDataStart; pScan+iHeadTagLen<pDataEnd; ++pScan)
 {
  if(*pScan!='<' || _strnicmp(pScan, cHeadTag, iHeadTagLen))
  {
   continue;
  }

  // Require a tag-name boundary so that "<header>" is not mistaken for
  // "<head>".
  CHAR cNext = pScan[iHeadTagLen];
  if(cNext!='>' && cNext!=' ' && cNext!='\t' && cNext!='\r' && cNext!='\n')
  {
   continue;
  }

  // An unterminated tag keeps the default position instead of yielding
  // an invalid pointer.
  PCHAR pTagEnd = (PCHAR)memchr(pScan, '>', (size_t)(pDataEnd - pScan));
  if(pTagEnd != NULL)
  {
   pInjectPos = pTagEnd + 1;
  }
  break;
 }

 // Open a gap of iInjectLen bytes; memmove handles the overlapping ranges.
 memmove(pInjectPos+iInjectLen, pInjectPos, (size_t)(pDataEnd - pInjectPos));
 memcpy(pInjectPos, lpszScript, iScriptLen);
 memcpy(pInjectPos+iScriptLen, lpszBaseUrl, iBaseUrlLen);

 // Keep the result usable as a C string for the callers' string searches.
 pHttpDataStart[iHttpDataLen+iInjectLen] = '\0';
 return iInjectLen;
}

// Wraps a body in HTTP "chunked" transfer coding, in place, as one chunk:
//   <hex length>\r\n<data>\r\n0\r\n\r\n
//
// pData     : body to encode; the buffer must have room for the framing
//             (at most 8 hex digits + 2 + 7 extra bytes).
// iDataLen  : number of body bytes.
// pChunkLen : receives the total encoded length.
// Always returns TRUE.
BOOL DataChunk(IN OUT PCHAR pData,
      IN UINT32 iDataLen,
      OUT PUINT32 pChunkLen)
{
 // An empty body is just the terminating zero-length chunk. Emitting a
 // "0\r\n" size line for it and then the regular trailer would leave stray
 // bytes after the terminator.
 if(iDataLen == 0)
 {
  memcpy(pData, "0\r\n\r\n", 5);
  *pChunkLen = 5;
  return TRUE;
 }

 // Chunk-size line: length in hexadecimal followed by CRLF.
 CHAR cSizeLine[16] = {0};
 sprintf_s(cSizeLine, sizeof(cSizeLine), "%x\r\n", iDataLen);
 UINT32 iOffset = (UINT32)strlen(cSizeLine);

 // Make room for the size line in front of the data (ranges overlap).
 memmove(pData+iOffset, pData, iDataLen);
 memcpy(pData, cSizeLine, iOffset);

 // CRLF closing the chunk, then the zero-length last chunk (7 bytes).
 memcpy(pData+iOffset+iDataLen, "\r\n0\r\n\r\n", 7);
 *pChunkLen = iDataLen + iOffset + 7;
 return TRUE;
}

// Decodes an HTTP "chunked" body in place.
//
// pData       : chunked data; on return it starts with the decoded bytes.
// iDataLen    : number of encoded bytes.
// pUnchunkLen : receives the number of decoded bytes.
//
// Decoding stops at the zero-length chunk, at the first malformed size line,
// or at a chunk that is not completely contained in the input; anything after
// that point (including junk some servers append after "0\r\n\r\n") is
// dropped. Always returns TRUE.
BOOL DataUnchunk(IN OUT PCHAR pData,
     IN UINT32 iDataLen,
     OUT PUINT32 pUnchunkLen)
{
 PCHAR pScanPos = pData;           // read cursor in the encoded stream
 PCHAR pDataPos = pData;           // write cursor for decoded bytes
 PCHAR pEndPos = pData + iDataLen;

 while(pScanPos < pEndPos)
 {
  // Tolerate whitespace in front of the size, as "%x" parsing would.
  while(pScanPos<pEndPos
   && (*pScanPos==' ' || *pScanPos=='\t' || *pScanPos=='\r' || *pScanPos=='\n'))
  {
   ++pScanPos;
  }

  // Parse the hexadecimal chunk size without reading past pEndPos. The
  // size starts from 0 for every chunk, so a malformed line can never
  // reuse the previous chunk's length.
  UINT32 iSectionLen = 0;
  UINT32 iDigitCount = 0;
  for(; pScanPos<pEndPos; ++pScanPos, ++iDigitCount)
  {
   CHAR c = *pScanPos;
   UINT32 iDigit;

   if(c>='0' && c<='9')      iDigit = (UINT32)(c - '0');
   else if(c>='a' && c<='f') iDigit = (UINT32)(c - 'a') + 10;
   else if(c>='A' && c<='F') iDigit = (UINT32)(c - 'A') + 10;
   else break;

   if(iSectionLen > 0x0FFFFFFF)
   {
    iDigitCount = 0;     // would overflow 32 bits: treat as malformed
    break;
   }
   iSectionLen = (iSectionLen << 4) | iDigit;
  }

  if(iDigitCount==0 || iSectionLen==0)
  {
   break;                    // malformed size or terminating chunk
  }

  // Skip the rest of the size line (chunk extensions, CR) up to the LF.
  while(pScanPos<pEndPos && *pScanPos!='\n')
  {
   ++pScanPos;
  }
  if(pScanPos >= pEndPos)
  {
   break;
  }
  ++pScanPos;

  // The whole chunk must lie inside the input.
  if(iSectionLen > (UINT32)(pEndPos - pScanPos))
  {
   break;
  }

  // Destination is never behind the source, but the ranges may overlap.
  memmove(pDataPos, pScanPos, iSectionLen);
  pDataPos += iSectionLen;
  pScanPos += iSectionLen;

  // Skip the CRLF that closes the chunk data.
  if(pScanPos<pEndPos && *pScanPos=='\r') ++pScanPos;
  if(pScanPos<pEndPos && *pScanPos=='\n') ++pScanPos;
 }

 *pUnchunkLen = (UINT32)(pDataPos-pData);
 return TRUE;
}

BOOL RedirectGetRequest(IN OUT PCHAR pBuffer,
      OUT PUINT32 pGetLen,
      OUT PBOOL pNeedInject,
      OUT PCHAR pBaseUrl)
{
 *pGetLen = 0;
 *pNeedInject = FALSE;

 PCHAR pGetStart = pBuffer;
 PCHAR pHostStart = strstr(pBuffer, " ") + 2;
 PCHAR pPortStart = strstr(pHostStart, ":");
 PCHAR pUrlStart = strstr(pHostStart, "/");
 PCHAR pHttpStart = strstr(pHostStart, " ") + 1;
 PCHAR pNextSectionStart = strstr(pBuffer, "/r/n") + 2;
 PCHAR pRequestEnd = pBuffer + strlen(pBuffer);

 *(pHostStart-2) = '/0';
 *(pHttpStart-1) = '/0';
 *(pNextSectionStart-2) = '/0';

 if(pUrlStart > pHttpStart)
 {
  sprintf_s(pBaseUrl, MAX_PATH, "<base href=/"http://%s// " />", pHostStart);
  pUrlStart = "";
 }
 else
 {
  PCHAR pDirEnd = strrchr(pHostStart, '/');
  *pDirEnd = '/0';
  sprintf_s(pBaseUrl, MAX_PATH, "<base href=/"http://%s// " />", pHostStart);
  *pDirEnd = '/';
  *pUrlStart++ = '/0';
 }

 UINT32 iPort = 80;
 if(pPortStart!=NULL && pHostStart<pPortStart && pPortStart<pHttpStart)
 {
  *(pPortStart++) = '/0';
  sscanf_s(pPortStart, "%d", &iPort);
 }

 LPHOSTENT pHostEntry = gethostbyname(pHostStart);
 if(pHostEntry == NULL)
 {
  return FALSE;
 }

 struct sockaddr_in remoteAddr;
 remoteAddr.sin_addr.s_addr = *((u_long FAR *)(pHostEntry->h_addr));
 remoteAddr.sin_family = AF_INET;
 remoteAddr.sin_port = htons(iPort);

 SOCKET sSocket=socket(AF_INET, SOCK_STREAM, IPPROTO_IP);

 if(connect(sSocket, (LPSOCKADDR)&remoteAddr, sizeof(SOCKADDR)) != SOCKET_ERROR)
 {
  UINT32 iOffset = 0;
  UINT32 iHostLen = (UINT32)strlen(pHostStart);
  PCHAR pHostSectionStart = strstr(pNextSectionStart, "Host:") + 6;
  PCHAR pHostSectionEnd = strstr(pHostSectionStart, "/r/n");

  if((UINT32)(pHostSectionEnd-pHostSectionStart) > iHostLen)
  {
   iOffset = (UINT32)(pHostSectionEnd - pHostSectionStart) - iHostLen;
   for(PCHAR pTemp=pHostSectionEnd; pTemp<=pRequestEnd; ++pTemp)
   {
    *(pTemp-iOffset) = *pTemp;
   }
  }
  else if((UINT32)(pHostSectionEnd-pHostSectionStart) < iHostLen)
  {
   iOffset = iHostLen - (pHostSectionEnd - pHostSectionStart);
   for(PCHAR pTemp=pRequestEnd; pTemp>=pHostSectionEnd; --pTemp)
   {
    *(pTemp+iOffset) = *pTemp;
   }
  }

  memcpy(pHostSectionStart, pHostStart, iHostLen);

  CHAR cGetSectionBuffer[MAX_PATH];
  sprintf_s(cGetSectionBuffer, MAX_PATH, "%s /%s %s/r/n", pGetStart, pUrlStart, pHttpStart);
  UINT32 iGetSectionLen = (UINT32)strlen(cGetSectionBuffer);

  if((UINT32)(pNextSectionStart-pBuffer) > iGetSectionLen)
  {
   iOffset = (UINT32)(pNextSectionStart - pBuffer) - iGetSectionLen;
   for(PCHAR pTemp=pNextSectionStart; pTemp<=pRequestEnd; ++pTemp)
   {
    *(pTemp-iOffset) = *pTemp;
   }
  }
  else if((UINT32)(pNextSectionStart-pBuffer) < iGetSectionLen)
  {
   iOffset = iGetSectionLen - (UINT32)(pNextSectionStart - pBuffer);
   for(PCHAR pTemp=pRequestEnd; pTemp>=pNextSectionStart; --pTemp)
   {
    *(pTemp+iOffset) = *pTemp;
   }
  }

  memcpy(pBuffer, cGetSectionBuffer, iGetSectionLen);
  send(sSocket, cBuffer, (UINT32)(strlen(cBuffer)), 0);
  *pGetLen = recv(sSocket, cBuffer, sizeof(cBuffer)/sizeof(CHAR)-1, 0);

  UINT32 iResponseCode = 0;
  PCHAR pResponseCodeStart = strstr(cBuffer, " ") + 1;
  sscanf_s(pResponseCodeStart, "%d", &iResponseCode);

  if(iResponseCode == 200)
  {
    UINT32 iHttpDataLen = 0xFFFFFFFF;
    PCHAR pLengthStart = strstr(cBuffer, "Content-Length:") + 16;
    if((UINT32)(UINT64)pLengthStart != 16)
    {
     sscanf_s(pLengthStart, "%d", &iHttpDataLen);
    }
 
    PCHAR pHttpDataStart = strstr(cBuffer, "/r/n/r/n") + 4;
    UINT32 iHeadLen = (UINT32)(pHttpDataStart - cBuffer);
 
    while(*pGetLen-iHeadLen < iHttpDataLen)
    {
    UINT32 iRecvLen = recv(sSocket, cBuffer+*pGetLen, sizeof(cBuffer)/sizeof(CHAR)-*pGetLen-1, 0);

    if(iRecvLen==0 || iRecvLen==SOCKET_ERROR)
    {
     break;
    }

    *pGetLen += iRecvLen;
    } 

   PCHAR pContentTypeStart = strstr(cBuffer, "Content-Type:") + 14;
   if((UINT32)(UINT64)pContentTypeStart!=14 && !strncmp(pContentTypeStart, "text/html", 8))
   {
    *pNeedInject = TRUE;
   }
  }

  cBuffer[*pGetLen] = '/0';
  return TRUE;
 }

 return FALSE;
}

BOOL SolveResponse(IN OUT PCHAR pBuffer,
       IN OUT PUINT32 pGetLen,
       IN LPCSTR lpszScript,
       IN LPCSTR lpszBaseUrl)
{
 PCHAR pHttpDataStart = strstr(cBuffer, "/r/n/r/n") + 4;
 UINT32 iHeadLen = (UINT32)(pHttpDataStart - cBuffer);
 UINT32 iOldHttpDataLen = *pGetLen - iHeadLen;
 UINT32 iHttpDataLen = iOldHttpDataLen;

 PCHAR pTransferKindStart = strstr(cBuffer, "Transfer-Encoding:") + 19;
 if((UINT32)(UINT64)(pTransferKindStart) != 19
  && !strncmp(pTransferKindStart, "chunked", 7))
 {
  DataUnchunk(pHttpDataStart, iHttpDataLen, &iHttpDataLen);
 }

 PCHAR pEncodeKindStart = strstr(cBuffer, "Content-Encoding:") + 18;
 if((UINT32)(UINT64)(pEncodeKindStart) == 18)
 {
  iHttpDataLen += InjectScript(pHttpDataStart, iHttpDataLen, lpszScript, lpszBaseUrl);
 }
 else if(!strncmp(pEncodeKindStart, "gzip", 4))
 {
  PCHAR pUnPackedBuffer = new CHAR [iHttpDataLen * 8];
  ULONG iUnPackedLen = iHttpDataLen * 8;

  if(httpgzdecompress((PBYTE)pHttpDataStart, iHttpDataLen, (PBYTE)pUnPackedBuffer, &iUnPackedLen) == 0)
  {
   iUnPackedLen += InjectScript(pUnPackedBuffer, iUnPackedLen, lpszScript, lpszBaseUrl);

   if(httpgzcompress((PBYTE)pUnPackedBuffer, iUnPackedLen, (PBYTE)pHttpDataStart, (PULONG)&iUnPackedLen) == 0)
   {
    iHttpDataLen = iUnPackedLen;
   }
   else
   {
    printf("compress error!/r/n");
   }
  }
  else
  {
   printf("decompress error!/r/n");
  }

  delete [] pUnPackedBuffer;
 }
 else
 {
  printf("Unknown Content-Encoding: %4s!/r/n", pEncodeKindStart);
 }

 if((UINT32)(UINT64)(pTransferKindStart) != 19 && !strncmp(pTransferKindStart, "chunked", 7))
 {
  DataChunk(pHttpDataStart, iHttpDataLen, &iHttpDataLen);
 }

 PCHAR pLengthStart = strstr(pBuffer, "Content-Length:") + 16;
 if((UINT32)(UINT64)pLengthStart != 16)
 {
  UINT32 iOffset = GetDecLen(iHttpDataLen) - GetDecLen(iOldHttpDataLen);

  if(iOffset > 0)
  {
   for(PCHAR pTemp=&pBuffer[iHeadLen+iHttpDataLen]; pTemp>=pLengthStart; --pTemp)
   {
    *(pTemp+iOffset) = *pTemp;
   }
  }

  CHAR cLengthBuffer[MAX_PATH];
  sprintf_s(cLengthBuffer, MAX_PATH, "%ld", iHttpDataLen);
  memcpy_s(pLengthStart, MAX_PATH, cLengthBuffer, GetDecLen(iHttpDataLen));
  *pGetLen = iHeadLen + iOffset + iHttpDataLen;
 }
 else
 {
  *pGetLen = iHeadLen + iHttpDataLen;
 }

 return TRUE;
}

int _tmain(int argc, _TCHAR* argv[])
{
 FILE *fp = NULL;
 fopen_s(&fp, "Inject2.txt", "rb");
 fread(cScript, sizeof(cScript)/sizeof(CHAR), 1, fp);
 fclose(fp);

 WSADATA wsaData;
 WORD wVersionRequested = MAKEWORD(2, 2);
 VERIFY(WSAStartup(wVersionRequested, &wsaData) == 0);

 struct sockaddr_in localAddr;
 localAddr.sin_family = AF_INET;
 localAddr.sin_port = htons(1234);
 localAddr.sin_addr.s_addr = htonl(INADDR_ANY);

 SOCKET sServer;
 VERIFY((sServer=socket(AF_INET, SOCK_STREAM, IPPROTO_IP)) != INVALID_SOCKET);
 VERIFY(bind(sServer, (struct sockaddr *)&localAddr, sizeof(localAddr)) != SOCKET_ERROR);
 VERIFY(listen(sServer, 5) == 0);


 while(TRUE)
 {
  struct sockaddr addr;
  INT32 iAddrSize = sizeof(addr);
  SOCKET sClient = accept(sServer, &addr, &iAddrSize);

  memset(cBuffer, 0, sizeof(cBuffer)/sizeof(CHAR));
  cBuffer[recv(sClient, cBuffer, sizeof(cBuffer)/sizeof(CHAR)-1, 0)] = '/0';

  if(!strncmp(cBuffer, "GET", 3))
  {
   system("cls");
   printf_s("Connection Request Come!/r/n");

   UINT32 iGetLen = 0;
   BOOL bNeedInject = FALSE;
   CHAR cBaseUrl[MAX_PATH] = {0};
   if(RedirectGetRequest(cBuffer, &iGetLen, &bNeedInject, cBaseUrl))
   {
    if(bNeedInject)
    {
     SolveResponse(cBuffer, &iGetLen, cScript, cBaseUrl);
    }

    send(sClient, cBuffer, iGetLen, 0);
    printf_s("Redirect Data Sucessfully!/r/n");
   }
   else
   {
    printf_s("Redirect Data ERROR!/r/n");
   }
  }
  if(!strncmp(cBuffer, "POST", 4))
  {
   UINT32 iDataLen = (UINT32)strlen(cBuffer+5)+5;

   if(!strncmp(cBuffer+iDataLen-4, "/r/n/r/n", 4))
   {
    cBuffer[recv(sClient, cBuffer, sizeof(cBuffer)/sizeof(CHAR)-1, 0)] = '/0';
   }
   else
   {
    strcpy_s(cBuffer, iDataLen, strstr(cBuffer, "/r/n/r/n")+4);
   }

   for(PCHAR pPos=cBuffer; *pPos; ++pPos)
   {
    if(*pPos == -62)
    {
     *pPos = ' ';
    }
   }

   WCHAR wBuffer[10240];
   ::MultiByteToWideChar(CP_UTF8, 0, cBuffer, -1, wBuffer, 10000);
   ::WideCharToMultiByte(CP_ACP, 0, wBuffer, -1, cBuffer, 10000, NULL, NULL);
   printf_s("Data Received: %s/r/n", cBuffer);
  }

  shutdown(sClient, SD_RECEIVE);
  closesocket(sClient);
 }

 return 0;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值