工作需要,最近写了一个小的Web服务器,在指定端口监听,接收客户的Get和Post请求。收到客户的Get请求后将请求直接转发给原始服务器,然后将原始服务器返回的html文档解析,插入一段脚本后再返回给客户。这段脚本会在页面加载完毕时分析网页结构,将结构信息Post给Web服务器,然后再转发给处理程序。
转发Get时,需要获取主机名和主机名下的网页地址,然后根据主机名获取主机IP地址,然后连接主机,如果URL上没有端口号则以默认端口号80连接。主机名需要复制下来,因为注入脚本时需要在head中加base标签。Get请求需要修改两处:一是Get节的URL,一是Host节的主机名。将此请求发给服务器后,首先尝试接收一个回应包,寻找Content-Length这个节,这个节保存了HTML正文的长度,然后寻找http协议头末尾标识符\r\n\r\n,一直接收完指定长度数据后关闭连接。如果没有长度则一直读到对方关闭连接为止。
接收完正文后,首先需要检测Content-Type是否为text/html,这表示正文是一个html文档,不是图片等其他数据。如果是text/html则需要注入脚本。注入脚本前需要检测Transfer-Encoding是否为chunked,如果是则表示正文使用了chunked编码,需要先进行chunked解码。然后检测Content-Encoding是否为gzip,如果是则需要进行gzip解码。最后将脚本注入html文本的最前面,将<base href=http://hostname/ />注入head标记的最前面。必要的话需要对修改的文本进行gzip编码和chunked编码以及修正文本长度节Content-Length。
chunked编码格式为: 文本长度+\r\n+文本+\r\n,以0\r\n\r\n结尾,文本长度为16进制数据。某些服务器返回的数据包在0\r\n\r\n的末尾可能还有数据,如http://www.ahut.edu.cn 。这些都是无效数据,需要滤除。gzip编解码可以使用开源项目Zlib的deflate和inflate,初始化时分别用windowBits为47的inflateInit2和windowBits为-15的deflateInit2。解码可以直接进行,编码时需要先加"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff"这十个字符数据作为开始,然后加入编码后的数据,最后在末尾加上4个字节的crc校验值和4个字节的数据真实长度作为结尾,以大端模式编码。
注入脚本的base标签是为了防止页面中的相对地址无法解析,其他脚本的工作就是遍历DOM树,然后将数据编码后Post给Web服务器。DOM树中可能会出现树枝交叉现象,如http://www.google.com.hk ,所以当栈空时就要提前退出。脚本最好全部注入到head标记的最前面,如果注入到html标记的外面可能改变页面的显示。Post数据是直接附在http协议头的后面的,定位到协议头结尾标记\r\n\r\n后就可以获取数据了。某些浏览器在提交数据前会发送一个OPTIONS请求,如Google浏览器。
源码:
// PostDataReceiver.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include <Winsock2.h>
#pragma comment(lib,"ws2_32.lib")
#include "GZip.h"
// Fallback diagnostics macros, used only when nothing included earlier has
// already defined them.
// ASSERT(x) expands to nothing, so its argument is never evaluated.
#ifndef ASSERT
#define ASSERT(x)
#endif
// VERIFY(x) still evaluates its argument (so side effects happen) but discards
// the result; a failed check is therefore silently ignored.
#ifndef VERIFY
#define VERIFY(x) ((void)(x))
#endif
// Shared scratch buffer (1,024,000 bytes). _tmain receives each client request
// into it, and RedirectGetRequest/SolveResponse reuse it for the upstream
// response that is finally sent back to the client.
CHAR cBuffer[1024000];
// Holds the contents of Inject2.txt, read once by _tmain and passed to
// SolveResponse as the script to inject.
CHAR cScript[10240];
// Case-insensitive counterpart of strstr().
//
// pStr : NUL-terminated string to search in.
// pSub : NUL-terminated string to search for.
//
// Returns a pointer to the first occurrence of pSub inside pStr, compared
// without regard to ASCII case, or NULL when there is none. An empty pSub
// matches at pStr, as strstr() does.
static PCHAR strstrnocase(IN PCHAR pStr,
                          IN PCHAR pSub)
{
    size_t iStrLen = strlen(pStr);
    size_t iSubLen = strlen(pSub);

    // A needle longer than the haystack can never match; returning early also
    // avoids forming a pointer in front of pStr below.
    if(iSubLen > iStrLen)
    {
        return NULL;
    }

    // The last position where a match can start is INCLUSIVE: a needle that is
    // a suffix of the haystack starts exactly at pLast.
    PCHAR pLast = pStr + (iStrLen - iSubLen);
    for(PCHAR pScan = pStr; pScan <= pLast; ++pScan)
    {
        if(!_strnicmp(pScan, pSub, iSubLen))
        {
            return pScan;
        }
    }
    return NULL;
}
// Returns the number of characters needed to print iData in decimal.
// Zero is printed as "0" and therefore has length 1.
static UINT32 GetDecLen(IN UINT32 iData)
{
    UINT32 iLen = 1;
    while(iData >= 10)
    {
        ++iLen;
        iData /= 10;
    }
    return iLen;
}
// Inserts lpszScript followed by lpszBaseUrl into an HTML document in place.
//
// pHttpDataStart : start of the HTML text. The buffer must have room for at
//                  least iHttpDataLen + strlen(lpszScript) +
//                  strlen(lpszBaseUrl) + 1 bytes.
// iHttpDataLen   : number of valid bytes at pHttpDataStart.
// lpszScript     : NUL-terminated text inserted first.
// lpszBaseUrl    : NUL-terminated text inserted right after the script.
//
// The text is inserted immediately after the opening "<head ...>" tag; when
// the document has no such tag (or the tag is never closed) it is inserted at
// the very beginning. The result is NUL-terminated.
//
// Returns the number of bytes inserted.
static UINT32 InjectScript(IN OUT PCHAR pHttpDataStart,
                           IN UINT32 iHttpDataLen,
                           IN LPCSTR lpszScript,
                           IN LPCSTR lpszBaseUrl)
{
    UINT32 iScriptLen = (UINT32)strlen(lpszScript);
    UINT32 iBaseUrlLen = (UINT32)strlen(lpszBaseUrl);
    UINT32 iInjectLen = iScriptLen + iBaseUrlLen;
    PCHAR pDataEnd = pHttpDataStart + iHttpDataLen;

    // The tag search below works on C strings, so make sure it cannot run past
    // the valid data (e.g. into stale bytes left behind by chunk decoding).
    *pDataEnd = '\0';

    // Mutable copy of the tag name: strstrnocase takes non-const pointers.
    CHAR cHeadTag[] = "<head";

    PCHAR pInjectPos = pHttpDataStart;
    PCHAR pHeadSectionStart = strstrnocase(pHttpDataStart, cHeadTag);
    if(pHeadSectionStart != NULL)
    {
        PCHAR pTagEnd = strchr(pHeadSectionStart, '>');
        if(pTagEnd != NULL)
        {
            pInjectPos = pTagEnd + 1;
        }
    }

    // Open a gap of iInjectLen bytes; the +1 carries the terminator along.
    memmove(pInjectPos + iInjectLen, pInjectPos, (size_t)(pDataEnd - pInjectPos) + 1);
    memcpy(pInjectPos, lpszScript, iScriptLen);
    memcpy(pInjectPos + iScriptLen, lpszBaseUrl, iBaseUrlLen);
    return iInjectLen;
}
// Wraps a body in HTTP "chunked" transfer coding, in place, as one chunk:
//
//     <hex length>\r\n<data>\r\n0\r\n\r\n
//
// An empty body is encoded as just the terminating chunk "0\r\n\r\n".
//
// pData     : body bytes; the buffer must have room for the framing
//             (hex length + 2 bytes in front, 7 bytes behind).
// iDataLen  : number of body bytes.
// pChunkLen : receives the encoded length.
//
// Always returns TRUE.
BOOL DataChunk(IN OUT PCHAR pData,
               IN UINT32 iDataLen,
               OUT PUINT32 pChunkLen)
{
    static const CHAR cTrailer[] = "\r\n0\r\n\r\n";
    static const CHAR cLastChunk[] = "0\r\n\r\n";
    const UINT32 iTrailerLen = (UINT32)(sizeof(cTrailer) - 1);

    if(iDataLen == 0)
    {
        // A zero-length size line already IS the terminator; emitting the
        // regular framing would append junk after it.
        memcpy(pData, cLastChunk, sizeof(cLastChunk) - 1);
        *pChunkLen = (UINT32)(sizeof(cLastChunk) - 1);
        return TRUE;
    }

    CHAR cLenBuffer[MAX_PATH] = {0};
    sprintf_s(cLenBuffer, MAX_PATH, "%x\r\n", iDataLen);
    UINT32 iOffset = (UINT32)strlen(cLenBuffer);

    // Make room for the size line, then add the size line and the trailer.
    memmove(pData + iOffset, pData, iDataLen);
    memcpy(pData, cLenBuffer, iOffset);
    memcpy(pData + iOffset + iDataLen, cTrailer, iTrailerLen);

    *pChunkLen = iDataLen + iOffset + iTrailerLen;
    return TRUE;
}
// Decodes HTTP "chunked" transfer coding in place.
//
// Each chunk is "<hex length>[;extension]\r\n<data>\r\n"; the list ends with a
// zero-length chunk. Decoding stops at the terminating chunk, at a malformed
// size line, or at a chunk that claims more data than was received; anything
// after that point (including trailing junk some servers send) is dropped.
//
// pData       : chunk-encoded body; overwritten with the decoded bytes.
// iDataLen    : number of encoded bytes.
// pUnchunkLen : receives the number of decoded bytes.
//
// Always returns TRUE.
BOOL DataUnchunk(IN OUT PCHAR pData,
                 IN UINT32 iDataLen,
                 OUT PUINT32 pUnchunkLen)
{
    PCHAR pScanPos = pData;             // read cursor (encoded stream)
    PCHAR pDataPos = pData;             // write cursor (decoded stream)
    PCHAR pEndPos = pData + iDataLen;

    *pUnchunkLen = iDataLen;
    while(pScanPos < pEndPos)
    {
        // The size line must be terminated inside the received data.
        PCHAR pLineEnd = (PCHAR)memchr(pScanPos, '\n', (size_t)(pEndPos - pScanPos));
        if(pLineEnd == NULL)
        {
            break;
        }

        // Parse the hexadecimal chunk size; parsing stops at the first
        // non-hex character ('\r' or the ';' of a chunk extension).
        UINT32 iSectionLen = 0;
        BOOL bValid = TRUE;
        PCHAR pDigit = pScanPos;
        for(; pDigit < pLineEnd; ++pDigit)
        {
            CHAR c = *pDigit;
            UINT32 iDigit;
            if(c >= '0' && c <= '9')
            {
                iDigit = (UINT32)(c - '0');
            }
            else if(c >= 'a' && c <= 'f')
            {
                iDigit = (UINT32)(c - 'a') + 10;
            }
            else if(c >= 'A' && c <= 'F')
            {
                iDigit = (UINT32)(c - 'A') + 10;
            }
            else
            {
                break;
            }
            if(iSectionLen > (0xFFFFFFFFu >> 4))
            {
                bValid = FALSE;         // size does not fit in 32 bits
                break;
            }
            iSectionLen = iSectionLen * 16 + iDigit;
        }
        if(!bValid || pDigit == pScanPos || iSectionLen == 0)
        {
            break;
        }

        PCHAR pChunkStart = pLineEnd + 1;
        if(iSectionLen > (UINT32)(pEndPos - pChunkStart))
        {
            break;
        }

        // Source and destination overlap (the data only ever moves towards
        // the front), hence memmove.
        memmove(pDataPos, pChunkStart, iSectionLen);
        pDataPos += iSectionLen;

        // Skip the chunk data and the "\r\n" that follows it, without ever
        // stepping past the end of the received data.
        UINT32 iRemain = (UINT32)(pEndPos - (pChunkStart + iSectionLen));
        pScanPos = pChunkStart + iSectionLen + (iRemain < 2 ? iRemain : 2);
    }
    *pUnchunkLen = (UINT32)(pDataPos - pData);
    return TRUE;
}
// Replaces the iOldLen bytes at pPos with the iNewLen bytes at pNew inside the
// NUL-terminated text that currently ends at *ppEnd, moving the tail
// (terminator included) as required and updating *ppEnd.
// pLimit is one past the last usable byte of the buffer; returns FALSE, with
// nothing modified, when the result would not fit.
static BOOL SpliceRequestText(PCHAR pPos,
                              UINT32 iOldLen,
                              LPCSTR pNew,
                              UINT32 iNewLen,
                              PCHAR *ppEnd,
                              PCHAR pLimit)
{
    PCHAR pTail = pPos + iOldLen;
    size_t iTailLen = (size_t)(*ppEnd - pTail) + 1;     // +1: terminator
    if(iNewLen > iOldLen && (UINT32)(pLimit - *ppEnd) <= iNewLen - iOldLen)
    {
        return FALSE;
    }
    memmove(pPos + iNewLen, pTail, iTailLen);
    memcpy(pPos, pNew, iNewLen);
    *ppEnd = pPos + iNewLen + iTailLen - 1;
    return TRUE;
}

// Sends exactly iLen bytes, looping over partial sends. FALSE on socket error.
static BOOL SendAll(SOCKET sSocket,
                    LPCSTR pData,
                    UINT32 iLen)
{
    while(iLen > 0)
    {
        int iSent = send(sSocket, pData, (int)iLen, 0);
        if(iSent == SOCKET_ERROR || iSent == 0)
        {
            return FALSE;
        }
        pData += iSent;
        iLen -= (UINT32)iSent;
    }
    return TRUE;
}

// Reads the origin server's response into pBuffer (capacity iCapacity bytes).
// Receives until the header is complete; for a 200 response it then keeps
// receiving until Content-Length body bytes have arrived, or until the peer
// closes the connection when no length was given. Sets *pNeedInject when a
// 200 response declares Content-Type text/html. The data is NUL-terminated
// and its length stored in *pGetLen. Returns FALSE when nothing was received.
static BOOL ReceiveResponse(SOCKET sSocket,
                            PCHAR pBuffer,
                            UINT32 iCapacity,
                            PUINT32 pGetLen,
                            PBOOL pNeedInject)
{
    UINT32 iTotal = 0;
    PCHAR pHeaderEnd = NULL;

    // The header is not guaranteed to arrive in a single packet.
    while(pHeaderEnd == NULL && iTotal < iCapacity - 1)
    {
        int iRecvLen = recv(sSocket, pBuffer + iTotal, (int)(iCapacity - 1 - iTotal), 0);
        if(iRecvLen <= 0)
        {
            break;
        }
        iTotal += (UINT32)iRecvLen;
        pBuffer[iTotal] = '\0';
        pHeaderEnd = strstr(pBuffer, "\r\n\r\n");
    }
    *pGetLen = iTotal;
    if(iTotal == 0)
    {
        return FALSE;
    }
    if(pHeaderEnd == NULL)
    {
        return TRUE;                    // forward whatever arrived unchanged
    }

    UINT32 iResponseCode = 0;
    PCHAR pStatus = strstr(pBuffer, " ");
    if(pStatus != NULL && pStatus < pHeaderEnd)
    {
        sscanf_s(pStatus + 1, "%u", &iResponseCode);
    }
    if(iResponseCode == 200)
    {
        UINT32 iHeadLen = (UINT32)(pHeaderEnd + 4 - pBuffer);
        UINT32 iHttpDataLen = 0xFFFFFFFF;   // "unknown": read until close

        // Only a match inside the header counts; the body may contain the
        // same text.
        PCHAR pLength = strstr(pBuffer, "Content-Length:");
        if(pLength != NULL && pLength < pHeaderEnd)
        {
            sscanf_s(pLength + 15, "%u", &iHttpDataLen);
        }
        while(iTotal - iHeadLen < iHttpDataLen && iTotal < iCapacity - 1)
        {
            int iRecvLen = recv(sSocket, pBuffer + iTotal, (int)(iCapacity - 1 - iTotal), 0);
            if(iRecvLen <= 0)
            {
                break;
            }
            iTotal += (UINT32)iRecvLen;
        }

        PCHAR pContentType = strstr(pBuffer, "Content-Type:");
        if(pContentType != NULL && pContentType < pHeaderEnd)
        {
            pContentType += 13;
            while(*pContentType == ' ' || *pContentType == '\t')
            {
                ++pContentType;
            }
            if(!strncmp(pContentType, "text/html", 9))
            {
                *pNeedInject = TRUE;
            }
        }
    }
    pBuffer[iTotal] = '\0';
    *pGetLen = iTotal;
    return TRUE;
}

// Forwards a client GET request to the origin server named in its URL and
// reads the server's response back into pBuffer.
//
// The request line is expected to look like
//     GET /<host>[:<port>][/<path>] HTTP/1.x
// It is rewritten to "GET /<path> HTTP/1.x" and the value of the "Host:"
// header is replaced by <host>[:<port>] before the request is sent to
// <host> on <port> (80 when absent).
//
// pBuffer     : in: the NUL-terminated client request; out: the response.
//               Must be at least sizeof(cBuffer) bytes large (the only
//               visible caller passes cBuffer itself).
// pGetLen     : receives the number of response bytes in pBuffer.
// pNeedInject : set to TRUE when the response is a 200 with Content-Type
//               text/html, i.e. when the script should be injected.
// pBaseUrl    : receives "<base href="http://<host>[:<port>][/<dir>]/" />";
//               must hold MAX_PATH characters.
//
// Returns TRUE when a response was received; FALSE when the request is
// malformed, the host cannot be resolved or reached, or nothing came back.
// pBuffer is modified even on failure.
BOOL RedirectGetRequest(IN OUT PCHAR pBuffer,
                        OUT PUINT32 pGetLen,
                        OUT PBOOL pNeedInject,
                        OUT PCHAR pBaseUrl)
{
    *pGetLen = 0;
    *pNeedInject = FALSE;

    const UINT32 iCapacity = (UINT32)(sizeof(cBuffer)/sizeof(CHAR));
    PCHAR pRequestEnd = pBuffer + strlen(pBuffer);

    // ---- Split the request line into method, host[/path] and version. ----
    PCHAR pGetStart = pBuffer;
    PCHAR pMethodEnd = strstr(pBuffer, " ");
    PCHAR pLineEnd = strstr(pBuffer, "\r\n");
    if(pMethodEnd == NULL || pLineEnd == NULL || pMethodEnd[1] != '/')
    {
        return FALSE;
    }
    PCHAR pHostStart = pMethodEnd + 2;          // skips the space and the '/'
    if(pHostStart >= pLineEnd)
    {
        return FALSE;
    }
    PCHAR pUrlEnd = strstr(pHostStart, " ");
    if(pUrlEnd == NULL || pUrlEnd == pHostStart || pUrlEnd >= pLineEnd)
    {
        return FALSE;
    }
    PCHAR pHttpStart = pUrlEnd + 1;
    PCHAR pNextSectionStart = pLineEnd + 2;
    *pMethodEnd = '\0';
    *pUrlEnd = '\0';
    *pLineEnd = '\0';

    // ---- Host, path and the directory used for the <base> tag. ----
    LPCSTR pPath = "";
    PCHAR pBaseEnd = pUrlEnd;
    PCHAR pHostEnd = strchr(pHostStart, '/');
    if(pHostEnd != NULL)
    {
        pBaseEnd = strrchr(pHostStart, '/');
        pPath = pHostEnd + 1;
    }
    else
    {
        pHostEnd = pUrlEnd;
    }
    if(pHostEnd == pHostStart)
    {
        return FALSE;                           // empty host name
    }

    static const CHAR cBaseFormat[] = "<base href=\"http://%s/\" />";
    if((size_t)(pBaseEnd - pHostStart) + sizeof(cBaseFormat) > MAX_PATH)
    {
        return FALSE;                           // would not fit into pBaseUrl
    }
    CHAR cSaved = *pBaseEnd;
    *pBaseEnd = '\0';                           // cut after the directory part
    sprintf_s(pBaseUrl, MAX_PATH, cBaseFormat, pHostStart);
    *pBaseEnd = cSaved;
    *pHostEnd = '\0';                           // pHostStart is now host[:port]

    // ---- Rewrite the Host header while host[:port] is still intact. ----
    PCHAR pHostSectionStart = strstr(pNextSectionStart, "Host:");
    if(pHostSectionStart == NULL)
    {
        return FALSE;
    }
    pHostSectionStart += 5;
    while(*pHostSectionStart == ' ' || *pHostSectionStart == '\t')
    {
        ++pHostSectionStart;
    }
    PCHAR pHostSectionEnd = strstr(pHostSectionStart, "\r\n");
    if(pHostSectionEnd == NULL)
    {
        return FALSE;
    }
    if(!SpliceRequestText(pHostSectionStart,
                          (UINT32)(pHostSectionEnd - pHostSectionStart),
                          pHostStart,
                          (UINT32)strlen(pHostStart),
                          &pRequestEnd,
                          pBuffer + iCapacity))
    {
        return FALSE;
    }

    // ---- Port: only a ':' inside the host part counts. ----
    UINT32 iPort = 80;
    PCHAR pPortStart = strchr(pHostStart, ':');
    if(pPortStart != NULL)
    {
        *pPortStart++ = '\0';
        if(sscanf_s(pPortStart, "%u", &iPort) != 1)
        {
            iPort = 80;
        }
    }
    if(*pHostStart == '\0' || iPort == 0 || iPort > 65535)
    {
        return FALSE;
    }

    // ---- Resolve now: the host text is overwritten by the next step. ----
    LPHOSTENT pHostEntry = gethostbyname(pHostStart);
    if(pHostEntry == NULL || pHostEntry->h_addr == NULL)
    {
        return FALSE;
    }
    struct sockaddr_in remoteAddr;
    memset(&remoteAddr, 0, sizeof(remoteAddr));
    remoteAddr.sin_family = AF_INET;
    remoteAddr.sin_addr.s_addr = *((u_long FAR *)(pHostEntry->h_addr));
    remoteAddr.sin_port = htons((u_short)iPort);

    // ---- Rewrite the request line: "<method> /<path> <version>\r\n". ----
    // 6 = " /" + " " + "\r\n" + terminator.
    size_t iLineCap = strlen(pGetStart) + strlen(pPath) + strlen(pHttpStart) + 6;
    PCHAR pLine = new CHAR[iLineCap];
    sprintf_s(pLine, iLineCap, "%s /%s %s\r\n", pGetStart, pPath, pHttpStart);
    BOOL bSpliced = SpliceRequestText(pBuffer,
                                      (UINT32)(pNextSectionStart - pBuffer),
                                      pLine,
                                      (UINT32)strlen(pLine),
                                      &pRequestEnd,
                                      pBuffer + iCapacity);
    delete [] pLine;
    if(!bSpliced)
    {
        return FALSE;
    }

    // ---- Talk to the origin server; the socket is closed on every path. ----
    SOCKET sSocket = socket(AF_INET, SOCK_STREAM, IPPROTO_IP);
    if(sSocket == INVALID_SOCKET)
    {
        return FALSE;
    }
    BOOL bResult = FALSE;
    if(connect(sSocket, (LPSOCKADDR)&remoteAddr, sizeof(remoteAddr)) != SOCKET_ERROR
        && SendAll(sSocket, pBuffer, (UINT32)(pRequestEnd - pBuffer)))
    {
        bResult = ReceiveResponse(sSocket, pBuffer, iCapacity, pGetLen, pNeedInject);
    }
    closesocket(sSocket);
    return bResult;
}
BOOL SolveResponse(IN OUT PCHAR pBuffer,
IN OUT PUINT32 pGetLen,
IN LPCSTR lpszScript,
IN LPCSTR lpszBaseUrl)
{
PCHAR pHttpDataStart = strstr(cBuffer, "/r/n/r/n") + 4;
UINT32 iHeadLen = (UINT32)(pHttpDataStart - cBuffer);
UINT32 iOldHttpDataLen = *pGetLen - iHeadLen;
UINT32 iHttpDataLen = iOldHttpDataLen;
PCHAR pTransferKindStart = strstr(cBuffer, "Transfer-Encoding:") + 19;
if((UINT32)(UINT64)(pTransferKindStart) != 19
&& !strncmp(pTransferKindStart, "chunked", 7))
{
DataUnchunk(pHttpDataStart, iHttpDataLen, &iHttpDataLen);
}
PCHAR pEncodeKindStart = strstr(cBuffer, "Content-Encoding:") + 18;
if((UINT32)(UINT64)(pEncodeKindStart) == 18)
{
iHttpDataLen += InjectScript(pHttpDataStart, iHttpDataLen, lpszScript, lpszBaseUrl);
}
else if(!strncmp(pEncodeKindStart, "gzip", 4))
{
PCHAR pUnPackedBuffer = new CHAR [iHttpDataLen * 8];
ULONG iUnPackedLen = iHttpDataLen * 8;
if(httpgzdecompress((PBYTE)pHttpDataStart, iHttpDataLen, (PBYTE)pUnPackedBuffer, &iUnPackedLen) == 0)
{
iUnPackedLen += InjectScript(pUnPackedBuffer, iUnPackedLen, lpszScript, lpszBaseUrl);
if(httpgzcompress((PBYTE)pUnPackedBuffer, iUnPackedLen, (PBYTE)pHttpDataStart, (PULONG)&iUnPackedLen) == 0)
{
iHttpDataLen = iUnPackedLen;
}
else
{
printf("compress error!/r/n");
}
}
else
{
printf("decompress error!/r/n");
}
delete [] pUnPackedBuffer;
}
else
{
printf("Unknown Content-Encoding: %4s!/r/n", pEncodeKindStart);
}
if((UINT32)(UINT64)(pTransferKindStart) != 19 && !strncmp(pTransferKindStart, "chunked", 7))
{
DataChunk(pHttpDataStart, iHttpDataLen, &iHttpDataLen);
}
PCHAR pLengthStart = strstr(pBuffer, "Content-Length:") + 16;
if((UINT32)(UINT64)pLengthStart != 16)
{
UINT32 iOffset = GetDecLen(iHttpDataLen) - GetDecLen(iOldHttpDataLen);
if(iOffset > 0)
{
for(PCHAR pTemp=&pBuffer[iHeadLen+iHttpDataLen]; pTemp>=pLengthStart; --pTemp)
{
*(pTemp+iOffset) = *pTemp;
}
}
CHAR cLengthBuffer[MAX_PATH];
sprintf_s(cLengthBuffer, MAX_PATH, "%ld", iHttpDataLen);
memcpy_s(pLengthStart, MAX_PATH, cLengthBuffer, GetDecLen(iHttpDataLen));
*pGetLen = iHeadLen + iOffset + iHttpDataLen;
}
else
{
*pGetLen = iHeadLen + iHttpDataLen;
}
return TRUE;
}
int _tmain(int argc, _TCHAR* argv[])
{
FILE *fp = NULL;
fopen_s(&fp, "Inject2.txt", "rb");
fread(cScript, sizeof(cScript)/sizeof(CHAR), 1, fp);
fclose(fp);
WSADATA wsaData;
WORD wVersionRequested = MAKEWORD(2, 2);
VERIFY(WSAStartup(wVersionRequested, &wsaData) == 0);
struct sockaddr_in localAddr;
localAddr.sin_family = AF_INET;
localAddr.sin_port = htons(1234);
localAddr.sin_addr.s_addr = htonl(INADDR_ANY);
SOCKET sServer;
VERIFY((sServer=socket(AF_INET, SOCK_STREAM, IPPROTO_IP)) != INVALID_SOCKET);
VERIFY(bind(sServer, (struct sockaddr *)&localAddr, sizeof(localAddr)) != SOCKET_ERROR);
VERIFY(listen(sServer, 5) == 0);
while(TRUE)
{
struct sockaddr addr;
INT32 iAddrSize = sizeof(addr);
SOCKET sClient = accept(sServer, &addr, &iAddrSize);
memset(cBuffer, 0, sizeof(cBuffer)/sizeof(CHAR));
cBuffer[recv(sClient, cBuffer, sizeof(cBuffer)/sizeof(CHAR)-1, 0)] = '/0';
if(!strncmp(cBuffer, "GET", 3))
{
system("cls");
printf_s("Connection Request Come!/r/n");
UINT32 iGetLen = 0;
BOOL bNeedInject = FALSE;
CHAR cBaseUrl[MAX_PATH] = {0};
if(RedirectGetRequest(cBuffer, &iGetLen, &bNeedInject, cBaseUrl))
{
if(bNeedInject)
{
SolveResponse(cBuffer, &iGetLen, cScript, cBaseUrl);
}
send(sClient, cBuffer, iGetLen, 0);
printf_s("Redirect Data Sucessfully!/r/n");
}
else
{
printf_s("Redirect Data ERROR!/r/n");
}
}
if(!strncmp(cBuffer, "POST", 4))
{
UINT32 iDataLen = (UINT32)strlen(cBuffer+5)+5;
if(!strncmp(cBuffer+iDataLen-4, "/r/n/r/n", 4))
{
cBuffer[recv(sClient, cBuffer, sizeof(cBuffer)/sizeof(CHAR)-1, 0)] = '/0';
}
else
{
strcpy_s(cBuffer, iDataLen, strstr(cBuffer, "/r/n/r/n")+4);
}
for(PCHAR pPos=cBuffer; *pPos; ++pPos)
{
if(*pPos == -62)
{
*pPos = ' ';
}
}
WCHAR wBuffer[10240];
::MultiByteToWideChar(CP_UTF8, 0, cBuffer, -1, wBuffer, 10000);
::WideCharToMultiByte(CP_ACP, 0, wBuffer, -1, cBuffer, 10000, NULL, NULL);
printf_s("Data Received: %s/r/n", cBuffer);
}
shutdown(sClient, SD_RECEIVE);
closesocket(sClient);
}
return 0;
}