CHttpConnection获取真实URL

/**
 * Normalizes a slash-separated URL path by resolving "." and ".." segments.
 *
 * Example:
 *   /kjhz/dwhzjl/201011/../../stgz/201005/t20100526_534624.html
 *   => /kjhz/stgz/201005/t20100526_534624.html
 *
 * Rules:
 *  - Empty segments (leading, trailing or repeated '/') are skipped, so the
 *    result never ends with '/' and never contains "//".
 *  - "." segments are dropped.
 *  - ".." removes the previously kept segment, including when ".." is the
 *    last segment of the path.
 *
 * @param inPath  Path to normalize.
 * @param outPath Receives the normalized path, always starting with '/'.
 *                It is set to the empty string when the function fails.
 * @return true on success; false when inPath is empty, when a ".." would
 *         climb above the root, or when no segment remains after
 *         normalization (e.g. "/" or "/a/..").
 */
bool GenPath(const std::string inPath, std::string &outPath){
	outPath.clear();

	// Invariant: `resolved` is either empty or a sequence of "/segment"
	// pieces, so the last '/' always marks the start of the last segment.
	std::string resolved;
	const std::string::size_type len = inPath.length();
	std::string::size_type begin = 0;

	while(begin < len) {
		std::string::size_type end = inPath.find('/', begin);
		if(end == std::string::npos) {
			end = len;	// last segment runs to the end of the input
		}
		const std::string::size_type segLen = end - begin;

		if(segLen == 0 || inPath.compare(begin, segLen, ".") == 0) {
			// Empty segment or "current directory": contributes nothing.
		}
		else if(inPath.compare(begin, segLen, "..") == 0) {
			if(resolved.empty()) {
				return false;	// ".." would escape above the root
			}
			resolved.erase(resolved.rfind('/'));	// drop the last "/segment"
		}
		else {
			resolved += '/';
			resolved.append(inPath, begin, segLen);
		}
		begin = end + 1;
	}

	if(resolved.empty()) {
		return false;
	}
	outPath.swap(resolved);
	return true;
}


bool GetRealUrl(const string url, string &realUrl){
	realUrl = url;
	CString strServer, strObject, strHeader;
	unsigned short nPort;
	DWORD dwServiceType;
	CString strURL(url.c_str());
	if(!AfxParseURL(strURL, dwServiceType, strServer, strObject, nPort)){
		return false;
	}
	try{
		CInternetSession iSession("HttpClient"); 
		CHttpConnection *pServerCon = iSession.GetHttpConnection(strServer, (INTERNET_PORT)80);
		CHttpFile* pFile = pServerCon->OpenRequest(1, strObject, NULL, 1, NULL, NULL,
			INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_NO_AUTO_REDIRECT);
		if(pFile == NULL) {
			pServerCon->Close();
			delete pServerCon;
			iSession.Close();
			return false;
		}
		pFile->AddRequestHeaders(_T("Internet"));
		BOOL bRet = pFile->SendRequest();

		DWORD dwRet = 0;
		if(bRet){
			pFile->QueryInfoStatusCode(dwRet);
		}

		CString strNewLocation;
		if(dwRet == HTTP_STATUS_OK){
			int nPlace = strObject.Find(_T("/./"));
			while(nPlace >= 0) {
				strObject.Replace(_T("/./"), _T("/"));
				nPlace = strObject.Find(_T("/./"));
			}
			//将连续的两个“/”换为一个"/"
			// eg: /kjhz//kjrh/201106/t20110608_664194.html
			nPlace = strObject.Find(_T("//"));
			if(nPlace >= 0){
				strObject.Replace(_T("//"), _T("/"));
			}

			//将 "../"变成上级目录(去掉一层目录)
			//例如:/kjhz/dwhzjl/201011/../../stgz/201005/t20100526_534624.html
			nPlace = strObject.Find(_T("../"));
			if(nPlace >= 0){
				string outpath;
				bRet = GenPath((LPCTSTR)strObject, outpath);
				if(bRet == false) {
					pFile->Close();
					pServerCon->Close();
					delete pFile;
					delete pServerCon;
					iSession.Close();
					return false;
				}
				strObject = CString(outpath.c_str());
			}
			strNewLocation = "http://" + strServer + strObject;
			realUrl = (LPCTSTR)strNewLocation;
			pFile->Close();
			pServerCon->Close();
			delete pFile;
			delete pServerCon;
			iSession.Close();
			return true;
		}
		if( dwRet == HTTP_STATUS_MOVED || dwRet == HTTP_STATUS_REDIRECT ||
			dwRet == HTTP_STATUS_REDIRECT_METHOD){
				pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation);

				int nPlace = strNewLocation.Find(_T("Location: "));
				if( nPlace == -1){
					cerr << "Error occurred in get new URL!" << endl;
					pFile->Close();
					pServerCon->Close();
					delete pFile;
					delete pServerCon;
					iSession.Close();
					return false;
				}

				strNewLocation = strNewLocation.Mid(nPlace + 10); //跳过"Location: "
				nPlace = strNewLocation.Find('\n');
				if(nPlace > 0){
					strNewLocation = strNewLocation.Left(nPlace);
				}
				nPlace = strNewLocation.Find('/');
				if(nPlace == 0){
					strNewLocation = "http://" + strServer + strNewLocation;
				}
				realUrl = (LPCTSTR)strNewLocation;

				pFile->Close();
				pServerCon->Close();
				delete pFile;
				delete pServerCon;
				iSession.Close();
				return true;
		}
	}
	catch(CException *e){
		char buf[128];
		e->GetErrorMessage(buf,120);
		cerr << url << ", fail to find real url: " << buf << endl;
	}
	return false;
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值