// Normalizes a slash-separated URL path: resolves "." and ".." segments and
// drops empty segments produced by leading or repeated slashes.
//
// Example:
//   /kjhz/dwhzjl/201011/../../stgz/201005/t20100526_534624.html
//   => /kjhz/stgz/201005/t20100526_534624.html
//
// inPath  - the path to normalize. Taken by value so that passing the same
//           string as both arguments is safe.
// outPath - receives the normalized path. On success it always starts with
//           '/' and never ends with '/'. On failure it is left empty.
//
// Returns false when inPath is empty, when a ".." segment would climb above
// the root, or when no segment remains after normalization (for example "/").
// Returns true otherwise.
bool GenPath(const std::string inPath, std::string &outPath){
    outPath.clear();

    // Normalized path built so far, e.g. "/a/b". It doubles as the segment
    // stack: the last segment is everything after the final '/'.
    std::string resolved;

    const std::string::size_type length = inPath.length();
    std::string::size_type begin = 0;
    while(begin < length) {
        std::string::size_type end = inPath.find('/', begin);
        if(end == std::string::npos) {
            // The last segment has no trailing slash; it is still handled by
            // the same rules below, so a final ".." is resolved as well.
            end = length;
        }
        const std::string segment = inPath.substr(begin, end - begin);
        begin = end + 1;

        if(segment.empty() || segment == ".") {
            // Empty segments come from "//" or a leading '/'; "." means
            // "current directory". Neither contributes to the result.
            continue;
        }
        if(segment == "..") {
            if(resolved.empty()) {
                // Nothing left to pop: the path escapes the root.
                return false;
            }
            // Drop the last segment together with its leading '/'.
            resolved.erase(resolved.rfind('/'));
        }
        else {
            resolved += '/';
            resolved += segment;
        }
    }

    if(resolved.empty()) {
        return false;
    }
    outPath = resolved;
    return true;
}
bool GetRealUrl(const string url, string &realUrl){
realUrl = url;
CString strServer, strObject, strHeader;
unsigned short nPort;
DWORD dwServiceType;
CString strURL(url.c_str());
if(!AfxParseURL(strURL, dwServiceType, strServer, strObject, nPort)){
return false;
}
try{
CInternetSession iSession("HttpClient");
CHttpConnection *pServerCon = iSession.GetHttpConnection(strServer, (INTERNET_PORT)80);
CHttpFile* pFile = pServerCon->OpenRequest(1, strObject, NULL, 1, NULL, NULL,
INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_NO_AUTO_REDIRECT);
if(pFile == NULL) {
pServerCon->Close();
delete pServerCon;
iSession.Close();
return false;
}
pFile->AddRequestHeaders(_T("Internet"));
BOOL bRet = pFile->SendRequest();
DWORD dwRet = 0;
if(bRet){
pFile->QueryInfoStatusCode(dwRet);
}
CString strNewLocation;
if(dwRet == HTTP_STATUS_OK){
int nPlace = strObject.Find(_T("/./"));
while(nPlace >= 0) {
strObject.Replace(_T("/./"), _T("/"));
nPlace = strObject.Find(_T("/./"));
}
//将连续的两个“/”换为一个"/"
// eg: /kjhz//kjrh/201106/t20110608_664194.html
nPlace = strObject.Find(_T("//"));
if(nPlace >= 0){
strObject.Replace(_T("//"), _T("/"));
}
//将 "../"变成上级目录(去掉一层目录)
//例如:/kjhz/dwhzjl/201011/../../stgz/201005/t20100526_534624.html
nPlace = strObject.Find(_T("../"));
if(nPlace >= 0){
string outpath;
bRet = GenPath((LPCTSTR)strObject, outpath);
if(bRet == false) {
pFile->Close();
pServerCon->Close();
delete pFile;
delete pServerCon;
iSession.Close();
return false;
}
strObject = CString(outpath.c_str());
}
strNewLocation = "http://" + strServer + strObject;
realUrl = (LPCTSTR)strNewLocation;
pFile->Close();
pServerCon->Close();
delete pFile;
delete pServerCon;
iSession.Close();
return true;
}
if( dwRet == HTTP_STATUS_MOVED || dwRet == HTTP_STATUS_REDIRECT ||
dwRet == HTTP_STATUS_REDIRECT_METHOD){
pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation);
int nPlace = strNewLocation.Find(_T("Location: "));
if( nPlace == -1){
cerr << "Error occurred in get new URL!" << endl;
pFile->Close();
pServerCon->Close();
delete pFile;
delete pServerCon;
iSession.Close();
return false;
}
strNewLocation = strNewLocation.Mid(nPlace + 10); //跳过"Location: "
nPlace = strNewLocation.Find('\n');
if(nPlace > 0){
strNewLocation = strNewLocation.Left(nPlace);
}
nPlace = strNewLocation.Find('/');
if(nPlace == 0){
strNewLocation = "http://" + strServer + strNewLocation;
}
realUrl = (LPCTSTR)strNewLocation;
pFile->Close();
pServerCon->Close();
delete pFile;
delete pServerCon;
iSession.Close();
return true;
}
}
catch(CException *e){
char buf[128];
e->GetErrorMessage(buf,120);
cerr << url << ", fail to find real url: " << buf << endl;
}
return false;
}
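// CHttpConnection获取真实URL (article title: "Getting the real URL with CHttpConnection")
// 最新推荐文章于 2023-07-07 15:41:47 发布 (page footer: "latest recommended article published 2023-07-07 15:41:47")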