C++爬虫:获取IP归属地

#include "testipbelong.h"
//#include <afxinet.h>
#include <windows.h>
#include <WinInet.h>
#include <fstream>
#include <string>
#include <iostream>
#include <QFile>
#include <QTextStream>
#include <regex>
#include <QtCore> 
#include <QtNetwork> 

#pragma comment(lib, "WinInet.lib")
各种转码
/**
 * Converts a NUL-terminated UTF-8 string to the system ANSI code page
 * (CP_ACP), going through UTF-16 as the intermediate form.
 *
 * @param utf8 NUL-terminated UTF-8 text; a null pointer is treated as a
 *             failed conversion.
 * @return The text re-encoded in the ANSI code page, or an empty string when
 *         either conversion step fails.
 */
std::string UtfToGbk(const char* utf8)
{
	if (utf8 == NULL)
		return std::string();

	// With a zero-sized output buffer the API reports the required length in
	// wchar_t units, terminating NUL included (the input length is -1).
	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
	if (wideLen <= 0)
		return std::string();

	// The string owns the buffer, so nothing leaks on any return path.
	std::wstring wide(static_cast<std::size_t>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen) <= 0)
		return std::string();

	const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (ansiLen <= 0)
		return std::string();

	std::string ansi(static_cast<std::size_t>(ansiLen), '\0');
	if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &ansi[0], ansiLen, NULL, NULL) <= 0)
		return std::string();

	// The converted text carries its own terminator; std::string must not count it.
	ansi.resize(static_cast<std::size_t>(ansiLen) - 1);
	return ansi;
}

/**
 * Converts text in the system ANSI code page (CP_ACP) to UTF-8, going
 * through UTF-16 as the intermediate form.
 *
 * Only the part of strGBK up to its first NUL byte is converted, because the
 * input is handed to the API as a NUL-terminated C string.
 *
 * @param strGBK Text encoded in the ANSI code page.
 * @return The UTF-8 encoding of the text, or an empty string when either
 *         conversion step fails.
 */
std::string GBKToUTF8(const std::string& strGBK)
{
	// Size query: required length in wchar_t units, terminating NUL included.
	const int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0);
	if (wideLen <= 0)
		return std::string();  // never read from a zero-length buffer

	std::wstring wide(static_cast<std::size_t>(wideLen), L'\0');
	if (MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, &wide[0], wideLen) <= 0)
		return std::string();

	const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (utf8Len <= 0)
		return std::string();

	std::string strOutUTF8(static_cast<std::size_t>(utf8Len), '\0');
	if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &strOutUTF8[0], utf8Len, NULL, NULL) <= 0)
		return std::string();

	// Drop the terminator the API wrote into the last position.
	strOutUTF8.resize(static_cast<std::size_t>(utf8Len) - 1);
	return strOutUTF8;
}

/**
 * Converts text in the system ANSI code page (CP_ACP) to a UTF-16 wide string.
 *
 * Only the part of str up to its first NUL byte is converted, because the
 * input is handed to the API as a NUL-terminated C string.
 *
 * @param str Text encoded in the ANSI code page.
 * @return The wide-character form of the text, or an empty string when the
 *         conversion fails.
 */
std::wstring GBKToUnicode(const std::string& str)
{
	// Size query: required length in wchar_t units, terminating NUL included.
	const int unicodeLen = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, NULL, 0);
	if (unicodeLen <= 0)
		return std::wstring();

	// The wstring owns the buffer, which removes the mismatched
	// new[] / scalar delete pair of the earlier implementation.
	std::wstring rt(static_cast<std::size_t>(unicodeLen), L'\0');
	const int written = ::MultiByteToWideChar(CP_ACP, 0, str.c_str(), -1, &rt[0], unicodeLen);
	if (written <= 0)
		return std::wstring();

	// The API counts the terminator it wrote; the wstring must not.
	rt.resize(static_cast<std::size_t>(written) - 1);
	return rt;
}

/**
 * Converts UTF-8 text to a UTF-16 wide string.
 *
 * Only the part of str up to its first NUL byte is converted, because the
 * input is handed to the API as a NUL-terminated C string.
 *
 * @param str UTF-8 encoded text.
 * @return The wide-character form of the text, or an empty string when the
 *         conversion fails.
 */
std::wstring UTF8ToUnicode(const std::string& str)
{
	// Size query: required length in wchar_t units, terminating NUL included.
	const int unicodeLen = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, NULL, 0);
	if (unicodeLen <= 0)
		return std::wstring();

	// The wstring owns the buffer, which removes the mismatched
	// new[] / scalar delete pair of the earlier implementation.
	std::wstring rt(static_cast<std::size_t>(unicodeLen), L'\0');
	const int written = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &rt[0], unicodeLen);
	if (written <= 0)
		return std::wstring();

	// The API counts the terminator it wrote; the wstring must not.
	rt.resize(static_cast<std::size_t>(written) - 1);
	return rt;
}

/**
 * Converts a UTF-16 wide string to the system ANSI code page (CP_ACP).
 *
 * @param str Wide-character text; it is passed to the API as a
 *            NUL-terminated string, so conversion stops at its first NUL.
 * @return The text in the ANSI code page; empty when nothing was converted.
 */
std::string UnicodeToANSI(const std::wstring& str)
{
	// Ask how many bytes the converted text needs (terminator included).
	const int byteCount = WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, NULL, 0, NULL, NULL);

	// One spare, zero-filled byte beyond the reported size keeps the buffer
	// NUL-terminated even if the second call writes nothing.
	std::string scratch(byteCount + 1, '\0');
	::WideCharToMultiByte(CP_ACP, 0, str.c_str(), -1, &scratch[0], byteCount, NULL, NULL);

	// Re-read as a C string so the result ends at the first terminator.
	return std::string(scratch.c_str());
}

使用 WinINet 抓取(旧实现,已注释掉):没有监控机制,无法设置超时

//QString GetIpAddress(const std::string  IP)
//{
//
//	HINTERNET hINet, hHttpFile;
//
//	char szSizeBuffer[2000];
//
//	DWORD dwLengthSizeBuffer = sizeof(szSizeBuffer);
//
//	//InternetOpen初始化WININET.DLL
//	hINet = InternetOpenA("IE6.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
//
//	std::string  url = "http://www.ip138.com/ips138.asp?ip=" + IP;
//	if (!hINet)
//	{
//		//QMessageBox::information(this , QString::fromLocal8Bit("操作结果"), QString::fromLocal8Bit("hINet错误!"));
//		return "hINet ERROR!";
//	}
//
//	//这个函数连接到一个网络服务器上并且从服务器上读取数据
//	hHttpFile = InternetOpenUrlA(hINet, url.c_str(), NULL, 0, 0, 0);
//	if (!hHttpFile)
//	{
//		//QMessageBox::information(g_DataStatisticalDlg, QString::fromLocal8Bit("操作结果"), QString::fromLocal8Bit("hHttpFile错误!"));
//		return "hHttpFile 错误!";
//	}
//
//	//得到关于文件的信息
//	BOOL bQuery = HttpQueryInfo(hHttpFile,
//		HTTP_QUERY_CONTENT_LENGTH,
//		szSizeBuffer,
//		&dwLengthSizeBuffer, NULL);
//
//	if (!bQuery)
//	{
//		//QMessageBox::information(g_DataStatisticalDlg, QString::fromLocal8Bit("操作结果"), QString::fromLocal8Bit("bQuery错误!"));
//		InternetCloseHandle(hINet);
//		return "bQuery ERROR!";
//	}
//
//	INT FileSize = atol(szSizeBuffer);
//	std::wstring  revData;
//	revData.resize(FileSize);
//
//	char buffer[1025];
//	DWORD dwBytesRead = 0;
//	BOOL bRead;
//
//	//web浏览器将在InternetReadFile上循环 ,不停地从Internet上读入数据块。
//	do
//	{
//		ZeroMemory(buffer, 1025);
//		bRead = InternetReadFile(hHttpFile,buffer, 1024, &dwBytesRead);
//		buffer[dwBytesRead] = '\0';
//		std::wstring str1 = GBKToUnicode(buffer);
//		//revData += buffer;
//		//revData += str1;
//		revData += str1;
//	} while (dwBytesRead);
//	
//	//std::ofstream   out_file("C:\\Users\\Administrator\\Desktop\\1.txt");
//	//out_file << revData;              //输出到文件
//	//BOOL bRead = InternetReadFile(hHttpFile, &revData[0], FileSize, &dwBytesRead);
//
//	if (!bRead)
//	{
//		//QMessageBox::information(g_DataStatisticalDlg, QString::fromLocal8Bit("操作结果"), QString::fromLocal8Bit("bRead错误!"));
//		return "bRead ERROR!";
//	}
//
//	InternetCloseHandle(hHttpFile);
//	InternetCloseHandle(hINet);
//
//	//std::smatch m;
//	//std::regex regexp("<td align=\"center\"><ul class=\"ul1\"><li>(?<title>.*?)</li>");
//
//	//while (std::regex_search(revData, m, regexp))
//	//{
//	//	revData = m.str();
//	//	std::ofstream   out_file2("C:\\Users\\Administrator\\Desktop\\2.txt");
//	//	out_file2 << revData;              //输出到文件
//	//}
//
//	QString ipBelong;
//	QRegularExpression regexp("<td align=\"center\"><ul class=\"ul1\"><li>(?<title>.*?)</li>");
//
// //   std::string str1 = GBKToUTF8(revData);
//	//std::wstring str2 = UTF8ToUnicode(revData);
//	QRegularExpressionMatch rm = regexp.match(QString::fromStdWString((revData)));
//	
//	if (rm.hasMatch())
//	{
//		ipBelong = rm.captured("title");
//		ipBelong = ipBelong.mid(5);
//		//revData = ipBelong.toLocal8Bit();
//		//std::wstring str1 = GBKToUnicode(revData);
//		//std::string str1 = GBKToUTF8(revData);
//		//std::wstring str2 = UTF8ToUnicode(str1);
//		//std::string std3 = UnicodeToANSI(str2);
//		//std::ofstream   out_file2("C:\\Users\\Administrator\\Desktop\\3.txt");
//		//out_file2 << revData;              //输出到文件
//	}
//
//	return ipBelong;
//	//return revData;
//}

线程监控超时

/**
 * State shared between urlAccess() and the worker thread running
 * thread_access(). The caller fills in `url`; the worker fills in the rest.
 * Access is not synchronized.
 */
struct PARA
{
	// Address to download; set by the caller before the worker is started.
	std::string url{};

	// WinINet session handle from InternetOpenA; reset to NULL once the worker closes it.
	HINTERNET hINet{ nullptr };
	// Handle of the opened URL from InternetOpenUrlA; reset to NULL once the worker closes it.
	HINTERNET hHttpFile{ nullptr };
	// Result of the worker's Content-Length header query.
	BOOL bQuery{ TRUE };
	// Result of the worker's most recent InternetReadFile call.
	BOOL bRead{ TRUE };
	// Text the worker extracted from the page; stays empty when nothing matched.
	QString ipBelong{};
};

/**
 * Worker-thread entry point: downloads PARA::url through WinINet and stores
 * the text captured from the page in PARA::ipBelong.
 *
 * The WinINet handles are published in the PARA object while they are open so
 * that the thread which started this worker can close them to abort a
 * blocking call; they are reset to NULL once this function has closed them.
 * PARA::bQuery and PARA::bRead record the outcome of the header query and of
 * the last read.
 *
 * @param args Pointer to the PARA object shared with the creating thread.
 * @return Always 1.
 */
static DWORD WINAPI thread_access(LPVOID args)
{
	PARA* p = static_cast<PARA*>(args);

	p->hINet = InternetOpenA("IE6.0", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
	if (p->hINet == NULL)
		return 1;

	// Raw response bytes. They are decoded in one go after the download so a
	// multibyte character that straddles two read chunks is not torn apart.
	std::string body;

	p->hHttpFile = InternetOpenUrlA(p->hINet, p->url.c_str(), NULL, 0, INTERNET_FLAG_DONT_CACHE, 0);
	if (p->hHttpFile != NULL)
	{
		// The Content-Length query only records its outcome in bQuery. A
		// response without that header is still read to the end, so a failed
		// query must not close any handle.
		char szSizeBuffer[2000] = { 0 };
		DWORD dwLengthSizeBuffer = sizeof(szSizeBuffer);
		p->bQuery = HttpQueryInfo(p->hHttpFile,
			HTTP_QUERY_CONTENT_LENGTH,
			szSizeBuffer,
			&dwLengthSizeBuffer, NULL);

		char buffer[1024];
		DWORD dwBytesRead = 0;
		do
		{
			dwBytesRead = 0;
			p->bRead = InternetReadFile(p->hHttpFile, buffer, sizeof(buffer), &dwBytesRead);
			if (!p->bRead)
				break;  // read error (or handle closed by the waiting thread)
			body.append(buffer, dwBytesRead);
		} while (dwBytesRead != 0);  // a successful zero-byte read marks the end of the data

		InternetCloseHandle(p->hHttpFile);
		p->hHttpFile = NULL;
	}
	InternetCloseHandle(p->hINet);
	p->hINet = NULL;

	// The page bytes are interpreted in the system ANSI code page.
	const std::wstring revData = GBKToUnicode(body);

	QRegularExpression regexp("<td align=\"center\"><ul class=\"ul1\"><li>(?<title>.*?)</li>");
	QRegularExpressionMatch rm = regexp.match(QString::fromStdWString(revData));
	if (rm.hasMatch())
	{
		// Drop the first five characters of the capture
		// (presumably a fixed label in front of the location - verify against the page).
		p->ipBelong = rm.captured("title").mid(5);
	}
	return 1;
}

/**
 * Fetches `url` on a worker thread (thread_access) and waits for the result
 * for at most `timeOutSecs` seconds.
 *
 * On timeout the WinINet handles published by the worker are closed to abort
 * its blocking call, and the worker is given a short grace period to finish.
 *
 * @param url         NUL-terminated address to download; a null pointer is
 *                    treated as an empty address.
 * @param timeOutSecs Maximum time to wait for the download, in seconds;
 *                    values below zero are treated as zero.
 * @return The text extracted by the worker, or an empty QString when the
 *         worker could not be started, found nothing, or did not finish.
 */
static QString urlAccess(const char* url, int timeOutSecs)
{
	// Grace period granted to the worker after its handles were closed.
	const DWORD abortGraceMs = 1000;

	PARA* p1 = new PARA();
	if (url != NULL)
		p1->url = url;

	DWORD id = 0;
	HANDLE hThread = CreateThread(NULL, 0, thread_access, p1, 0, &id);
	if (hThread == NULL)
	{
		delete p1;
		return QString();
	}

	const DWORD timeoutMs = (timeOutSecs > 0) ? static_cast<DWORD>(timeOutSecs) * 1000u : 0u;
	DWORD waitResult = WaitForSingleObject(hThread, timeoutMs);
	if (waitResult != WAIT_OBJECT_0)
	{
		std::cout << "TIME" << std::endl;

		// Closing the handles makes the worker's pending WinINet call fail,
		// which lets the thread run to its end. Each handle is closed once.
		if (p1->hHttpFile != NULL)
		{
			InternetCloseHandle(p1->hHttpFile);
		}
		if (p1->hINet != NULL)
		{
			InternetCloseHandle(p1->hINet);
		}
		waitResult = WaitForSingleObject(hThread, abortGraceMs);
	}

	QString ipBelong;
	if (waitResult == WAIT_OBJECT_0)
	{
		// The worker has exited, so its result can be read and the shared
		// state released without racing against it.
		ipBelong = p1->ipBelong;
		delete p1;
	}
	// Otherwise the worker is still running and still dereferences *p1;
	// the object is deliberately left alive rather than freed under it.

	CloseHandle(hThread);
	return ipBelong;
}

qt测试代码

/**
 * Builds the test window and wires the lookup button to GetIPBelong().
 *
 * @param parent Parent widget forwarded to QMainWindow.
 */
testIPBelong::testIPBelong(QWidget *parent)
	: QMainWindow(parent)
{
	ui.setupUi(this);

	// Clicking the button runs the lookup slot of this window.
	QObject::connect(ui.pushButton_IPBelong, SIGNAL(clicked()),
		this, SLOT(GetIPBelong()));
}

/** Nothing to release explicitly; members clean up after themselves. */
testIPBelong::~testIPBelong() = default;

void testIPBelong::GetIPBelong()
{
 	QString IP = ui.lineEdit_IP->text();
	//std::string strip = GetIpAddress(IP.toStdString());
	//std::ofstream   out_file3("C:\\Users\\Administrator\\Desktop\\4.txt");
	//out_file3 << strip;              //输出到文件

	//QString IPBelong = QString::fromLocal8Bit(strip.data());
	
	//QString IPBelong = GetIpAddress(IP.toStdString());
	std::string url = "http://www.ip138.com/ips138.asp?ip=" + IP.toStdString();
	QTime time;
	time.start();
	QString IPBelong = urlAccess(url.c_str(), 5);
	int i = time.elapsed();
	ui.lineEdit_IBelong->setText(IPBelong + QString::number(i));

}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值