抓取网页的exe程序,其原理就是从上次完成的dll文件中读取抓取到的内容,然后发送到discuz论坛。这样dll文件就相当于一个插件:如果需要新增抓取数据源,只需要对网页内容进行分析,形成“标题—内容”数组,就可以实现发送了。其实插件式开发也是这样实现的。
该程序是基于CHtmlView的多文档程序,根据每一个标题生成一个视图,这个视图的工作就是找到要发送的网址,模拟点击操作,最终实现内容的发送。
核心代码如下:
//遍历插件文件夹,生成“标题—内容”数组;为避免重复发帖,这里为了简单,通过查询mysql数据库来判断是否已经发送过。以下代码在mainframe类中实现
CString strPath; GetModuleFileName( NULL,strPath.GetBufferSetLength( MAX_PATH + 1 ), MAX_PATH );
int index = strPath.ReverseFind( ‘\\’ );
strPath = strPath.Left( index ); strPath.Append( _T(“\\Ext\\*.*”) );
CFileFind find; BOOL isFind = find.FindFile( strPath );
LONG needCount = 0; while ( isFind )
{
sFind = find.FindNextFile();
if ( find.IsDots() )continue;
if ( find.IsDirectory() )continue;
CString strDll = find.GetFilePath();
int dllIndex = strDll.ReverseFind( ‘.’ );
CString strExt = strDll.Right( strDll.GetLength() – dllIndex );
if ( _T(“.dll”) != strExt )continue;
HMODULE hModule = ::LoadLibrary( strDll );
getDBCount pGetCount = ( getDBCount )GetProcAddress( hModule, “getDBCount” );
getDB pGetDB= ( getDB )GetProcAddress( hModule, “getDB” );
if ( !pGetCount )return; int nCount = pGetCount(); for ( int i = 0; i < nCount; i++ )
{ CDataBase* pDataBase = pGetDB( i ); vector< Info > infos = pDataBase->getInfo();
CString strLink = pDataBase->getLink();
CString strDBName = pDataBase->getDBName();
delete pDataBase; pDataBase = NULL;
BOOL isNeed = FALSE;
for ( size_t j = 0; j < infos.size(); j++ )
{
isNeed = needSend( infos[ j ].subject );
if ( !isNeed )continue; needCount++;
//打开窗口 CDocTemplate* pDocTemp = NULL;
POSITION pos = AfxGetApp()->GetFirstDocTemplatePosition();
CDocument* pDoc = NULL;
while (NULL != pos)
{ pDocTemp = theApp.GetNextDocTemplate(pos);
if (NULL != (pDoc = pDocTemp->OpenDocumentFile(NULL)))
{ pDoc->SetTitle( strDBName + _T(“–”) + infos[ j ].subject ); break; } }
CChildFrame* pChildFrame = ( CChildFrame* )GetActiveFrame();
CWebView* pView = ( CWebView* )pChildFrame->GetActiveView();
pView->m_strLink = strLink;
pView->m_subject = infos[ j ].subject;
pView->m_message = infos[ j ].message; pView->m_Status = EStatus_Null;
pView->Start();
}
}
} if ( 0 == needCount ) { MessageBox( _T(“没有要发送的内容!”), _T(“提示”) ); }
//发送消息主要在CHtmlView中实现
需要特别注意的是,要重写(override)CHtmlView中的下面这个函数,用来判断一个网页是否已经彻底加载完成,然后再做进一步的操作,从而实现同步。
DocumentComplete(LPDISPATCH pDisp, VARIANT* URL)
值得一提的是早上老婆没有起床我花了10分钟时间实现了博客园博客的抓取。插件开发真好,编程万岁!!!