测试用例
测试用例
<html>
<head>
<title>
Just a Test
</title>
</head>
<body>
gaofeng hello!!
<div>
<table bgcolor="red">
<tr>
<td bgcolor="yellow" border="2"> Name </td>
<td id="qualify1" border="1" class="blueBorder" bgcolor=blue></td>
</tr>
<tr>
<td><p id="qualify2" class="blueBorder" bgcolor="blue" border="1"> Surname </p></td>
<td></td>
</tr>
<tr>
<td> address </td>
<td></td>
</tr>
</table>
</div>
</body>
</html>
<head>
<title>
Just a Test
</title>
</head>
<body>
gaofeng hello!!
<div>
<table bgcolor="red">
<tr>
<td bgcolor="yellow" border="2"> Name </td>
<td id="qualify1" border="1" class="blueBorder" bgcolor=blue></td>
</tr>
<tr>
<td><p id="qualify2" class="blueBorder" bgcolor="blue" border="1"> Surname </p></td>
<td></td>
</tr>
<tr>
<td> address </td>
<td></td>
</tr>
</table>
</div>
</body>
</html>
头文件
#include <iostream>
#include < comdef.h >
#include < mshtml.h >
#include < string >
#include < fstream >
#include < vector >
#include < map >
#import < mshtml.tlb > no_auto_exclude
#include < comdef.h >
#include < mshtml.h >
#include < string >
#include < fstream >
#include < vector >
#include < map >
#import < mshtml.tlb > no_auto_exclude
测试代码
代码
// TestMSHTML.cpp : defines the entry point for the console application.
#include " stdafx.h "
#include " TestMSHTML.h "
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// The one and only application object.
CWinApp theApp;
// Output file used by PrintTabs and VisitNode; opened and closed in _tmain.
FILE * fout;
using namespace std;
// OLECHAR szHTML[] = OLESTR("<HTML><BODY>Hello World!</BODY></HTML>");
// Key type of the map parameter of FindAllElementHavingBg.
typedef int BorderAttribute;
// Placeholder for collecting elements keyed by their border attribute.
// For now it only fetches the document body and releases it again;
// borderValue2ElementMap is not modified.
//   pNewDoc                - document to inspect; ignored when NULL.
//   borderValue2ElementMap - intended output map (not used yet).
void FindAllElementHavingBg(IHTMLDocument2 * pNewDoc, map < BorderAttribute, IHTMLElement * > & borderValue2ElementMap)
{
    if (pNewDoc == NULL)
    {
        return;
    }
    // get_body can fail or hand back no body at all, so start from NULL and
    // release only a pointer that was really returned.
    IHTMLElement * pBody = NULL;
    if (SUCCEEDED(pNewDoc -> get_body( & pBody)) && pBody != NULL)
    {
        pBody -> Release();
    }
}
// Writes n indentation units to the global output file `fout`.
// Nothing is written when n is zero or negative.
void PrintTabs( int n)
{
    int remaining = n;
    while (remaining > 0)
    {
        fwprintf(fout, _T( " \t " ));
        -- remaining;
    }
}
void VisitNode(IHTMLElement * pElement, int level)
{
BSTR strName,strId,strTag;
PrintTabs(level);
pElement -> get_className( & strName);
pElement -> get_id( & strId);
pElement -> get_tagName( & strTag);
if (strTag != NULL)
{
fwprintf(fout,_T( " TagName:%s " ),strTag);
}
if (strName != NULL)
{
fwprintf(fout,_T( " className:%s " ),strName);
}
if (strId != NULL)
{
fwprintf(fout,_T( " Id:%s " ),strId);
}
SysFreeString(strName);
SysFreeString(strId);
SysFreeString(strTag);
BSTR strAttrName1 = _T( " border " );
BSTR strAttrName2 = _T( " bgcolor " );
VARIANT val;
pElement -> getAttribute(strAttrName1, 2 , & val);
if (val.vt != VT_NULL)
{
if (val.bstrVal != NULL)
{
fwprintf(fout,_T( " border:%s " ),val.bstrVal);
}
}
pElement -> getAttribute(strAttrName2, 2 , & val);
if (val.vt != VT_NULL)
{
if (val.bstrVal != NULL)
{
fwprintf(fout,_T( " bgcolor:%s " ),val.bstrVal);
}
}
fwprintf(fout,_T( " \n " ));
}
// Prints the DOM subtree rooted at pElement: the element itself is reported
// through VisitNode, then every child element is processed recursively with
// level + 1.
//   pElement - root of the subtree; nothing happens when NULL.
//   level    - nesting depth of pElement.
void Run(IHTMLElement * pElement, int level)
{
    if (pElement == NULL)
    {
        return;
    }
    VisitNode(pElement, level);

    // get_children hands back an IDispatch that has to be converted to the
    // collection interface; give up quietly when either step fails.
    IDispatch * pDisp = NULL;
    if (FAILED(pElement -> get_children( & pDisp)) || pDisp == NULL)
    {
        return;
    }
    IHTMLElementCollection * children = NULL;
    pDisp -> QueryInterface(IID_IHTMLElementCollection, ( void ** ) & children);
    pDisp -> Release();
    if (children == NULL)
    {
        return;
    }

    long len = 0;
    children -> get_length( & len);
    for ( long i = 0 ; i < len; ++ i)
    {
        // VT_I4 values live in lVal; the same index is passed for both
        // variant arguments of item(), as before.
        VARIANT index;
        VariantInit( & index);
        index.vt = VT_I4;
        index.lVal = i;

        IDispatch * pItem = NULL;
        if (FAILED(children -> item(index, index, & pItem)) || pItem == NULL)
        {
            continue;
        }
        IHTMLElement * child = NULL;
        pItem -> QueryInterface(IID_IHTMLElement, ( void ** ) & child);
        pItem -> Release();
        if (child != NULL)
        {
            Run(child, level + 1 );
            child -> Release();
        }
    }
    children -> Release();
}
// Prints the body text and the title of pNewDoc to stdout, then dumps the
// element tree below the body through Run (which writes to `fout`).
// A document without a body still gets its title printed; the tree dump is
// skipped in that case.
//   pNewDoc - parsed document; nothing happens when NULL.
void TestParse(IHTMLDocument2 * pNewDoc)
{
    if (pNewDoc == NULL)
    {
        return;
    }
    IHTMLElement * pBody = NULL;
    pNewDoc -> get_body( & pBody);

    BSTR strText = NULL;
    if (pBody != NULL)
    {
        pBody -> get_innerText( & strText);
        // A NULL BSTR stands for the empty string; never hand NULL to %s.
        wprintf(_T( " %s\n " ), strText != NULL ? strText : L"");
        SysFreeString(strText);
        strText = NULL;
    }

    pNewDoc -> get_title( & strText);
    wprintf(_T( " %s\n " ), strText != NULL ? strText : L"");
    SysFreeString(strText);

    cout << " Run begin.... " << endl;
    if (pBody != NULL)
    {
        Run(pBody, 0 );
    }
    cout << " Run end.... " << endl;

    if (pBody != NULL)
    {
        pBody -> Release();
    }
    // FindAllElementHavingBg(pNewDoc);
}
void TestMSHTML(wchar_t * wcontent)
{
IHTMLDocument2 * pDoc = NULL;
CoInitialize(NULL);
CoCreateInstance(CLSID_HTMLDocument,
NULL,
CLSCTX_INPROC_SERVER,
IID_IHTMLDocument2,
(LPVOID * ) & pDoc);
if (pDoc)
{
IPersistStreamInit * pPersist = NULL;
pDoc -> QueryInterface(IID_IPersistStreamInit,
(LPVOID * ) & pPersist);
if (pPersist)
{
IMarkupServices * pMS = NULL;
pPersist -> InitNew();
pPersist -> Release();
pDoc -> QueryInterface(IID_IMarkupServices,
(LPVOID * ) & pMS);
if (pMS)
{
IMarkupContainer * pMC = NULL;
IMarkupPointer * pMkStart = NULL;
IMarkupPointer * pMkFinish = NULL;
pMS -> CreateMarkupPointer( & pMkStart);
pMS -> CreateMarkupPointer( & pMkFinish);
pMS -> ParseString(wcontent,
0 ,
& pMC,
pMkStart,
pMkFinish);
if (pMC)
{
IHTMLDocument2 * pNewDoc = NULL;
pMC -> QueryInterface(IID_IHTMLDocument,
(LPVOID * ) & pNewDoc);
if (pNewDoc)
{
// do anything with pNewDoc, in this case
// get the body innerText.
TestParse(pNewDoc);
pNewDoc -> Release();
}
pMC -> Release();
}
if (pMkStart)
pMkStart -> Release();
if (pMkFinish)
pMkFinish -> Release();
pMS -> Release();
}
}
pDoc -> Release();
}
CoUninitialize();
}
// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
// a newly allocated, NUL-terminated wide-character string.
//   szStr - text to convert.
// Returns a buffer allocated with new[] that the caller must delete[], or
// NULL when szStr is NULL or the conversion fails.
inline wchar_t * AnsiToUnicode( const char * szStr )
{
    if (szStr == NULL)
    {
        return NULL;
    }
    // First call only measures; with length -1 the count includes the
    // terminating NUL.
    const int nLen = MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr, - 1 , NULL, 0 );
    if (nLen <= 0 )
    {
        return NULL;
    }
    wchar_t * pResult = new wchar_t[nLen + 1 ];
    if (MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr, - 1 , pResult, nLen ) == 0 )
    {
        // Never hand out a buffer that was not filled in.
        delete[] pResult;
        return NULL;
    }
    pResult[nLen] = L'\0';
    return pResult;
}
// Reads the text file named by `str`, appends its text to `content` and
// returns the accumulated `content` converted to wide characters.
//   str     - path of the file to read.
//   content - receives the file text (appended to whatever it holds).
// Returns the buffer produced by AnsiToUnicode; the caller is responsible
// for releasing it with delete[]. The result can be NULL when the
// conversion fails. A file that cannot be opened contributes no text.
wchar_t * ReadFromHtmlFile( string str, string & content)
{
    ifstream fin(str.c_str());
    string line;
    while (getline(fin, line))
    {
        // getline drops the line break; put it back so that tokens on
        // neighbouring lines are not glued together.
        content += line;
        content += '\n';
    }
    // The stream is closed by its destructor.
    return AnsiToUnicode(content.c_str());
}
int _tmain( int argc, TCHAR * argv[], TCHAR * envp[])
{
int nRetCode = 0 ;
// 初始化 MFC 并在失败时显示错误
if ( ! AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0 ))
{
// TODO: 更改错误代码以符合您的需要
_tprintf(_T( " 错误: MFC 初始化失败\n " ));
nRetCode = 1 ;
}
else
{
fout = fopen( " out.txt " , " w " );
string str = " test.html " ;
string content;
wchar_t * wcontent = ReadFromHtmlFile(str,content);
int len = wcslen(wcontent);
// cout << len << endl;
TestMSHTML(wcontent);
delete[] wcontent;
fclose(fout);
}
return nRetCode;
}
//
#include " stdafx.h "
#include " TestMSHTML.h "
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// The one and only application object.
CWinApp theApp;
// Output file used by PrintTabs and VisitNode; opened and closed in _tmain.
FILE * fout;
using namespace std;
// OLECHAR szHTML[] = OLESTR("<HTML><BODY>Hello World!</BODY></HTML>");
// Key type of the map parameter of FindAllElementHavingBg.
typedef int BorderAttribute;
// Placeholder for collecting elements keyed by their border attribute.
// For now it only fetches the document body and releases it again;
// borderValue2ElementMap is not modified.
//   pNewDoc                - document to inspect; ignored when NULL.
//   borderValue2ElementMap - intended output map (not used yet).
void FindAllElementHavingBg(IHTMLDocument2 * pNewDoc, map < BorderAttribute, IHTMLElement * > & borderValue2ElementMap)
{
    if (pNewDoc == NULL)
    {
        return;
    }
    // get_body can fail or hand back no body at all, so start from NULL and
    // release only a pointer that was really returned.
    IHTMLElement * pBody = NULL;
    if (SUCCEEDED(pNewDoc -> get_body( & pBody)) && pBody != NULL)
    {
        pBody -> Release();
    }
}
// Writes n indentation units to the global output file `fout`.
// Nothing is written when n is zero or negative.
void PrintTabs( int n)
{
    int remaining = n;
    while (remaining > 0)
    {
        fwprintf(fout, _T( " \t " ));
        -- remaining;
    }
}
void VisitNode(IHTMLElement * pElement, int level)
{
BSTR strName,strId,strTag;
PrintTabs(level);
pElement -> get_className( & strName);
pElement -> get_id( & strId);
pElement -> get_tagName( & strTag);
if (strTag != NULL)
{
fwprintf(fout,_T( " TagName:%s " ),strTag);
}
if (strName != NULL)
{
fwprintf(fout,_T( " className:%s " ),strName);
}
if (strId != NULL)
{
fwprintf(fout,_T( " Id:%s " ),strId);
}
SysFreeString(strName);
SysFreeString(strId);
SysFreeString(strTag);
BSTR strAttrName1 = _T( " border " );
BSTR strAttrName2 = _T( " bgcolor " );
VARIANT val;
pElement -> getAttribute(strAttrName1, 2 , & val);
if (val.vt != VT_NULL)
{
if (val.bstrVal != NULL)
{
fwprintf(fout,_T( " border:%s " ),val.bstrVal);
}
}
pElement -> getAttribute(strAttrName2, 2 , & val);
if (val.vt != VT_NULL)
{
if (val.bstrVal != NULL)
{
fwprintf(fout,_T( " bgcolor:%s " ),val.bstrVal);
}
}
fwprintf(fout,_T( " \n " ));
}
// Prints the DOM subtree rooted at pElement: the element itself is reported
// through VisitNode, then every child element is processed recursively with
// level + 1.
//   pElement - root of the subtree; nothing happens when NULL.
//   level    - nesting depth of pElement.
void Run(IHTMLElement * pElement, int level)
{
    if (pElement == NULL)
    {
        return;
    }
    VisitNode(pElement, level);

    // get_children hands back an IDispatch that has to be converted to the
    // collection interface; give up quietly when either step fails.
    IDispatch * pDisp = NULL;
    if (FAILED(pElement -> get_children( & pDisp)) || pDisp == NULL)
    {
        return;
    }
    IHTMLElementCollection * children = NULL;
    pDisp -> QueryInterface(IID_IHTMLElementCollection, ( void ** ) & children);
    pDisp -> Release();
    if (children == NULL)
    {
        return;
    }

    long len = 0;
    children -> get_length( & len);
    for ( long i = 0 ; i < len; ++ i)
    {
        // VT_I4 values live in lVal; the same index is passed for both
        // variant arguments of item(), as before.
        VARIANT index;
        VariantInit( & index);
        index.vt = VT_I4;
        index.lVal = i;

        IDispatch * pItem = NULL;
        if (FAILED(children -> item(index, index, & pItem)) || pItem == NULL)
        {
            continue;
        }
        IHTMLElement * child = NULL;
        pItem -> QueryInterface(IID_IHTMLElement, ( void ** ) & child);
        pItem -> Release();
        if (child != NULL)
        {
            Run(child, level + 1 );
            child -> Release();
        }
    }
    children -> Release();
}
// Prints the body text and the title of pNewDoc to stdout, then dumps the
// element tree below the body through Run (which writes to `fout`).
// A document without a body still gets its title printed; the tree dump is
// skipped in that case.
//   pNewDoc - parsed document; nothing happens when NULL.
void TestParse(IHTMLDocument2 * pNewDoc)
{
    if (pNewDoc == NULL)
    {
        return;
    }
    IHTMLElement * pBody = NULL;
    pNewDoc -> get_body( & pBody);

    BSTR strText = NULL;
    if (pBody != NULL)
    {
        pBody -> get_innerText( & strText);
        // A NULL BSTR stands for the empty string; never hand NULL to %s.
        wprintf(_T( " %s\n " ), strText != NULL ? strText : L"");
        SysFreeString(strText);
        strText = NULL;
    }

    pNewDoc -> get_title( & strText);
    wprintf(_T( " %s\n " ), strText != NULL ? strText : L"");
    SysFreeString(strText);

    cout << " Run begin.... " << endl;
    if (pBody != NULL)
    {
        Run(pBody, 0 );
    }
    cout << " Run end.... " << endl;

    if (pBody != NULL)
    {
        pBody -> Release();
    }
    // FindAllElementHavingBg(pNewDoc);
}
void TestMSHTML(wchar_t * wcontent)
{
IHTMLDocument2 * pDoc = NULL;
CoInitialize(NULL);
CoCreateInstance(CLSID_HTMLDocument,
NULL,
CLSCTX_INPROC_SERVER,
IID_IHTMLDocument2,
(LPVOID * ) & pDoc);
if (pDoc)
{
IPersistStreamInit * pPersist = NULL;
pDoc -> QueryInterface(IID_IPersistStreamInit,
(LPVOID * ) & pPersist);
if (pPersist)
{
IMarkupServices * pMS = NULL;
pPersist -> InitNew();
pPersist -> Release();
pDoc -> QueryInterface(IID_IMarkupServices,
(LPVOID * ) & pMS);
if (pMS)
{
IMarkupContainer * pMC = NULL;
IMarkupPointer * pMkStart = NULL;
IMarkupPointer * pMkFinish = NULL;
pMS -> CreateMarkupPointer( & pMkStart);
pMS -> CreateMarkupPointer( & pMkFinish);
pMS -> ParseString(wcontent,
0 ,
& pMC,
pMkStart,
pMkFinish);
if (pMC)
{
IHTMLDocument2 * pNewDoc = NULL;
pMC -> QueryInterface(IID_IHTMLDocument,
(LPVOID * ) & pNewDoc);
if (pNewDoc)
{
// do anything with pNewDoc, in this case
// get the body innerText.
TestParse(pNewDoc);
pNewDoc -> Release();
}
pMC -> Release();
}
if (pMkStart)
pMkStart -> Release();
if (pMkFinish)
pMkFinish -> Release();
pMS -> Release();
}
}
pDoc -> Release();
}
CoUninitialize();
}
// Converts a NUL-terminated string in the system ANSI code page (CP_ACP) to
// a newly allocated, NUL-terminated wide-character string.
//   szStr - text to convert.
// Returns a buffer allocated with new[] that the caller must delete[], or
// NULL when szStr is NULL or the conversion fails.
inline wchar_t * AnsiToUnicode( const char * szStr )
{
    if (szStr == NULL)
    {
        return NULL;
    }
    // First call only measures; with length -1 the count includes the
    // terminating NUL.
    const int nLen = MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr, - 1 , NULL, 0 );
    if (nLen <= 0 )
    {
        return NULL;
    }
    wchar_t * pResult = new wchar_t[nLen + 1 ];
    if (MultiByteToWideChar( CP_ACP, MB_PRECOMPOSED, szStr, - 1 , pResult, nLen ) == 0 )
    {
        // Never hand out a buffer that was not filled in.
        delete[] pResult;
        return NULL;
    }
    pResult[nLen] = L'\0';
    return pResult;
}
// Reads the text file named by `str`, appends its text to `content` and
// returns the accumulated `content` converted to wide characters.
//   str     - path of the file to read.
//   content - receives the file text (appended to whatever it holds).
// Returns the buffer produced by AnsiToUnicode; the caller is responsible
// for releasing it with delete[]. The result can be NULL when the
// conversion fails. A file that cannot be opened contributes no text.
wchar_t * ReadFromHtmlFile( string str, string & content)
{
    ifstream fin(str.c_str());
    string line;
    while (getline(fin, line))
    {
        // getline drops the line break; put it back so that tokens on
        // neighbouring lines are not glued together.
        content += line;
        content += '\n';
    }
    // The stream is closed by its destructor.
    return AnsiToUnicode(content.c_str());
}
int _tmain( int argc, TCHAR * argv[], TCHAR * envp[])
{
int nRetCode = 0 ;
// 初始化 MFC 并在失败时显示错误
if ( ! AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0 ))
{
// TODO: 更改错误代码以符合您的需要
_tprintf(_T( " 错误: MFC 初始化失败\n " ));
nRetCode = 1 ;
}
else
{
fout = fopen( " out.txt " , " w " );
string str = " test.html " ;
string content;
wchar_t * wcontent = ReadFromHtmlFile(str,content);
int len = wcslen(wcontent);
// cout << len << endl;
TestMSHTML(wcontent);
delete[] wcontent;
fclose(fout);
}
return nRetCode;
}
输出结果
TagName:BODY
TagName:DIV
TagName:TABLE bgcolor:#ff0000
TagName:TBODY
TagName:TR
TagName:TD border:2 bgcolor:#ffff00
TagName:TD className:blueBorder Id:qualify1 border:1 bgcolor:#0000ff
TagName:TR
TagName:TD
TagName:P className:blueBorder Id:qualify2 border:1 bgcolor:blue
TagName:TD
TagName:TR
TagName:TD
TagName:TD
TagName:DIV
TagName:TABLE bgcolor:#ff0000
TagName:TBODY
TagName:TR
TagName:TD border:2 bgcolor:#ffff00
TagName:TD className:blueBorder Id:qualify1 border:1 bgcolor:#0000ff
TagName:TR
TagName:TD
TagName:P className:blueBorder Id:qualify2 border:1 bgcolor:blue
TagName:TD
TagName:TR
TagName:TD
TagName:TD