要求:
dom基础+javascript脚本
应用:
自动填充webform
抓取webpage数据
添加WebBrowser控件
// The WebBrowser control that hosts the page; every snippet below works through it.
private WebBrowser webBrowser1;
引用页面的document对象
// get web document: the managed wrapper around the DOM of the page currently loaded
HtmlDocument doc = webBrowser1.Document;
有了document对象,就可以像js一样操作doc,访问页面的所有对象。
// get all input elements (the method is GetElementsByTagName; GetElementsByTag does not exist)
HtmlElementCollection htmlElements = webBrowser1.Document.GetElementsByTagName("input");

// access every input element in web form
foreach (HtmlElement el in htmlElements)
{
    // GetAttribute already returns a string, so no ToString() call is needed.
    strInputName = el.GetAttribute("name");//get input element's name
    strInputValue = el.GetAttribute("value");//get input element's value
}
winForm调用webpage的函数
// Walk over each <input> element previously collected from the web form.
foreach (HtmlElement inputElement in htmlElements)
{
    // name attribute of the current input
    strInputName = inputElement.GetAttribute("name").ToString();
    // value attribute of the current input
    strInputValue = inputElement.GetAttribute("value").ToString();
}
<!-- web page function -->
<script>
// Shows the argument that the WinForms host passes in through InvokeScript.
function jsMethod(jsParam)
{
    alert(jsParam);
}
</script>
/* call jsMethod from winForm */
/// <summary>
/// Invokes the page's <c>jsMethod</c> script function, handing it <paramref name="Param"/>.
/// </summary>
/// <param name="Param">Text forwarded to the script function as its single argument.</param>
private void callJsMethod(string Param)
{
    HtmlDocument doc = webBrowser1.Document;
    if (doc == null)
    {
        return; // no page is loaded yet, so there is no script to call
    }

    // Forward the caller's argument instead of a hard-coded string.
    doc.InvokeScript("jsMethod", new object[] { Param });
}
webPage调用winForm方法
<script>
// Shows the argument that the WinForms host passes in through InvokeScript.
function jsMethod(jsParam)
{
    alert(jsParam);
}
</script>
/* call jsMethod from winForm */
/// <summary>
/// Invokes the page's <c>jsMethod</c> script function, handing it <paramref name="Param"/>.
/// </summary>
/// <param name="Param">Text forwarded to the script function as its single argument.</param>
private void callJsMethod(string Param)
{
    HtmlDocument doc = webBrowser1.Document;
    if (doc == null)
    {
        return; // no page is loaded yet, so there is no script to call
    }

    // Forward the caller's argument instead of a hard-coded string.
    doc.InvokeScript("jsMethod", new object[] { Param });
}
// winform code
/// <summary>
/// Form whose public methods are exposed to script running in the hosted page
/// through <c>window.external</c>.
/// </summary>
[PermissionSet(SecurityAction.Demand, Name = "FullTrust")]
[System.Runtime.InteropServices.ComVisibleAttribute(true)] // This property lets you integrate dynamic HTML (DHTML) code with your client application code
public partial class Form2 : Form
{
    /// <summary>
    /// Called from the page via <c>window.external.winFormMethod(...)</c>; must stay public.
    /// </summary>
    /// <param name="param">Text supplied by the page script.</param>
    public void winFormMethod(string param)
    {
        MessageBox.Show(param);
    }

    /// <summary>
    /// Registers this form as the scripting object of the browser control.
    /// </summary>
    // NOTE(review): the Load event subscription is not visible here; presumably it lives in the designer part.
    private void Form2_Load(object sender, EventArgs e)
    {
        webBrowser1.ObjectForScripting = this;//important
    }
}
// web page code
<input name="callWinMethod" onclick="window.external.winFormMethod('called from DHTML')">
/// <summary>
/// Form whose public methods are exposed to script running in the hosted page
/// through <c>window.external</c>.
/// </summary>
[PermissionSet(SecurityAction.Demand, Name = "FullTrust")]
[System.Runtime.InteropServices.ComVisibleAttribute(true)] // This property lets you integrate dynamic HTML (DHTML) code with your client application code
public partial class Form2 : Form
{
    /// <summary>
    /// Called from the page via <c>window.external.winFormMethod(...)</c>; must stay public.
    /// </summary>
    /// <param name="param">Text supplied by the page script.</param>
    public void winFormMethod(string param)
    {
        MessageBox.Show(param);
    }

    /// <summary>
    /// Registers this form as the scripting object of the browser control.
    /// </summary>
    // NOTE(review): the Load event subscription is not visible here; presumably it lives in the designer part.
    private void Form2_Load(object sender, EventArgs e)
    {
        webBrowser1.ObjectForScripting = this;//important
    }
}
// web page code
<input name="callWinMethod" onclick="window.external.winFormMethod('called from DHTML')">
要调用winform的方法,这两个属性是必须的
[PermissionSet(SecurityAction.Demand, Name = "FullTrust")]
[System.Runtime.InteropServices.ComVisibleAttribute(true)]
还有必须设置webBrowser1.ObjectForScripting = this,被调用的方法是public的。
有了上面这些准备要实现一些简单应用就很简单啦,不妨自己动手试试。
实例一
下面结合一个简单例子,使用webbrowser自动登录。
先分析webform的结构,下面这个登录页面包括两个输入框:用户名和密码,以及一个登录按钮。
<HTML>
<HEAD>
<title>test html</title>
</HEAD>
<body background="/bugnet/graphics/back2.gif">
<form name="mainform" method="post" action="bugl_login.aspx" id="mainform">
<b>Enter name</b><input id="uid" type="text" maxLength="50" size="25" name="uid"><br>
<b>Enter Password</b><input type="password" maxLength="20" size="25" name="pwd">
<input type="submit" value="go" name="go">
</form>
</body>
</HTML>
<HEAD>
<title>test html</title>
</HEAD>
<body background="/bugnet/graphics/back2.gif">
<form name="mainform" method="post" action="bugl_login.aspx" id="mainform">
<b>Enter name</b><input id="uid" type="text" maxLength="50" size="25" name="uid"><br>
<b>Enter Password</b><input type="password" maxLength="20" size="25" name="pwd">
<input type="submit" value="go" name="go">
</form>
</body>
</HTML>
在页面载入webbrowser之后,程序自动填充用户名和密码,触发登陆按钮。
/// <summary>
/// Fills the user name and password boxes of the loaded login page and clicks its submit button.
/// </summary>
/// <param name="sender">The browser control raising the event.</param>
/// <param name="e">Event data; not used.</param>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    string strUID = "userName@sdccn.com";
    string strPWD = "PWD";

    HtmlDocument doc = webBrowser1.Document;
    if (doc == null)
    {
        return;
    }

    // The collection's string indexer matches either the id or the name attribute.
    // The password box and the button in the sample page only carry a name.
    HtmlElement uidBox = doc.All["uid"];
    HtmlElement pwdBox = doc.All["pwd"];
    HtmlElement goButton = doc.All["go"];
    if (uidBox == null || pwdBox == null || goButton == null)
    {
        // This event also fires for pages without the login form (for example the
        // page shown after a successful login); do nothing there instead of crashing.
        return;
    }

    uidBox.SetAttribute("value", strUID);//fill name
    pwdBox.SetAttribute("value", strPWD);//fill pwd
    goButton.InvokeMember("click");//click go
}
{
string strUID = "userName@sdccn.com";
string strPWD = "PWD";
webBrowser1.Document.GetElementById("uid").InnerText = strUID;//fill name
webBrowser1.Document.GetElementById("pwd").InnerText = strPWD;//fill pwd
webBrowser1.Document.GetElementById("go").InvokeMember("click");//click go
}
自动登录就这样实现,利用这些可以完成一些重复登录工作,还可以使用来自动化测试webpage程序。
抓取页面数据,下面的页面有一个表格,如何把里面的数据提取出来?
看看页面DOM结构,一个table,三行三列
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<TITLE>New Document</TITLE>
<META NAME="Generator" CONTENT="EditPlus">
<META NAME="Author" CONTENT="">
<META NAME="Keywords" CONTENT="">
<META NAME="Description" CONTENT="">
</HEAD>
<BODY>
<TABLE border=1>
<TR>
<TD>name</TD>
<TD>age</TD>
<TD>score</TD>
</TR>
<TR>
<TD>agan</TD>
<TD>18</TD>
<TD>99</TD>
</TR>
<TR>
<TD>asca</TD>
<TD>18</TD>
<TD>88</TD>
</TR>
</TABLE>
</BODY>
</HTML>
了解这个表格结构就可以开始导入到winform中的DataTable中,然后在DataGridView中展示出来
<HTML>
<HEAD>
<TITLE>New Document</TITLE>
<META NAME="Generator" CONTENT="EditPlus">
<META NAME="Author" CONTENT="">
<META NAME="Keywords" CONTENT="">
<META NAME="Description" CONTENT="">
</HEAD>
<BODY>
<TABLE border=1>
<TR>
<TD>name</TD>
<TD>age</TD>
<TD>score</TD>
</TR>
<TR>
<TD>agan</TD>
<TD>18</TD>
<TD>99</TD>
</TR>
<TR>
<TD>asca</TD>
<TD>18</TD>
<TD>88</TD>
</TR>
</TABLE>
</BODY>
</HTML>
/// <summary>
/// Copies the first HTML table of the loaded page into a <see cref="DataTable"/>.
/// </summary>
/// <remarks>
/// The first row only determines how many columns are created (via <c>BuildHeader</c>);
/// every following row becomes one data row holding the trimmed text of its cells.
/// </remarks>
/// <returns>
/// The filled table, or <c>null</c> when no document is loaded, the page has no table,
/// or the table has no rows.
/// </returns>
private DataTable ImportToDataTable()
{
    HtmlDocument doc = webBrowser1.Document;
    if (doc == null)
    {
        return null;
    }

    HtmlElementCollection htmlTabs = doc.GetElementsByTagName("table");//get all tables in the dom
    if (htmlTabs == null || htmlTabs.Count == 0)
    {
        return null;
    }

    DataTable dt = null;
    HtmlElement htmlTable = htmlTabs[0];
    HtmlElementCollection htmlRows = htmlTable.GetElementsByTagName("tr");//get all rows
    bool isHeaderRow = true;
    foreach (HtmlElement htmlRow in htmlRows)
    {
        HtmlElementCollection htmlCells = htmlRow.GetElementsByTagName("td");
        if (isHeaderRow)//build table header
        {
            isHeaderRow = false;
            if (htmlCells.Count == 0)
            {
                // Header rows are often marked up with <th> instead of <td>.
                htmlCells = htmlRow.GetElementsByTagName("th");
            }
            BuildHeader(ref dt, htmlCells);
            continue;
        }

        DataRow dr = dt.NewRow();
        // Never index past the columns created from the header row.
        int cellCount = htmlCells.Count < dt.Columns.Count ? htmlCells.Count : dt.Columns.Count;
        for (int col = 0; col < cellCount; col++)
        {
            // Index by cell position so an empty cell leaves its own column blank
            // rather than shifting the remaining values to the left.
            string strValue = htmlCells[col].InnerText;
            if (strValue != null)
            {
                dr[col] = strValue.Trim();
            }
        }
        dt.Rows.Add(dr);
    }
    return dt;
}
/// <summary>
/// Creates <paramref name="dt"/> with one string column per cell when it does not exist yet.
/// </summary>
/// <param name="dt">Table to create; left untouched when it is already non-null.</param>
/// <param name="htmlCells">Cells of the header row; only their count is used.</param>
private void BuildHeader(ref DataTable dt, HtmlElementCollection htmlCells)
{
    // Read the count before anything else, as the original does.
    int columnCount = htmlCells.Count;
    if (dt != null)
    {
        return; // header was already built
    }

    DataTable table = new DataTable();
    int index = 0;
    while (index < columnCount)
    {
        // Columns are named col0, col1, ... and all hold strings.
        table.Columns.Add("col" + index, typeof(string));
        index++;
    }
    dt = table;
}
例子对导入的数据简单的以string来处理,其实可以做一些深入处理,比如使用正则表达式识别不同的数据类型,希望这个例子能起到抛砖引玉的作用。
{
HtmlElementCollection htmlTabs = webBrowser1.Document.GetElementsByTagName("table");//get all tables in the dom
DataTable dt = null;
DataRow dr = null;
string strValue = "";
int intII=0;
if(htmlTabs!=null&&htmlTabs.length>0)
{
HtmlElement htmlTable = htmlElements[0];
HtmlElementCollection htmlRows = htmlElement.GetElementsByTagName("tr");//get all rows
HtmlElementCollection htmlCells = null;
foreach (HtmlElement htmlRow in htmlRows)
{
if (htmlRow == htmlRows[0])//build table header
{
BuildHeader(ref dt, htmlCells)
}
else
{
htmlCells = htmlRow.GetElementsByTagName("td");
dr = dt.NewRow();
foreach (HtmlElement htmlCell in htmlCells)
{
if (htmlCell.InnerText!=null)
{
strValue = htmlCell.InnerText.Trim();
dr[intII++] = strValue;
}
}
dt.Rows.Add(dr);
}
}
}
return dt;
}
/// <summary>
/// Creates <paramref name="dt"/> with one string column per cell when it does not exist yet.
/// </summary>
/// <param name="dt">Table to create; left untouched when it is already non-null.</param>
/// <param name="htmlCells">Cells of the header row; only their count is used.</param>
private void BuildHeader(ref DataTable dt, HtmlElementCollection htmlCells)
{
    // Read the count before anything else, as the original does.
    int columnCount = htmlCells.Count;
    if (dt != null)
    {
        return; // header was already built
    }

    DataTable table = new DataTable();
    int index = 0;
    while (index < columnCount)
    {
        // Columns are named col0, col1, ... and all hold strings.
        table.Columns.Add("col" + index, typeof(string));
        index++;
    }
    dt = table;
}