对于网页来说,“可见的字符串”是什么概念?首先,元素的样式不能是 display: none 或 visibility: hidden;其次,可见字符串既包括容器标签中的文本,如
<td>hello</td> <div>hello</div> <span>hello</span>,也包括控件的标题,如 <input type="btn" value="确定"> 这样的字符串。我采用了两个函数来分别处理这两种情况:
1
/// <summary>
/// Recursively walks the element tree rooted at <paramref name="container"/> and
/// appends the text it finds to <c>mHuntedStringsList</c>.
/// </summary>
/// <remarks>
/// An element is skipped when it is null, has a null <c>innerHTML</c>, has
/// <c>style.display == "none"</c> or <c>style.visibility == "hidden"</c>, or is itself an iframe.
/// An element whose markup contains no IFRAME/TABLE/TD/TR/DIV tag is treated as a leaf:
/// its <c>innerText</c> is added directly when it has no child elements, otherwise the
/// result of <c>SpanFilters</c> is added. Any other element is descended into child by child.
/// </remarks>
/// <param name="container">Element to inspect. A null element is ignored.</param>
/// <param name="showIframeIndex">
/// When a child iframe is met whose <c>style.cssText</c> is missing or does not contain
/// "none", this is set to the number of hidden iframe children counted so far in the
/// same container. It is passed on unchanged to the recursive calls.
/// </param>
/// <exception cref="Exception">
/// Wraps an <see cref="ArgumentNullException"/> raised while traversing.
/// </exception>
private void SeekStringsInContianer(mshtml.IHTMLElement container, ref int showIframeIndex)
{
    try
    {
        if (null == container || null == container.innerHTML)
        {
            return;
        }

        // Skip elements that are hidden through style.display / style.visibility.
        if (null != container.style &&
            ((null != container.style.display && container.style.display.Equals("none")) ||
             (null != container.style.visibility && container.style.visibility.Equals("hidden"))))
        {
            return;
        }

        // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and
        // turns "IFRAME" into "ıframe" under Turkish-style cultures.
        if (string.Equals(container.tagName, "iframe", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Tag names are matched case-insensitively because the casing of the markup
        // returned by innerHTML is not guaranteed to be upper case.
        string html = container.innerHTML;
        bool hasNestedContainer =
            html.IndexOf("<IFRAME", StringComparison.OrdinalIgnoreCase) != -1 ||
            html.IndexOf("<TABLE", StringComparison.OrdinalIgnoreCase) != -1 ||
            html.IndexOf("<TD", StringComparison.OrdinalIgnoreCase) != -1 ||
            html.IndexOf("<TR", StringComparison.OrdinalIgnoreCase) != -1 ||
            html.IndexOf("<DIV", StringComparison.OrdinalIgnoreCase) != -1;

        if (null != container.innerText && !hasNestedContainer)
        {
            // Leaf: take the text as-is, or strip hidden children's text first.
            if (((mshtml.IHTMLElementCollection)container.children).length == 0)
            {
                mHuntedStringsList.Add(container.innerText);
            }
            else
            {
                mHuntedStringsList.Add(SpanFilters(container));
            }
        }
        else
        {
            mshtml.IHTMLElementCollection collection = (mshtml.IHTMLElementCollection)container.children;

            // Counts the hidden iframe children seen so far in this container.
            int hiddenIframeCount = 0;
            foreach (mshtml.IHTMLElement elem in collection)
            {
                if (string.Equals(elem.tagName, "iframe", StringComparison.OrdinalIgnoreCase))
                {
                    // An iframe counts as hidden when its cssText mentions "none".
                    if (null != elem.style &&
                        null != elem.style.cssText &&
                        elem.style.cssText.IndexOf("none", StringComparison.Ordinal) != -1)
                    {
                        hiddenIframeCount++;
                    }
                    else
                    {
                        showIframeIndex = hiddenIframeCount;
                    }
                }
                else
                {
                    SeekStringsInContianer(elem, ref showIframeIndex);
                }
            }
        }
    }
    catch (ArgumentNullException e)
    {
        throw new Exception("null container!", e);
    }
}
65![None.gif](/Images/OutliningIndicators/None.gif)
66![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
/// <summary>
/// Collects control captions, for example button text: scans the elements in
/// <c>container.all</c> and appends the <c>value</c> attribute of matching
/// <c>input</c> elements to <c>mHuntedStringsList</c>.
/// </summary>
/// <remarks>
/// An input matches when its <c>type</c> attribute equals "submit" or its <c>id</c>
/// attribute contains "txt".
/// </remarks>
/// <param name="container">Element whose <c>all</c> collection is scanned.</param>
/// <exception cref="Exception">
/// Wraps an <see cref="ArgumentNullException"/> raised while scanning.
/// </exception>
private void SeekStringsInControl(mshtml.IHTMLElement container)
{
    try
    {
        mshtml.IHTMLElementCollection collection = (mshtml.IHTMLElementCollection)container.all;
        foreach (mshtml.IHTMLElement elem in collection)
        {
            // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and
            // turns "INPUT" into "ınput" under Turkish-style cultures.
            if (!string.Equals(elem.tagName, "input", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            string attr = (string)elem.getAttribute("type", 0);
            string id = (string)elem.getAttribute("id", 0);

            // The literal is the receiver so a missing type attribute cannot
            // cause a NullReferenceException.
            if ("submit".Equals(attr) ||
                (null != id && id.IndexOf("txt", StringComparison.Ordinal) != -1))
            {
                mHuntedStringsList.Add((string)elem.getAttribute("value", 0));
            }
        }
    }
    catch (ArgumentNullException e)
    {
        throw new Exception("null document during seeking string in control!", e);
    }
}
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
2
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
3
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
4
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
5
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
6
![None.gif](/Images/OutliningIndicators/None.gif)
7
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
![dot.gif](/Images/dot.gif)
8
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
9
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
10
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
11
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
12
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
13
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
14
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
15
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
16
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
17
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
18
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
19
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
20
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
21
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
22
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
23
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
24
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
25
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
26
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
27
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
28
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
29
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
30
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
31
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
32
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
33
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
34
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
35
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
36
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
37
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
38
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
39
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
40
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
41
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
42
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
43
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
44
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
45
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
46
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
47
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
48
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
49
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
50
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
51
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
52
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
53
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
54
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
55
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
56
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
57
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
58
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
59
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
60
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
61
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
62
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
63
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
64
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
65
![None.gif](/Images/OutliningIndicators/None.gif)
66
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
67
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
68
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
69
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
70
![None.gif](/Images/OutliningIndicators/None.gif)
71
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
![dot.gif](/Images/dot.gif)
72
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
73
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
74
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
75
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
76
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
77
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
78
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
79
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
80
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
81
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
82
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
83
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
84
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
85
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
86
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
87
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
88
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
89
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
90
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
这两个函数主要用到了递归操作。由于 HTML 代码具有嵌套性,并且可以不规范地书写,所以在取得可见字符串的时候做了一些额外的工作。我使用下面的函数来完成这个工作:
1
/// <summary>
/// Returns the text of <paramref name="element"/> with the text of its hidden
/// descendants filtered out.
/// </summary>
/// <param name="element">Element whose <c>innerText</c> is the starting text.</param>
/// <returns>
/// The element's <c>innerText</c> unchanged when it has no child elements; otherwise
/// that text after it has been passed through <c>FilterHiddenText</c>.
/// </returns>
private string SpanFilters(mshtml.IHTMLElement element)
{
    string visibleText = element.innerText;
    mshtml.IHTMLElementCollection childElements = (mshtml.IHTMLElementCollection)element.children;

    // Only elements with children can contain hidden spans worth filtering.
    if (childElements.length != 0)
    {
        FilterHiddenText(element, ref visibleText);
    }

    return visibleText;
}
16![None.gif](/Images/OutliningIndicators/None.gif)
17
/// <summary>
/// Removes from <paramref name="srcStr"/> the text of hidden elements in the tree
/// rooted at <paramref name="element"/>.
/// </summary>
/// <remarks>
/// An element is treated as hidden when its <c>style.visibility</c> equals "hidden" or
/// its <c>style.display</c> equals "none". For such an element the first occurrence of
/// its <c>innerText</c> is removed from <paramref name="srcStr"/> and its children are
/// not visited. Otherwise the method recurses into each child element.
/// </remarks>
/// <param name="element">Root of the subtree to examine. A null element is ignored.</param>
/// <param name="srcStr">
/// Text to filter, updated in place. A null string is left untouched.
/// </param>
private void FilterHiddenText(mshtml.IHTMLElement element, ref string srcStr)
{
    // Nothing to examine, or nothing to remove text from.
    if (null == element || null == srcStr)
    {
        return;
    }

    string hiddenText = element.innerText;
    if (null != hiddenText &&
        null != element.style &&
        ((null != element.style.visibility && element.style.visibility.Equals("hidden")) ||
         (null != element.style.display && element.style.display.Equals("none"))))
    {
        // Ordinal search so the match length always equals hiddenText.Length.
        int pos = srcStr.IndexOf(hiddenText, StringComparison.Ordinal);

        // The hidden text may not occur in srcStr; Remove(-1, ...) would throw.
        if (pos >= 0)
        {
            srcStr = srcStr.Remove(pos, hiddenText.Length);
        }
        return;
    }

    mshtml.IHTMLElementCollection collection = (mshtml.IHTMLElementCollection)element.children;
    foreach (mshtml.IHTMLElement elem in collection)
    {
        FilterHiddenText(elem, ref srcStr);
    }
}
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
2
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
3
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
4
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
5
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
6
![None.gif](/Images/OutliningIndicators/None.gif)
7
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
![dot.gif](/Images/dot.gif)
8
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
9
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
10
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
11
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
12
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
13
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
14
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
15
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)
16
![None.gif](/Images/OutliningIndicators/None.gif)
17
![None.gif](/Images/OutliningIndicators/None.gif)
18
![ExpandedBlockStart.gif](/Images/OutliningIndicators/ExpandedBlockStart.gif)
![ContractedBlock.gif](/Images/OutliningIndicators/ContractedBlock.gif)
![dot.gif](/Images/dot.gif)
19
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
20
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
21
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
22
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
23
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
24
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
25
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
26
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
27
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
28
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
29
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
30
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
31
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
32
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
33
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
34
![ExpandedSubBlockStart.gif](/Images/OutliningIndicators/ExpandedSubBlockStart.gif)
![ContractedSubBlock.gif](/Images/OutliningIndicators/ContractedSubBlock.gif)
![dot.gif](/Images/dot.gif)
35
![InBlock.gif](/Images/OutliningIndicators/InBlock.gif)
36
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
37
![ExpandedSubBlockEnd.gif](/Images/OutliningIndicators/ExpandedSubBlockEnd.gif)
38
![ExpandedBlockEnd.gif](/Images/OutliningIndicators/ExpandedBlockEnd.gif)