前段时间在写 web 页面,为了方便,用 selenium 写了一个级联打开页面上所有超链接的小程序,代码如下:
import com.thoughtworks.selenium.DefaultSelenium;
import com.thoughtworks.selenium.Selenium;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Thread that starts a Selenium RC browser session, opens the configured start
 * URL in a new window and then opens every hyperlink found on that page in its
 * own window, optionally descending into the opened pages up to the recursion
 * depth configured in the {@code EnviromentPara}.
 *
 * <p>Each window is opened with its URL as the Selenium window id, which is how
 * a page is later re-selected before its source is scanned.
 */
public class OpenLinkTestThread extends Thread {

    /**
     * Matches an anchor element with a double-quoted {@code href}; group 1 is
     * the href value. Compiled once because it never changes, and
     * case-insensitive because HTML tag and attribute names are.
     */
    private static final Pattern LINK_ELEMENT_PATTERN = Pattern.compile(
            "<a\\s.*?href=\"([^\"]+)\"[^>]*>(.*?)</a>", Pattern.CASE_INSENSITIVE);

    /** Timeout, in milliseconds, passed to {@code waitForPopUp} for every opened window. */
    private static final String POPUP_TIMEOUT_MILLIS = "100000";

    /** Pause, in milliseconds, before the browser is closed after a successful run. */
    private static final long CLOSE_DELAY_MILLIS = 10000L;

    /** Scheme prefix of links that run script instead of navigating. */
    private static final String JAVASCRIPT_SCHEME = "javascript:";

    private final EnviromentPara ep;

    /**
     * @param ep run configuration: browser launcher string, start URL, recursion
     *           settings and whether to close the browser when finished
     */
    public OpenLinkTestThread(EnviromentPara ep) {
        this.ep = ep;
    }

    @Override
    public void run() {
        openLinkTest();
    }

    /**
     * Runs the whole link-opening session. Failures are reported on standard
     * error rather than propagated, so the thread always terminates normally.
     * When close-on-finish is configured, the browser session is stopped even
     * if the run fails or the thread is interrupted.
     */
    public void openLinkTest() {
        Selenium selenium = null;
        boolean started = false;
        try {
            // Per the original author's note: 4444 is the default server port, and
            // ep.getBrowser() is a launcher string such as "*firefox",
            // "*googlechrome" or "*iexplore" (IE is not well supported).
            selenium = new DefaultSelenium("localhost", Constant.port, ep.getBrowser(), ep.getUrl());
            selenium.start();
            started = true;

            // Open the start page in a new window whose window id is the URL itself.
            selenium.openWindow(ep.getUrl(), ep.getUrl());
            selenium.waitForPopUp(ep.getUrl(), POPUP_TIMEOUT_MILLIS);

            // ep.isIsRecursion() decides whether opened pages are scanned in turn.
            openLinkForOnePage(selenium, ep.getBrowser(), ep.getUrl(), ep.isIsRecursion(), 1);

            if (ep.isCloseOnFinish()) {
                // Leave the opened windows visible for a moment before closing.
                Thread.sleep(CLOSE_DELAY_MILLIS);
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to whoever owns this thread.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Stop in finally so a failed or interrupted run does not leak the browser.
            if (started && ep.isCloseOnFinish()) {
                try {
                    selenium.stop();
                } catch (RuntimeException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Selects the window identified by {@code url}, extracts the links from its
     * HTML source and opens each of them in a new window (window id = link URL).
     *
     * @param selenium      running Selenium session
     * @param browser       browser launcher string; only passed along on recursion
     * @param url           window id (and URL) of the page to scan
     * @param recursion     whether to scan the opened pages as well
     * @param recursionDeep depth of the current page, starting at 1; recursion
     *                      continues only while this is below {@code ep.getRecursionDeep()}
     */
    public void openLinkForOnePage(Selenium selenium, String browser,
            String url, boolean recursion, int recursionDeep) {
        selenium.selectWindow(url);
        String htmlSource = selenium.getHtmlSource();
        List<String> links = getAllLinkForOnePage(htmlSource);
        if (links == null) {
            return;
        }
        for (String link : links) {
            selenium.openWindow(link, link);
            selenium.waitForPopUp(link, POPUP_TIMEOUT_MILLIS);
            if (recursion && recursionDeep < ep.getRecursionDeep()) {
                openLinkForOnePage(selenium, browser, link, recursion, recursionDeep + 1);
            }
        }
    }

    /**
     * Extracts the target of every hyperlink in the given HTML source.
     *
     * <p>Links that are empty, point to an in-page fragment ({@code #...}) or use
     * the {@code javascript:} scheme are skipped. The HTML entity {@code &amp;}
     * is decoded to {@code &} so the returned values are usable as URLs.
     *
     * @param htmlSource raw HTML of a page; may be {@code null}
     * @return the link targets in document order; never {@code null}
     */
    public List<String> getAllLinkForOnePage(String htmlSource) {
        List<String> links = new ArrayList<String>();
        if (htmlSource == null) {
            return links;
        }
        Matcher linkElementMatcher = LINK_ELEMENT_PATTERN.matcher(htmlSource);
        while (linkElementMatcher.find()) {
            // Group 1 is mandatory in the pattern, so it is non-null on every match.
            String href = linkElementMatcher.group(1).trim();
            if (href.length() == 0 || href.startsWith("#") || isJavascriptLink(href)) {
                continue;
            }
            // The raw page source carries '&' as the entity "&amp;".
            links.add(href.replace("&amp;", "&"));
        }
        return links;
    }

    /** Tells whether the href uses the javascript: scheme, ignoring case independently of locale. */
    private static boolean isJavascriptLink(String href) {
        return href.regionMatches(true, 0, JAVASCRIPT_SCHEME, 0, JAVASCRIPT_SCHEME.length());
    }
}