7.3 点击Element和填写表单
我们已经可以安全地加载页面、抽取信息,但是有时浏览网页时还需要填写表单并提交,或者点击某个元素来触发onclick,以便执行一段
JavaScript代码。
要想点击一个element,首先需要获得这个element(前面的xpathNodes也许可以帮我们做到这点)。如果是input element,我们需
要调用它所在表单的submit方法;如果是anchor的话,我们就需要访问它的href里的url。如果这个元素有onclick属性,这段脚本必须首先被
执行。
填写text field,需要修改DOM tree,设置属性的值(比如HTMLInputElement)或者插入一个文本子节点(比如
HTMLTextAreaElement)。看看下面的例子就会发现这并不困难。
译注:主要有2个方法 enter 和 click。
enter给text和textArea设置值,text直接设置value属性就行了,而textArea先要删除所有子节点,然后增加一个新的text
node来包含我们想要输入的内容。
click点击一个元素,比如带onclick属性的element、anchor、submit按钮等等。
package es.ladyr.javaxpcom.browser;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.eclipse.swt.SWT;
import org.eclipse.swt.SWTError;
import org.eclipse.swt.browser.Browser;
import org.eclipse.swt.browser.ProgressEvent;
import org.eclipse.swt.browser.ProgressListener;
import org.eclipse.swt.widgets.Display;
import org.eclipse.swt.widgets.Shell;
import org.mozilla.dom.html.HTMLDocumentImpl;
import org.mozilla.dom.NodeFactory;
import org.mozilla.interfaces.nsIComponentManager;
import org.mozilla.interfaces.nsIDOMDocument;
import org.mozilla.interfaces.nsIDOMHTMLDocument;
import org.mozilla.interfaces.nsIDOMNode;
import org.mozilla.interfaces.nsIDOMWindow;
import org.mozilla.interfaces.nsIDOMXPathEvaluator;
import org.mozilla.interfaces.nsIDOMXPathNSResolver;
import org.mozilla.interfaces.nsIDOMXPathResult;
import org.mozilla.interfaces.nsISupports;
import org.mozilla.interfaces.nsIWebBrowser;
import org.mozilla.xpcom.Mozilla;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.html.HTMLAnchorElement;
import org.w3c.dom.html.HTMLDocument;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFormElement;
import org.w3c.dom.html.HTMLInputElement;
import org.w3c.dom.html.HTMLTextAreaElement;
public class SimpleBrowserWithClick {
// Contract ID used to create the Mozilla XPath evaluator XPCOM component.
private final static String NS_IDOMXPATHEVALUATOR_CONTRACTID = "@mozilla.org/dom/xpath-evaluator;1";
// SWT browser widget; assigned on the "SWT-Event-Thread" started by the constructor.
private Browser browser;
// SWT display, needed to run code on the SWT event thread
// (via display.syncExec).
private Display display;
// Latch used to track page loading. A new latch with a count of 1 is
// created when a page load or form submit starts, and the browser's
// progress listener counts it down when loading completes.
private CountDownLatch latch;
// Default page-load timeout in milliseconds (60 seconds).
private long defaultTimeout = 60000;
// XPath evaluator XPCOM component, created at the end of the constructor.
private nsIDOMXPathEvaluator xpathEval;
/**
 * Creates a web browser which is able to load pages, wait until a page
 * is completely loaded, and solve XPath expressions returning the
 * corresponding nodes.
 * <p>
 * A thread named "SWT-Event-Thread" is started; it creates the SWT
 * display, shell and browser widget and then runs the SWT event loop.
 * This constructor blocks until that thread signals that initialization
 * has finished (successfully or not).
 *
 * @param xulrunnerPath path of the XULRunner files.
 *        NOTE(review): this value is not read anywhere in this constructor.
 * @throws IllegalStateException if the browser widget is not available
 *         once initialization has finished
 */
public SimpleBrowserWithClick (final String xulrunnerPath) {
    // Latch used to wait for the browser initialization.
    final CountDownLatch initLatch = new CountDownLatch(1);
    // The browser needs a window manager to work. SWT is used for the
    // graphical interface, so browser methods must be executed in the
    // SWT event thread; calling them from another thread may throw.
    new Thread("SWT-Event-Thread") {
        @Override
        public void run() {
            display = new Display();
            Shell shell = new Shell(display);
            shell.setSize(800, 600);
            shell.open();
            try {
                browser = new Browser(shell, SWT.MOZILLA);
            } catch (SWTError e) {
                System.out.println("Could not instantiate Browser: " + e.getMessage());
                // Release the waiting constructor; otherwise it would
                // block forever on the initialization latch.
                initLatch.countDown();
                return;
            }
            // Adapt browser size to shell size
            browser.setBounds(shell.getClientArea());
            // Listens for page loading status.
            browser.addProgressListener(new ProgressListener() {
                public void changed(ProgressEvent event) {
                }
                public void completed(ProgressEvent event) {
                    // A completion event may arrive before any load has
                    // been requested through go()/submitForm(), in which
                    // case no latch exists yet.
                    CountDownLatch pending = latch;
                    if (pending != null) {
                        pending.countDown();
                    }
                }
            });
            // Release the initialization latch (count 1 -> 0).
            initLatch.countDown();
            while (!shell.isDisposed()) {
                if (!display.readAndDispatch()) {
                    display.sleep();
                }
            }
            System.exit(0);
        }
    }.start();
    try {
        // Waits until the initialization latch is released.
        initLatch.await();
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers can observe it.
        Thread.currentThread().interrupt();
    }
    if (browser == null) {
        throw new IllegalStateException(
                "The SWT browser widget is not available after initialization.");
    }
    // Creates the XPath evaluator XPCOM component
    Mozilla moz = Mozilla.getInstance();
    nsIComponentManager componentManager = moz.getComponentManager();
    xpathEval = (nsIDOMXPathEvaluator) componentManager.createInstanceByContractID(
            NS_IDOMXPATHEVALUATOR_CONTRACTID, null,
            nsIDOMXPathEvaluator.NS_IDOMXPATHEVALUATOR_IID);
}
/**
 * Navigates the browser to the given URL and blocks until the page has
 * finished loading or the default timeout has expired.
 *
 * @param url address of the page to load
 * @throws SimpleBrowserException if the page did not finish loading
 *         within the default timeout
 */
public void go(final String url) throws SimpleBrowserException {
    // A fresh single-count latch marks the start of this page load.
    latch = new CountDownLatch(1);
    // The navigation itself has to run on the SWT event thread.
    final Runnable navigate = new Runnable() {
        public void run() {
            browser.setUrl(url);
        }
    };
    display.syncExec(navigate);
    // Block until loading finishes; waitLoad reports true on timeout.
    if (waitLoad(defaultTimeout)) {
        throw new SimpleBrowserException("Timeout waiting page loading.");
    }
}
/**
*
* @return an W3C HTML Document implementation corresponding to
* the Mozilla DOM HTML document currently loaded in the browser.
* @throws SimpleBrowserException
*/
public HTMLDocument getW3CDocument() {
class DocumentGetter implements Runnable {
private nsIDOMHTMLDocument htmldoc;
public void run(){
nsIWebBrowser webBrowser = (nsIWebBrowser)browser.getWebBrowser();
if (webBrowser == null) {
System.out.println("Could not get the nsIWebBrowser from the Browser
widget");
}
nsIDOMWindow dw = webBrowser.getContentDOMWindow();
nsIDOMDocument nsDoc = dw.getDocument();
htmldoc = (nsIDOMHTMLDocument) nsDoc
.queryInterface
(nsIDOMHTMLDocument.NS_IDOMHTMLDOCUMENT_IID);
}
public nsIDOMHTMLDocument getHtmldoc() {
return htmldoc;
}}
DocumentGetter dg = new DocumentGetter();
display.syncExec(dg);
return HTMLDocumentImpl.getDOMInstance(dg.getHtmldoc());
}
/**
 * Evaluates an XPath expression against the document currently loaded
 * in the browser, using the document itself as the context node.
 *
 * @param xpath the XPath expression to evaluate
 * @return a list with the nodes corresponding to the given xpath
 */
public List<Node> xpathNodes(String xpath) {
    final HTMLDocumentImpl currentDocument = (HTMLDocumentImpl) getW3CDocument();
    final nsIDOMNode contextNode = currentDocument.getInstance();
    return xPathNodes(xpath, contextNode);
}
/**
 * Evaluates an XPath expression against the document currently loaded
 * in the browser and returns the matches typed as <code>nodeClass</code>.
 * <p>
 * The result is an unchecked view of the matched nodes:
 * <code>nodeClass</code> only fixes the static element type and is not
 * consulted at runtime, so a mismatching node surfaces as a
 * <code>ClassCastException</code> where the caller reads the element.
 *
 * @param <T> the expected node type
 * @param xpath the XPath expression to evaluate
 * @param nodeClass class token for the expected node type
 * @return a list of <code>nodeClass</code> nodes corresponding
 *         to the given xpath
 */
@SuppressWarnings("unchecked")
public <T extends Node> List<T> xpathNodes(String xpath, Class<T> nodeClass) {
    final HTMLDocumentImpl currentDocument = (HTMLDocumentImpl) getW3CDocument();
    final List<Node> matches = xPathNodes(xpath, currentDocument.getInstance());
    return (List<T>) matches;
}
/**
* Enters the given text in a W3C input node. If the node is not
* a HTMLInputElement or a HTMLTextAreaElement instance, then
* a exception is thrown.
*
* @param node
* @param text
* @throws SimpleBrowserException
*/
public void enter(final Node node, final String text) throws SimpleBrowserException {
if (node instanceof HTMLInputElement) {
HTMLInputElement textComponent = (HTMLInputElement) node;
enter(textComponent, text);
} else if (node instanceof HTMLTextAreaElement) {
HTMLTextAreaElement textComponent = (HTMLTextAreaElement) node;
enter(textComponent, text);
} else {
throw new SimpleBrowserException(
"enter only works with textfield (HTMLInputElement) or textarea
(HTMLTextAreaElement)");
}
}
/**
 * Sets the value of a HTMLInputElement to the given text. A
 * <code>null</code> text is treated as the empty string. The value is
 * assigned on the SWT event thread.
 *
 * @param inputElement the input element to fill
 * @param text the text to enter, or <code>null</code> for an empty value
 */
public void enter(final HTMLInputElement inputElement, String text) {
    final String value = (text != null) ? text : "";
    final Runnable assignValue = new Runnable() {
        public void run() {
            inputElement.setValue(value);
        }
    };
    display.syncExec(assignValue);
}
/**
 * Replaces the content of a HTMLTextAreaElement with the given text. A
 * <code>null</code> text is treated as the empty string. All existing
 * child nodes are removed and a single new text node is appended; the
 * work is done on the SWT event thread.
 *
 * @param textArea the text area to fill
 * @param text the text to enter, or <code>null</code> for empty content
 */
public void enter(final HTMLTextAreaElement textArea, String text) {
    final String inputText = (text == null) ? "" : text;
    display.syncExec(new Runnable() {
        public void run() {
            // Empty the text area. The child list is walked backwards so
            // that removals cannot shift not-yet-visited entries when the
            // NodeList is live; a forward index loop would skip children.
            NodeList nodeList = textArea.getChildNodes();
            for (int i = nodeList.getLength() - 1; i >= 0; i--) {
                textArea.removeChild(nodeList.item(i));
            }
            // Fill the text area with a new text node containing the given text
            try {
                textArea.appendChild(getW3CDocument().createTextNode(inputText));
            } catch (DOMException e) {
                System.err.println("Problems inserting the new child node.");
                e.printStackTrace();
            }
        }
    });
}
/**
 * Clicks on a W3C node. If the HTML element has a non-empty 'onclick'
 * attribute, that script is executed first. Afterwards:
 * <ul>
 * <li>an input of type 'submit' or 'image' submits its form, provided
 * the input belongs to a form with a non-empty 'action';</li>
 * <li>an anchor with a non-empty 'href' is followed via
 * <code>go</code>;</li>
 * <li>any other HTMLElement gets only the 'onclick' handling.</li>
 * </ul>
 *
 * @param node the node to click
 * @throws SimpleBrowserException if the node is not a HTMLElement, or if
 *         the resulting page load times out
 */
public void click(Node node) throws SimpleBrowserException {
    // Only HTML elements can be clicked.
    if (!(node instanceof HTMLElement)) {
        throw new SimpleBrowserException(
                "Click only works with HTMLElements with onclick "
                + " attribute or links (HTMLAnchorElement) or buttons (HTMLButtonElement)");
    }
    // Execute the 'onclick' script, if any, on the SWT event thread.
    final HTMLElement ele = (HTMLElement) node;
    display.syncExec(new Runnable() {
        public void run() {
            String onclick = ele.getAttribute("onclick");
            if (onclick != null && !onclick.equals("")) {
                browser.execute(onclick);
            }
        }
    });
    if (node instanceof HTMLInputElement) {
        // Submit buttons (types 'submit' and 'image') submit their form,
        // but only when the form has the required 'action' attribute.
        HTMLInputElement button = (HTMLInputElement) node;
        String type = button.getType();
        if ("submit".equalsIgnoreCase(type) || "image".equalsIgnoreCase(type)) {
            // An input that is not inside a form has no form to submit.
            HTMLFormElement form = button.getForm();
            if (form != null) {
                String formAction = form.getAction();
                if (formAction != null && !formAction.equals("")) {
                    submitForm(form);
                }
            }
        }
    } else if (node instanceof HTMLAnchorElement) {
        // For anchors it is enough to load the 'href' target.
        String href = ((HTMLAnchorElement) node).getHref();
        if (href != null && !href.equals("")) {
            go(href);
        }
    }
}
/**
 * Waits on the current page-load latch until the page has finished
 * loading or the given timeout expires. On timeout the load is stopped
 * on the SWT event thread and the latch is awaited once more, for up to
 * the same amount of time, so that the stop can take effect.
 *
 * @param millis maximum time to wait, in milliseconds
 * @return <code>true</code> if the first wait timed out,
 *         <code>false</code> if the page finished loading in time
 * @throws Error if the calling thread is interrupted while waiting; the
 *         thread's interrupt status is restored before throwing
 */
private boolean waitLoad(long millis) {
    try {
        // The latch is counted down when the progress listener receives
        // its 'completed' event.
        boolean timeout = !latch.await(millis, TimeUnit.MILLISECONDS);
        if (timeout) {
            // The timeout expired, so stop page loading.
            display.syncExec(new Runnable() {
                public void run() {
                    browser.stop();
                }
            });
            // Waits until the loading is stopped
            latch.await(millis, TimeUnit.MILLISECONDS);
        }
        return timeout;
    } catch (InterruptedException e) {
        // Keep the interruption visible to code further up the stack.
        Thread.currentThread().interrupt();
        throw new Error(e);
    }
}
/**
 * Evaluates an XPath expression relative to a context node and returns
 * the matching nodes as W3C nodes.
 *
 * @param xpath the XPath expression to evaluate
 * @param context the Mozilla DOM node used as evaluation context
 * @return the matching nodes, collected from an ordered node iterator
 */
private List<Node> xPathNodes(String xpath, nsIDOMNode context) {
    // Mozilla DOM HTML document behind the current W3C document
    final nsIDOMHTMLDocument mozDocument =
            ((HTMLDocumentImpl) getW3CDocument()).getInstance();
    // Name space resolver created for that document
    final nsIDOMXPathNSResolver resolver = xpathEval.createNSResolver(mozDocument);
    // Evaluate the expression, asking for an ordered node iterator
    final nsISupports rawResult = xpathEval.evaluate(xpath, context, resolver,
            nsIDOMXPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
    // Obtain the XPath result interface of the returned XPCOM object
    final nsIDOMXPathResult iterator = (nsIDOMXPathResult) rawResult.queryInterface(
            nsIDOMXPathResult.NS_IDOMXPATHRESULT_IID);
    // Copy the result nodes into a standard List; any XPCOMException
    // raised while iterating propagates unchanged to the caller.
    final List<Node> matches = getNodes(iterator);
    return matches;
}
/**
 * Drains an XPath result iterator into a list, converting each Mozilla
 * DOM node into its W3C counterpart through <code>NodeFactory</code>.
 * The cast to <code>T</code> is unchecked.
 *
 * @param <T> element type expected by the caller
 * @param result the XPath result to iterate
 * @return the converted nodes, in iteration order
 */
private <T> List<T> getNodes(nsIDOMXPathResult result) {
    final List<T> collected = new ArrayList<T>();
    for (nsIDOMNode current = result.iterateNext();
            current != null;
            current = result.iterateNext()) {
        // NodeFactory (mozdom4java) supplies the W3C implementation
        // corresponding to a Mozilla node.
        collected.add((T) NodeFactory.getNodeInstance(current));
    }
    return collected;
}
/**
 * Submits the given form on the SWT event thread and blocks until the
 * response page has finished loading or the default timeout expires.
 *
 * @param form the form to submit
 * @throws SimpleBrowserException if the response page did not finish
 *         loading within the default timeout
 */
private void submitForm(final HTMLFormElement form) throws SimpleBrowserException {
    // A fresh single-count latch tracks loading of the response page.
    latch = new CountDownLatch(1);
    final Runnable submit = new Runnable() {
        public void run() {
            form.submit();
        }
    };
    display.syncExec(submit);
    // Block until the response page finishes loading; true means timeout.
    if (waitLoad(defaultTimeout)) {
        throw new SimpleBrowserException("Timeout waiting page loading.");
    }
}
public static void main(String[] args) {
String xulrunnerPath = null;
if ( args.length > 0 ) {
xulrunnerPath = args[0];
}
// Instantiate our simple web browser
final SimpleBrowserWithClick simpleBrowser = new SimpleBrowserWithClick(xulrunnerPath);
try{
// Load a web page
simpleBrowser.go("http://www.my400800.cn
");
Thread.sleep(3000);
// Get the W3C DOM anchor element containing the text 'Noticias'
HTMLAnchorElement a =
simpleBrowser.xpathNodes("//a[contains(text(),'Noticias')]",
HTMLAnchorElement.class).get(0);
// Click on the anchor previously obtained
simpleBrowser.click(a);
Thread.sleep(2000);
// Get the input field to write search terms
simpleBrowser.display.syncExec(new Runnable() {
public void run(){
try{
Node node = simpleBrowser.xpathNodes("//input[@name='q']").get(0);
simpleBrowser.enter(node, "nasdaq");
}catch (SimpleBrowserException sbe){
sbe.printStackTrace();
}
}
});
// Node node = simpleBrowser.xpathNodes("//input[@name='q']").get(0);
// Enter the text 'nasdaq' in the input field
Thread.sleep(2000);
// Get the input button used to submit the form
HTMLInputElement e =
simpleBrowser.xpathNodes("//input[@value='Buscar en Noticias']",
HTMLInputElement.class).get(0);
// Click the input buuton and start the search for the term 'nasdaq'
// in news section
simpleBrowser.click(e);
Thread.sleep(3000);
// Load a different page with javascript examples
simpleBrowser.go("http://www.codearchive.com/code/0300/0309-acces009.htm");
// Get a W3C anchor element containing an 'onlick' attribute
a = simpleBrowser.xpathNodes("//a[contains(text(),'4')]",
HTMLAnchorElement.class).get(0);
// Click the anchor and then the javascript will be executed by
// our browser
simpleBrowser.click(a);
Thread.sleep(3000);
} catch (SimpleBrowserException e) {
System.err.println("Problems calling go method.");
e.printStackTrace();
} catch (InterruptedException e) {
System.err.println("Problems calling sleep.");
e.printStackTrace();
}
Runtime.getRuntime().halt(0);
}
}