QWebPage* page = new QWebPage();
this->webpage = page->mainFrame();
// this->webpage->load(url);
this->webpage->setHtml("<font color='red' style='size:14px;'> text in font . <a href='#'>testlink</font>");
QWebElementCollection list = this->webpage->findAllElements("*");
qDebug() << "elements count : " << list.count() << endl;
QWebElement e;
for(int i = 0; i < list.count(); i++){
e = list.at(i);
displayNode(e);
}
以上测试代码会在控制台依次打印出 HEAD、BODY、FONT、A 四个元素。
可见通过这种方式无法取得 HTML DOM 树中 Text 类型的节点。
研读源码(QWebFrame::findAllElements 最终会调用到 QWebElement::findAll):
in qwebelement.cpp
// Builds the collection of nodes selected by `selectorQuery`, using this
// element as the query context.
QWebElementCollection QWebElement::findAll(const QString &selectorQuery) const
{
    QWebElementCollection matches(*this, selectorQuery);
    return matches;
}
// Constructs a collection holding the result of running `query` against
// `contextElement`. The shared private data comes from
// QWebElementCollectionPrivate::create().
QWebElementCollection::QWebElementCollection(const QWebElement &contextElement, const QString &query)
{
    QWebElementCollectionPrivate* priv = QWebElementCollectionPrivate::create(contextElement.m_element, query);
    d = QExplicitlySharedDataPointer<QWebElementCollectionPrivate>(priv);
}
// Runs `query` as a selector query on `context` and wraps the resulting node
// list in a newly allocated private object.
// Returns 0 when `context` is null or when the query yields no node list.
QWebElementCollectionPrivate* QWebElementCollectionPrivate::create(const PassRefPtr<Node> &context, const QString &query)
{
    if (!context)
        return 0;

    // The selector matching itself is delegated to WebKit.
    // The exception code is written by querySelectorAll() but not inspected here.
    ExceptionCode ec = 0;
    RefPtr<NodeList> matched = context->querySelectorAll(query, ec);
    if (!matched)
        return 0;

    QWebElementCollectionPrivate* collection = new QWebElementCollectionPrivate;
    collection->m_result = matched;
    return collection;
}
WebCore/dom/Node.h 中对文本类型的节点(TEXT_NODE)是有定义的,也就是说,DOM 树中存在文本节点,只是在执行选择器查询(select)时,该类型的节点被过滤掉了。
// Kinds of DOM node, as quoted from WebCore's Node.h.
// TEXT_NODE (3) is the kind of node this write-up is trying to retrieve.
// NOTE(review): values 1-12 appear to mirror the W3C DOM Node.nodeType
// constants — confirm against the DOM specification.
enum NodeType {
ELEMENT_NODE = 1,
ATTRIBUTE_NODE = 2,
TEXT_NODE = 3,
CDATA_SECTION_NODE = 4,
ENTITY_REFERENCE_NODE = 5,
ENTITY_NODE = 6,
PROCESSING_INSTRUCTION_NODE = 7,
COMMENT_NODE = 8,
DOCUMENT_NODE = 9,
DOCUMENT_TYPE_NODE = 10,
DOCUMENT_FRAGMENT_NODE = 11,
NOTATION_NODE = 12,
XPATH_NAMESPACE_NODE = 13
};
Node.cpp
// Selector query entry point on a node. The body is abridged in this excerpt
// (the dotted line stands for omitted code); only the final hand-off to
// createSelectorNodeList(), with this node as the root, is shown.
// NOTE(review): `querySelectorList` is presumably parsed from `selectors` in
// the omitted part, which may also set `ec` — confirm against the full source.
PassRefPtr<NodeList> Node::querySelectorAll(const String& selectors, ExceptionCode& ec)
{
................................................
return createSelectorNodeList(this, querySelectorList);
}
SelectorNodeList.cpp
// Collects the nodes below `rootNode` that match at least one selector in
// `querySelectorList` and returns them as a StaticNodeList.
// Two strategies are used: a getElementById() lookup for a lone ID selector,
// and otherwise a full traversal of the subtree under `rootNode`.
PassRefPtr<StaticNodeList> createSelectorNodeList(Node* rootNode, const CSSSelectorList& querySelectorList)
{
Vector<RefPtr<Node> > nodes;
Document* document = rootNode->document();
// Non-null only when the list consists of exactly one selector.
CSSSelector* onlySelector = querySelectorList.hasOneSelector() ? querySelectorList.first() : 0;
bool strictParsing = !document->inCompatMode();
CSSStyleSelector::SelectorChecker selectorChecker(document, strictParsing);
// Lookup path: taken only with strict parsing, a root that is in the document, a single
// selector of kind Id, and a document without several elements sharing that id.
if (strictParsing && rootNode->inDocument() && onlySelector && onlySelector->m_match == CSSSelector::Id && !document->containsMultipleElementsWithId(onlySelector->m_value)) {
Element* element = document->getElementById(onlySelector->m_value);
// The hit must lie under rootNode (or rootNode is the document itself) and still pass the full selector check.
if (element && (rootNode->isDocumentNode() || element->isDescendantOf(rootNode)) && selectorChecker.checkSelector(onlySelector, element))
nodes.append(element);
} else {
// Traversal path: starts at rootNode's first child and advances with traverseNextNode(rootNode).
for (Node* n = rootNode->firstChild(); n; n = n->traverseNextNode(rootNode)) {
// Only element nodes are tested; any other node (text nodes included) is skipped and never appended.
if (n->isElementNode()) {
Element* element = static_cast<Element*>(n);
for (CSSSelector* selector = querySelectorList.first(); selector; selector = CSSSelectorList::next(selector)) {
if (selectorChecker.checkSelector(selector, element)) {
nodes.append(n);
// First matching selector is enough; stop so the node is appended only once.
break;
}
}
}
}
}
return StaticNodeList::adopt(nodes);
}
最终将问题锁定在下面这行判断(其后附上 Node 中 isElement 的实现):
if (n->isElementNode()) {
// Tells whether a node built with the given construction type counts as an
// element: true for the two element construction types, false for
// container, other and text. Any value outside the enumerators trips
// ASSERT_NOT_REACHED and then yields false.
inline bool Node::isElement(ConstructionType type)
{
    switch (type) {
    case CreateElement:
    case CreateElementZeroRefCount:
        return true;
    case CreateContainer:
    case CreateOther:
    case CreateText:
        return false;
    }

    ASSERT_NOT_REACHED();
    return false;
}
这里在执行选择器(selectors)查询的时候,将以 CreateText 方式构造的节点(即文本节点)过滤掉了。
最终解决方案:
修改qwebelement.cpp
// Returns the text of the wrapped node.
//  - no node            -> empty QString
//  - HTML element       -> its innerText()
//  - any other node     -> its textContent(true)
QString QWebElement::toPlainText() const
{
    /* Original implementation, replaced by alex (it returned an empty string
       for anything that is not an HTML element):
    if (!m_element || !m_element->isHTMLElement())
    return QString();
    return static_cast<HTMLElement*>(m_element)->innerText();
    */
    if (!m_element)
        return QString();

    // Non-HTML nodes (the text nodes this patch exposes) go through textContent().
    if (!m_element->isHTMLElement())
        return m_element->textContent(true);

    return static_cast<HTMLElement*>(m_element)->innerText();
}
修改SelectorNodeList.cpp
// Patched traversal loop: walks the nodes under rootNode, appending elements
// that match a selector and, additionally, every text node encountered.
for (Node* n = rootNode->firstChild(); n; n = n->traverseNextNode(rootNode)) {
if (n->isElementNode()) {
Element* element = static_cast<Element*>(n);
for (CSSSelector* selector = querySelectorList.first(); selector; selector = CSSSelectorList::next(selector)) {
if (selectorChecker.checkSelector(selector, element)) {
nodes.append(n);
// One matching selector is enough; stop so the node is appended only once.
break;
}
}
}
/* added by alex: also collect text nodes */
// NOTE(review): text nodes are appended without consulting the selector list,
// so every query that reaches this loop returns all text nodes in the subtree,
// whatever the selector says — confirm that this is acceptable for other callers.
if (n->isTextNode()) {
nodes.append(n);
}
/* end of alex's addition */
}
make
make install
build and run project
output :
elem is : "#text" / "font text "
补充(2013-07-25):
最近遇到需要给文本节点(text node)赋值的问题,需要再做如下改动:
修改qwebelement.cpp
void QWebElement::setPlainText(const QString &text)
{
/** alex
if (!m_element || !m_element->isHTMLElement())
return;
ExceptionCode exception = 0;
static_cast<HTMLElement*>(m_element)->setInnerText(text, exception);
*/
if (!m_element)
return;
if (m_element->isHTMLElement()) {
ExceptionCode exception = 0;
static_cast<HTMLElement*>(m_element)->setInnerText(text, exception);
return;
}
ExceptionCode exception = 0;
m_element->setTextContent(text, exception);
}