解决QtWebKit中QWebElement无法取到TextNode的问题

    // Create a page and keep a handle to its main frame in this->webpage.
    QWebPage* page = new QWebPage();
    this->webpage = page->mainFrame();
//    this->webpage->load(url);
    // Feed the frame a small inline document instead of loading a URL.
    // The markup is well formed: <a> is closed before </font>, and the inline
    // style uses the real CSS property name (font-size).
    this->webpage->setHtml("<font color='red' style='font-size:14px;'> text in font . <a href='#'>testlink</a></font>");
    // Collect everything matched by the universal selector.
    QWebElementCollection list = this->webpage->findAllElements("*");
    // qDebug() already terminates each message with a newline, so no endl here.
    qDebug() << "elements count : " << list.count();
    // Dump every element in the collection, in order.
    for (int i = 0; i < list.count(); i++) {
        QWebElement e = list.at(i);
        displayNode(e);
    }

以上测试代码,会在console打印出 HEAD BODY FONT A。

发现无法取得html dom tree 中的 Text类型的节点。

研读源码:

in qwebelement.cpp

// Returns the collection selected by selectorQuery, using this element as the
// context. All of the work is delegated to the QWebElementCollection
// constructor.
QWebElementCollection QWebElement::findAll(const QString &selectorQuery) const
{
    return QWebElementCollection(*this, selectorQuery);
}

// Builds the collection by asking QWebElementCollectionPrivate::create() to run
// `query` against the context element's underlying node (m_element), and stores
// the returned private object in the shared data pointer `d`.
QWebElementCollection::QWebElementCollection(const QWebElement &contextElement, const QString &query)
{
    d = QExplicitlySharedDataPointer<QWebElementCollectionPrivate>(QWebElementCollectionPrivate::create(contextElement.m_element, query));
}

// Runs `query` as a selector against `context` and wraps the resulting node
// list in a newly allocated QWebElementCollectionPrivate.
// Returns 0 when `context` is null or when querySelectorAll() yields no list;
// otherwise returns the new object with m_result set to that list.
QWebElementCollectionPrivate* QWebElementCollectionPrivate::create(const PassRefPtr<Node> &context, const QString &query)
{
    // Nothing to query without a context node.
    if (!context)
        return 0;

    // Let WebKit do the hard work hehehe
    // NOTE(review): `exception` is handed to querySelectorAll() but is never
    // inspected afterwards.
    ExceptionCode exception = 0; // ###
    RefPtr<NodeList> nodes = context->querySelectorAll(query, exception);
    if (!nodes)
        return 0;

    QWebElementCollectionPrivate* priv = new QWebElementCollectionPrivate;
    priv->m_result = nodes;
    return priv;
}


webcore/dom/Node.h 中对文本类型的节点是有定义的(TEXT_NODE),也就是说 DOM 树里确实存在文本节点,只是在执行 select 的时候,这种类型的节点被过滤掉了。

    // Node type codes as declared in webcore/dom/Node.h. TEXT_NODE (3) shows
    // that text nodes are a distinct node type alongside ELEMENT_NODE (1).
    enum NodeType {
        ELEMENT_NODE = 1,
        ATTRIBUTE_NODE = 2,
        TEXT_NODE = 3,
        CDATA_SECTION_NODE = 4,
        ENTITY_REFERENCE_NODE = 5,
        ENTITY_NODE = 6,
        PROCESSING_INSTRUCTION_NODE = 7,
        COMMENT_NODE = 8,
        DOCUMENT_NODE = 9,
        DOCUMENT_TYPE_NODE = 10,
        DOCUMENT_FRAGMENT_NODE = 11,
        NOTATION_NODE = 12,
        XPATH_NAMESPACE_NODE = 13
    };

Node.cpp

PassRefPtr<NodeList> Node::querySelectorAll(const String& selectors, ExceptionCode& ec)
{
    ................................................

    return createSelectorNodeList(this, querySelectorList);
}


SelectorNodeList.cpp

// Collects the nodes reachable from rootNode that match querySelectorList and
// returns them as a StaticNodeList. Only element nodes are ever appended, so
// text nodes can never appear in the result.
PassRefPtr<StaticNodeList> createSelectorNodeList(Node* rootNode, const CSSSelectorList& querySelectorList)
{
    Vector<RefPtr<Node> > nodes;
    Document* document = rootNode->document();
    // Non-null only when the query consists of exactly one selector.
    CSSSelector* onlySelector = querySelectorList.hasOneSelector() ? querySelectorList.first() : 0;
    bool strictParsing = !document->inCompatMode();

    CSSStyleSelector::SelectorChecker selectorChecker(document, strictParsing);

    // Shortcut: a lone ID selector, in strict mode, on a root that is in the
    // document, with an id that is not duplicated. The element is looked up by
    // id instead of walking the tree.
    if (strictParsing && rootNode->inDocument() && onlySelector && onlySelector->m_match == CSSSelector::Id && !document->containsMultipleElementsWithId(onlySelector->m_value)) {
        Element* element = document->getElementById(onlySelector->m_value);
        // Accept it only if it sits under rootNode (or rootNode is the document)
        // and the selector check passes.
        if (element && (rootNode->isDocumentNode() || element->isDescendantOf(rootNode)) && selectorChecker.checkSelector(onlySelector, element))
            nodes.append(element);
    } else {
        // General case: start at rootNode's first child (rootNode itself is not
        // tested) and advance with traverseNextNode(rootNode).
        for (Node* n = rootNode->firstChild(); n; n = n->traverseNextNode(rootNode)) {
            // Non-element nodes (text nodes included) are skipped here.
            if (n->isElementNode()) {
                Element* element = static_cast<Element*>(n);
                // Append the node once, on the first selector in the list that matches.
                for (CSSSelector* selector = querySelectorList.first(); selector; selector = CSSSelectorList::next(selector)) {
                    if (selectorChecker.checkSelector(selector, element)) {
                        nodes.append(n);
                        break;
                    }
                }
            }
        }
    }
    
    return StaticNodeList::adopt(nodes);
}

最终将问题锁定在

if (n->isElementNode()) {

// Maps a ConstructionType to whether it denotes an element: true for
// CreateElement and CreateElementZeroRefCount, false for CreateContainer,
// CreateOther and CreateText.
inline bool Node::isElement(ConstructionType type)
{
    switch (type) {
        case CreateContainer:
        case CreateOther:
        case CreateText:
            return false;
        case CreateElement:
        case CreateElementZeroRefCount:
            return true;
    }
    // Reached only when `type` is none of the cases listed above.
    ASSERT_NOT_REACHED();
    return false;
}

这里在执行 selectors 的时候,通过 isElementNode() 的判断,把以 CreateText 方式构造的文本节点过滤掉了。


最终解决方案:


修改qwebelement.cpp

// Returns the text of the wrapped node.
// A null handle yields an empty QString. HTML elements report innerText();
// any other node falls back to textContent(true).
QString QWebElement::toPlainText() const
{
    // Previous implementation, kept for reference (modified by alex):
    //     if (!m_element || !m_element->isHTMLElement())
    //         return QString();
    //     return static_cast<HTMLElement*>(m_element)->innerText();

    // No underlying node: nothing to report.
    if (!m_element)
        return QString();

    // Nodes that are not HTML elements used to produce an empty string; they
    // now report their text content instead.
    if (!m_element->isHTMLElement())
        return m_element->textContent(true);

    return static_cast<HTMLElement*>(m_element)->innerText();
}

修改SelectorNodeList.cpp

        // Visit nodes starting from rootNode's first child.
        for (Node* n = rootNode->firstChild(); n; n = n->traverseNextNode(rootNode)) {
            // Elements are appended only when one of the selectors matches them.
            if (n->isElementNode()) {
                Element* element = static_cast<Element*>(n);
                for (CSSSelector* selector = querySelectorList.first(); selector; selector = CSSSelectorList::next(selector)) {
                    if (selectorChecker.checkSelector(selector, element)) {
                        nodes.append(n);
                        break;
                    }   
                }   
            }   
            /*add by alex*/
            // Text nodes are appended unconditionally; the selector list is not
            // consulted for them.
            // NOTE(review): as written this applies to every query that reaches
            // this loop, whatever the selector. Confirm that is intended.
            if (n->isTextNode()) {
                nodes.append(n);
            }   
            /*end*/
        }

make 

make install

build and run project


output :

elem is :  "#text"  /  "font text "


补充,20130725

最近遇到需要给text node 赋值的问题, 需要做改动:

修改qwebelement.cpp

// Replaces the text of the wrapped node with `text`.
// Does nothing for a null handle. HTML elements are updated through
// setInnerText(); any other node is updated through setTextContent().
void QWebElement::setPlainText(const QString &text)
{
    // Previous implementation, kept for reference (alex):
    //     if (!m_element || !m_element->isHTMLElement())
    //         return;
    //     ExceptionCode exception = 0;
    //     static_cast<HTMLElement*>(m_element)->setInnerText(text, exception);

    if (!m_element)
        return;

    // The exception code is required by both setters; it is not examined here.
    ExceptionCode exception = 0;
    if (m_element->isHTMLElement())
        static_cast<HTMLElement*>(m_element)->setInnerText(text, exception);
    else
        m_element->setTextContent(text, exception);
}


  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值