需求:使用 Qt 从下面的 HTML 中提取链接(即 <A> 标签的 href 属性及其显示文本)。示例 HTML 如下,程序从文件中读取它:
<html>
<body>
<table>
<tr><th>sender</th><th>receiver</th><th>name</th><th>downloaded</th><th>type</th><th>time</th></tr>
<tr><th>sender</th><th>leader</th><th><A href="http://127.0.0.1:8080/getfile?md5=dca45b3f7735051a5eac8c158ca2fdb7">qt.conf</A></th><th>0</th><th>pic</th><th>2014-02-22/12:14:01</th></tr>
<tr><th>sender</th><th>leader</th><th><A href="http://127.0.0.1:8080/getfile?md5=aa803bf8ec4e0a92b74dd6c274acee84">qt中文.conf</A></th><th>0</th><th>pic</th><th>2014-02-22/14:21:41</th></tr>
</table>
</body>
</html>
test.pro
# qmake project for the html_parse command-line tool.
TEMPLATE = app
TARGET = html_parse
# QDomDocument is provided by the QtXml module.
QT += xml
# "console" is a CONFIG option, not a Qt module: it requests a console
# application on Windows.
CONFIG += console
SOURCES += main.cpp
# Directories for generated object files and moc output.
OBJECTS_DIR = .obj
MOC_DIR = .moc
#include <iostream>
#include <fstream>
#include <QFile>
#include <QDomDocument>
#include <QDebug>
#define strcasecmp _stricmp
using namespace std;
void parseElement(const QDomElement &element)
{
qDebug()<<element.tagName();
qDebug()<<element.text();
if(element.tagName() == "A")//检查根结点是否为<A href="XX">
{
QString href=element.toElement().attribute("href");
QString fileName = element.toElement().text();
cout<<href.toStdString()<<endl;
cout<<fileName.toStdString()<<endl;
return ;
}
QDomNode child = element.firstChild();//检查第一个子树
if(!child.isNull())
{
parseElement(child.toElement());
while(!child.nextSibling().isNull())// 如果第二个子树存在继续递归检查
{
parseElement( child.nextSibling().toElement());
child = child.nextSibling(); //child赋值为下一个兄弟结点
}
}
}
int main(int argc, char* argv[])
{
QFile file("C:/Users/ddf/Desktop/ddd.xml");
if (!file.open(QFile::ReadOnly))
{
return -1;
}
QDomDocument doc;
if(!doc.setContent(&file, false))
{
return -1;
}
QDomElement root = doc.documentElement();
parseElement(root);
return 0;
}
程序中用到了一个递归函数 parseElement,用它遍历由 QDomDocument 解析得到的 DOM 树。