爬取的目标网页
网页源码
运行示例
项目结构
源代码
robotsandre.h
#ifndef ROBOTSANDRE_H
#define ROBOTSANDRE_H

// The include guard must enclose the whole header; the closing #endif is at
// the bottom of this file so that repeated inclusion is actually suppressed.
#include<QCoreApplication>
#include<QRegularExpression>
#include<QRegularExpressionMatch>
#include<QRegularExpressionMatchIterator>
#include<QString>
#include<QDebug>
#include <QCoreApplication>
#include<QtCore>
#include<QNetworkAccessManager>
#include<QUrl>
#include<QNetworkRequest>
#include<QNetworkReply>
#include<QObject>

/**
 * @brief Fetches a page: issues a GET request for @p url and waits for it to finish.
 * @param url Address of the page to download.
 * @return The reply body converted to a QString.
 */
QString Robots(QString url);

/**
 * @brief Extracts text from a fetched page with a regular expression.
 *
 * The result is a single space, followed by the named capture group "bt"
 * (title) of the first match, two newlines, and then the named capture group
 * "zw" (body text) of every subsequent match, concatenated in order.
 *
 * @param HTML Page text to search.
 * @param re   Regular expression pattern applied globally to @p HTML.
 * @return The assembled text.
 */
QString RegularExpression(QString HTML,QString re);

#endif // ROBOTSANDRE_H
robotsandre.cpp
#include "robotsandre.h"
/**
 * @brief Fetches a page: downloads @p url with a GET request and returns the body.
 *
 * The call blocks the caller: a local QEventLoop is run until the reply emits
 * finished(). If the reply reports an error, a warning is logged so that the
 * failure is not silent; whatever body data the reply carries is still
 * returned, exactly as before.
 *
 * @param url Address of the page to download.
 * @return The reply body decoded as UTF-8 (empty if nothing was received).
 */
QString Robots(QString url)
{
    const QUrl target(url);
    QNetworkAccessManager manager;
    QEventLoop loop;

    QNetworkReply *reply = manager.get(QNetworkRequest(target));

    // Pointer-to-member connect is checked at compile time, unlike the
    // string-based SIGNAL()/SLOT() form.
    QObject::connect(reply, &QNetworkReply::finished, &loop, &QEventLoop::quit);
    loop.exec();

    if (reply->error() != QNetworkReply::NoError) {
        // Surface the failure instead of letting it pass unnoticed.
        qWarning() << "Robots: request for" << target.toString()
                   << "failed:" << reply->errorString();
    }

    // Explicit decoding rather than relying on the implicit
    // QByteArray -> QString conversion.
    return QString::fromUtf8(reply->readAll());
}
/**
 * @brief Extracts the article text from a fetched page with a regular expression.
 *
 * @p re is matched globally against @p HTML. The returned text is built as:
 * a single leading space, the named capture group "bt" (title) of the first
 * match, two newlines, and then the named capture group "zw" (body text) of
 * every following match, appended in order.
 *
 * When there is no match at all the result is " \n\n". The first match is only
 * read after checking hasNext(), because calling next() on an exhausted
 * iterator is documented by Qt as giving undefined results.
 *
 * @param HTML Page text to search.
 * @param re   Regular expression pattern; expected to define the named groups
 *             "bt" and "zw".
 * @return The assembled text.
 */
QString RegularExpression(QString HTML,QString re)
{
    QString extracted = " ";
    const QRegularExpression pattern(re);
    QRegularExpressionMatchIterator matches = pattern.globalMatch(HTML);

    // Title: taken from the first match only, if there is one.
    if (matches.hasNext()) {
        const QRegularExpressionMatch first = matches.next();
        extracted += first.captured("bt");
    }
    extracted += "\n\n";

    // Body text: one "zw" capture per remaining match.
    while (matches.hasNext()) {
        const QRegularExpressionMatch current = matches.next();
        extracted += current.captured("zw");
    }
    return extracted;
}
提示:请勿将此文章内容用于非法用途,此文仅供学习参考.