Qt实践| HTTP知识点-Qt抓CSDN博文及防CSDN反爬虫(思路+代码)

583 篇文章 127 订阅
39 篇文章 1 订阅

目录

 

 

Fiddler分析HTTP数据包

Qt代码思路


 

Fiddler分析HTTP数据包

首先使用Fiddler抓包,可以看见当访问CSDN博客的时候,会有2个请求:

第一个请求的响应如下:

这里我先说明一个地方,就是这个body采用了gzip压缩,它的文件头为1F 8B 08,通过解压就可以得到原始内容。

这里是一个很重要的地方,在后面将会单独拿个博文来讲。本次博文只要记录下cookie并添加一个头部,就能实现功能;Qt调用JavaScript脚本以及解压gzip将会在下个博文给出!

从中我们可以看到,脚本里面有个reload,同时还可以看到它设置了cookie,这个是关键!!

下面来看下第二个包。

从这里可以看到它被设置了好多cookie,都是之前的页面设置的。并且还加了一个头部Cache-Control: max-age=0,这个说明浏览器不使用缓存,需要向服务器重新请求,这样就抓到了。

 

 

Qt代码思路

所以可以把这个cookie设置上去(这个cookie是有时效的从上面的max-age可以看出来)。

比如下面的代码!

#include <QCoreApplication>
#include <QSslConfiguration>
#include <QFile>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QUrl>
#include <windows.h>
#include <QNetworkAccessManager>
#include <QEventLoop>
#include <QDebug>
#include <QObject>

/**
 * Fetches the CSDN blog page 100 times, sending a fixed set of request
 * headers (including a browser-captured Cookie), and logs the size of each
 * response.
 *
 * Every request is issued through QNetworkAccessManager; a local QEventLoop
 * blocks until the reply emits finished(), so the requests run one at a time.
 *
 * @param argc  argument count, forwarded to QCoreApplication
 * @param argv  argument vector, forwarded to QCoreApplication
 * @return      the value returned by QCoreApplication::exec()
 */
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    QNetworkAccessManager *manager = new QNetworkAccessManager;
    QNetworkRequest *request = new QNetworkRequest;
    int count = 0;
    while(count < 100){

        // TLS setup: peer certificate verification is switched off.
        QSslConfiguration conf = request->sslConfiguration();
        conf.setPeerVerifyMode(QSslSocket::VerifyNone);
        conf.setProtocol(QSsl::TlsV1SslV3);
        request->setSslConfiguration(conf);

        // Headers are (re)applied on every pass; setRawHeader() overwrites
        // any previous value, so repeating this is harmless.
        request->setUrl(QUrl("https://blog.csdn.net/qq78442761/"));
        request->setRawHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0");
        request->setRawHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
        request->setRawHeader("Content-Type", "text/html; charset=UTF-8");
        request->setRawHeader("Vary", "Accept-Encoding");
        request->setRawHeader("Accept-Encoding", "deflate");
        request->setRawHeader("Cookie", "acw_tc=2760825315663094974916102ecc820ef9e0c8e6eb333e6389414ce3be50de; acw_sc__v3=5d5bfc79f10274ca8314102ce6399cc234a707c4; uuid_tt_dd=10_37194620530-1566309498145-537548; dc_session_id=10_1566309498145.279932; acw_sc__v2=5d5bfc7965ec27c1205990c1d07e1686fe0cc5ec; dc_tos=pwjg7j; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1566297492,1566298161,1566309532,1566309584; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1566309584; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_37194620530-1566309498145-537548!1788*1*PC_VC; c-login-auto=1");
        request->setRawHeader("Cache-Control", "max-age=0");

        QNetworkReply *reply = manager->get(*request);

        // Block here until this reply has finished.
        QEventLoop loop;
        QObject::connect(reply, SIGNAL(finished()), &loop, SLOT(quit()));
        loop.exec();


        qDebug() << "over! The size is " << reply->size();

        // The caller owns the reply returned by get(). Without this, all 100
        // replies would stay alive until the manager is destroyed.
        reply->deleteLater();

        //if(reply->size() < 1024 * 18){

          //  delete request;
          //  delete manager;
          //  manager = new QNetworkAccessManager;
          //  request = new QNetworkRequest;
          //  qDebug() << "new request!";
        //}

        count++;
        Sleep(200);   // Windows API: pause 200 ms between requests
    }

    delete request;
    delete manager;

    // NOTE(review): nothing visible here ever quits the application event
    // loop, so exec() appears to block until the process is terminated.
    return a.exec();
}

但是会存在一个问题

抓着抓着突然间就抓不了了!估计是最后正常的那一次有问题,看看它的包头!

发现这该死的竟然把Cookie给打乱了!

修改下代码!当出现问题时,关闭以前的连接,创建新连接。

#include <QCoreApplication>
#include <QSslConfiguration>
#include <QFile>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QUrl>
#include <windows.h>
#include <QNetworkAccessManager>
#include <QEventLoop>
#include <QDebug>
#include <QObject>

int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);

    QNetworkAccessManager *manager = new QNetworkAccessManager;
    QNetworkRequest *request = new QNetworkRequest;
    int count = 0;
    while(count < 100){

        QSslConfiguration config;
        QSslConfiguration conf = request->sslConfiguration();
        conf.setPeerVerifyMode(QSslSocket::VerifyNone);
        conf.setProtocol(QSsl::TlsV1SslV3);
        request->setSslConfiguration(conf);

        request->setUrl(QUrl("https://blog.csdn.net/qq78442761/"));
        request->setRawHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0");
        request->setRawHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
        request->setRawHeader("Content-Type", "text/html; charset=UTF-8");
        request->setRawHeader("Vary", "Accept-Encoding");
        request->setRawHeader("Accept-Encoding", "deflate");
        request->setRawHeader("Cookie", "acw_tc=2760825315663094974916102ecc820ef9e0c8e6eb333e6389414ce3be50de; acw_sc__v3=5d5bfc79f10274ca8314102ce6399cc234a707c4; uuid_tt_dd=10_37194620530-1566309498145-537548; dc_session_id=10_1566309498145.279932; acw_sc__v2=5d5bfc7965ec27c1205990c1d07e1686fe0cc5ec; dc_tos=pwjg7j; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1566297492,1566298161,1566309532,1566309584; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1566309584; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_37194620530-1566309498145-537548!1788*1*PC_VC; c-login-auto=1");
        request->setRawHeader("Cache-Control", "max-age=0");

        QNetworkReply *reply = manager->get(*request);

        QEventLoop loop;
        QObject::connect(reply, SIGNAL(finished()), &loop, SLOT(quit()));
        loop.exec();

//        QFile file("content.txt");
//        if(!file.open(QIODevice::WriteOnly))
//            return 0;
//        file.write(reply->readAll());
//        file.close();

        qDebug() << "over! The size is " << reply->size();

        if(reply->size() < 1024 * 18){

            delete request;
            delete manager;
            manager = new QNetworkAccessManager;
            request = new QNetworkRequest;
            qDebug() << "new request!";
        }

        count++;
        Sleep(200);
    }
    
    delete request;
    delete manager;

    return a.exec();
}

最后运行截图如下:

来一张程序的图!

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

IT1995

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值