C++ Linux实现HTTPS请求(支持超时设置)

我突发奇想,想用C++写一个爬虫(真该死,为啥我的想法这么丰富呢)于是在收集大量资料后,成功写出了支持HTTP和HTTPS协议请求的代码,并能支持解析请求内容
用的OpenSSL和Socket实现的HTTPS请求

支持库

#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <unistd.h>

#include <cerrno>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include <openssl/err.h>
#include <openssl/ssl.h>

如何实现?

首先要设计一个类,用来封装请求。

/**
 * Small HTTP / HTTPS GET client built on POSIX sockets and OpenSSL.
 *
 * The class is abstract: a subclass must implement handleProtocol().
 * ConnectWebsite() is the public entry point; it returns the raw response
 * text and, on request, splits it into the `protocol` entries that
 * subclasses can read.
 *
 * Instances own an SSL_CTX (and possibly an SSL object), so they are
 * deliberately non-copyable: a copy would free the same handles twice.
 */
class HttpsAndHttpRequest
{
private:
    sockaddr_in serverAddr{}; // zero-initialised so sin_zero is never garbage
    int socketFD = 0;         // 0 is used as "no socket" by the request functions
    int timeOutVal;           // timeout in seconds

    // Pieces of a URL as produced by urlAnalysis().
    struct UrlStructure
    {
        std::string agreement;  // scheme, e.g. "http" / "https"
        std::string host;
        hostent *ip = nullptr;  // result of the host lookup, nullptr on failure
        std::string port;
        std::string path;
        std::string param;      // query string
    };

    // Split a URL into its components and resolve the host.
    UrlStructure urlAnalysis(std::string url);

    // HTTPS state
    SSL_CTX *ctx = nullptr;
    SSL *ssl = nullptr;

    std::string httpResponse;

    // One parsed line of the response (or the body).
    struct httpProtocol
    {
        std::string allContent;
        std::string option;
        std::string content;
    };
    bool noBlock = true;
    // Parse httpResponse into `protocol`.
    void analysisHttpProtocol();
    // Plain HTTP request.
    std::string httpRequest(std::string url);
    // HTTPS request.
    std::string httpsRequest(std::string url);

public:

    bool useHandleProtocol = false;

    /**
     * @param timeoutSeconds timeout, in seconds, stored in timeOutVal.
     * @param noBlockSet     when true the sockets are put into non-blocking mode.
     */
    HttpsAndHttpRequest(int timeoutSeconds = 5, bool noBlockSet = true)
        : timeOutVal(timeoutSeconds), noBlock(noBlockSet)
    {
        // OpenSSL initialisation
        SSL_library_init();
        OpenSSL_add_all_algorithms();
        SSL_load_error_strings();

        // NOTE(review): no verification mode or CA store is configured on this
        // context here, so peer certificates are presumably not checked.
        // ctx may be nullptr if OpenSSL fails to allocate the context.
        ctx = SSL_CTX_new(SSLv23_client_method());
    }

    // The object owns raw OpenSSL handles; copying would double-free them.
    HttpsAndHttpRequest(const HttpsAndHttpRequest &) = delete;
    HttpsAndHttpRequest &operator=(const HttpsAndHttpRequest &) = delete;

    /**
     * Fetch `url`.
     * @param url      address to request.
     * @param analysis when true the response is also parsed into `protocol`.
     * @return the raw response text.
     */
    std::string ConnectWebsite(const std::string &url, bool analysis = false);
    // Change the timeout (seconds).
    void setTimeoutSeconds(int time);

    // Virtual because the class is meant to be derived from; deleting a
    // subclass through a base pointer must run the subclass destructor.
    virtual ~HttpsAndHttpRequest()
    {
        if (ssl)
        {
            SSL_shutdown(ssl);
            SSL_free(ssl);
            ssl = nullptr;
        }
        if (ctx)
        {
            SSL_CTX_free(ctx);
            ctx = nullptr;
        }
        // NOTE(review): socketFD is intentionally not closed here; its value may
        // refer to a descriptor that was already closed, and closing it again
        // could hit an unrelated descriptor.
    }

protected:
    // Parsed response entries, exposed to subclasses.
    std::vector<httpProtocol> protocol;
    // Hook for subclasses. It is not invoked by any code visible in this file.
    virtual void handleProtocol() = 0;
};

首先我们先解析url

/**
 * Split a URL of the form [scheme://]host[:port][/path][?query] into its
 * parts and resolve the host name.
 *
 * - `agreement` receives the scheme (empty when the URL has none).
 * - `path` is stored WITHOUT its leading '/', `param` without its leading '?'.
 * - `port` is left empty when the URL does not name one.
 * - `ip` is the gethostbyname() result (nullptr when the lookup fails or the
 *   host is empty). gethostbyname() returns a pointer to static storage, so
 *   the result is only valid until the next lookup and is not thread-safe.
 *
 * A "://" or '/' that appears only inside the query string is not treated as
 * a scheme or path separator. IPv6 literals ("[::1]") are not supported.
 */
HttpsAndHttpRequest::UrlStructure HttpsAndHttpRequest::urlAnalysis(std::string url)
{
    UrlStructure result;
    result.ip = nullptr;

    std::size_t cursor = 0;

    // A scheme is present only when "://" comes before the first '/' or '?'.
    // (When "://" is found, its own '/' guarantees find_first_of succeeds.)
    const std::size_t schemeSep = url.find("://");
    if (schemeSep != std::string::npos && schemeSep < url.find_first_of("/?"))
    {
        result.agreement = url.substr(0, schemeSep);
        cursor = schemeSep + 3;
    }

    // The authority (host[:port]) runs up to the first '/' or '?'.
    const std::size_t authorityEnd = url.find_first_of("/?", cursor);
    const std::string authority =
        (authorityEnd == std::string::npos) ? url.substr(cursor)
                                            : url.substr(cursor, authorityEnd - cursor);

    if (authorityEnd != std::string::npos)
    {
        const std::string rest = url.substr(authorityEnd + 1);
        if (url[authorityEnd] == '?')
        {
            // "host?query": there is no path at all.
            result.param = rest;
        }
        else
        {
            const std::size_t querySep = rest.find('?');
            if (querySep == std::string::npos)
                result.path = rest;
            else
            {
                result.path = rest.substr(0, querySep);
                result.param = rest.substr(querySep + 1);
            }
        }
    }

    // host[:port]
    const std::size_t portSep = authority.find(':');
    if (portSep == std::string::npos)
        result.host = authority;
    else
    {
        result.host = authority.substr(0, portSep);
        result.port = authority.substr(portSep + 1);
    }

    if (!result.host.empty())
        result.ip = gethostbyname(result.host.c_str());

    return result;
}

要实现HTTPS请求,就要先实现最基本的HTTP请求,这里使用socket实现HTTP请求

std::string HttpsAndHttpRequest::httpRequest(std::string url)
{
    if (socketFD)
        close(socketFD);
 
    UrlStructure urlResource = urlAnalysis(url);
 
    if (urlResource.port.empty())
        urlResource.port = "80";
 
    if (!urlResource.ip)
        return "";
 
    socketFD = socket(AF_INET, SOCK_STREAM, 0);
    if (noBlock)
    {
        int flags = fcntl(socketFD, F_GETFL, 0);
        fcntl(socketFD, F_SETFL, flags | O_NONBLOCK);
    }
    // 设置连接地址
    serverAddr.sin_family = AF_INET;
    serverAddr.sin_port = htons(atoi(urlResource.port.c_str()));
    serverAddr.sin_addr = *(in_addr *)urlResource.ip->h_addr_list[0];
 
    // 构建http请求
    std::string request = "GET /" + urlResource.path + " HTTP/1.1\r\n";
    request += "Host: " + urlResource.host + "\r\n";
    request += "Connection: close\r\n";
    request += "\r\n";
 
    int nRet = connect(socketFD, (sockaddr *)&serverAddr, sizeof(serverAddr));
 
    timeval timeout;
    timeout.tv_sec = timeOutVal;
 
    fd_set wait;
    FD_ZERO(&wait);
    FD_SET(socketFD, &wait);
 
    nRet = select(socketFD + 1, NULL, &wait, NULL, &timeout);
    if (nRet <= 0)
        return "";
 
    // 发送HTTP请求
    if (write(socketFD, request.c_str(), strlen(request.c_str())) < 0)
        return "";
 
    std::string response = "";
    char buffer[1024];
    int len = 0;
 
    timeout.tv_sec = timeOutVal;
    FD_ZERO(&wait);
    FD_SET(socketFD, &wait);
    while (true)
    {
        int ready = select(socketFD + 1, &wait, NULL, NULL, &timeout);
        if (ready > 0)
        {
            if (FD_ISSET(socketFD, &wait))
            {
                len = read(socketFD, buffer, sizeof(buffer));
                if (len > 0)
                    response.append(buffer, len);
                else
                    break;
            }
        }
        else
            break;
    }
    close(socketFD);
    return response;
}

在HTTP基础上实现HTTPS

std::string HttpsAndHttpRequest::httpsRequest(std::string url)
{
    if (socketFD)
        close(socketFD);
 
    UrlStructure urlResource = urlAnalysis(url);
    if (urlResource.port.empty())
    {
        urlResource.port = "443";
    }
 
    if (!urlResource.ip)
    {
        return "";
    }
 
    socketFD = socket(AF_INET, SOCK_STREAM, 0);
    if (noBlock)
    {
        int flags = fcntl(socketFD, F_GETFL, 0);
        fcntl(socketFD, F_SETFL, flags | O_NONBLOCK);
    }
 
    // 设置连接地址
    serverAddr.sin_family = AF_INET;
    serverAddr.sin_port = htons(atoi(urlResource.port.c_str()));
    serverAddr.sin_addr = *(in_addr *)urlResource.ip->h_addr_list[0];
 
    // 构建http请求
    std::string request = "GET /" + urlResource.path + " HTTP/1.1\r\n";
    request += "Host: " + urlResource.host + "\r\n";
    request += "Connection: close\r\n";
    request += "\r\n";
 
    int nRet = connect(socketFD, (sockaddr *)&serverAddr, sizeof(serverAddr));
 
    timeval timeout;
    timeout.tv_sec = timeOutVal;
 
    fd_set wait;
    FD_ZERO(&wait);
    FD_SET(socketFD, &wait);
 
    nRet = select(socketFD + 1, NULL, &wait, NULL, &timeout);
    if (nRet <= 0)
        return "";
 
    SSL_CTX_set_timeout(ctx, timeOutVal);
    ssl = SSL_new(ctx);
    SSL_set_fd(ssl, socketFD);
 
    timeval startTime;
    gettimeofday(&startTime, NULL);
    timeval nowTime;
    int cutTime = 0;
    while ((nRet = SSL_connect(ssl)) != 1)
    {
        int sslError = SSL_get_error(ssl, nRet);
        if ((sslError != SSL_ERROR_WANT_READ && sslError != SSL_ERROR_WANT_WRITE) || cutTime > timeOutVal)
            return "";
        gettimeofday(&nowTime, NULL);
        cutTime = nowTime.tv_sec - startTime.tv_sec;
        usleep(100000);
    }
 
    if (SSL_write(ssl, request.c_str(), request.size()) <= 0)
    {
        return "";
    }
 
    std::string response = "";
    char buffer[1024];
    int len = 0;
 
    FD_ZERO(&wait);
    FD_SET(socketFD, &wait);
 
    while (true)
    {
        int ready = select(socketFD + 1, &wait, NULL, NULL, &timeout);
        if (ready > 0)
        {
            if (FD_ISSET(socketFD, &wait))
            {
                len = SSL_read(ssl, buffer, sizeof(buffer));
                if (len > 0)
                    response.append(buffer, len);
                else
                    break;
            }
        }
        else
            break;
    }
    return response;
}

然后就是解析返回的网页数据,设置超时时间

/**
 * Split httpResponse into `protocol` entries ({allContent, option, content}).
 *
 * - First line (status line): option is "request-result", content is the
 *   second space-separated token (the status code).
 * - Each following line up to the blank line: option is the text before the
 *   first ':', content is everything after it (leading whitespace is kept).
 *   A line without ':' is stored as both option and content.
 * - The blank line ends the headers; everything after it is stored as one
 *   entry whose option is "request-date".
 *
 * Does nothing when httpResponse is empty. Any previous entries are dropped.
 * Always leaves useHandleProtocol set to false.
 */
void HttpsAndHttpRequest::analysisHttpProtocol()
{
    if (httpResponse.empty())
        return;

    protocol.clear();
    protocol.shrink_to_fit();

    const std::size_t npos = std::string::npos;
    std::size_t start = 0;
    bool isStatusLine = true;

    while (true)
    {
        const std::size_t end = httpResponse.find("\r\n", start);
        const std::string value =
            (end == npos) ? httpResponse.substr(start) : httpResponse.substr(start, end - start);

        if (value.empty())
        {
            // Blank line: the rest is the body. If the response stopped right
            // after a line break (end == npos) there is no body at all.
            const std::string body = (end == npos) ? std::string() : httpResponse.substr(end + 2);
            protocol.push_back({"request-date : ...", "request-date", body});
            break;
        }

        std::string option;
        std::string content;
        if (isStatusLine)
        {
            // "HTTP/1.1 200 OK" -> content "200"
            const std::size_t firstSpace = value.find(' ');
            const std::size_t codeStart = (firstSpace == npos) ? 0 : firstSpace + 1;
            const std::size_t codeEnd = value.find(' ', codeStart);
            option = "request-result";
            content = (codeEnd == npos) ? value.substr(codeStart) : value.substr(codeStart, codeEnd - codeStart);
        }
        else
        {
            const std::size_t colon = value.find(':');
            if (colon == npos)
            {
                option = value;
                content = value;
            }
            else
            {
                option = value.substr(0, colon);
                content = value.substr(colon + 1);
            }
        }
        protocol.push_back({value, option, content});

        if (end == npos)
            break; // last line had no terminator: nothing follows
        start = end + 2;
        isStatusLine = false;
    }
    useHandleProtocol = false;
}
 
void HttpsAndHttpRequest::setTimeoutSeconds(int time)
{
    timeOutVal = time;
}

最后再做个汇总

std::string HttpsAndHttpRequest::ConnectWebsite(const std::string &url, bool analysis)
{
    UrlStructure urlResource = urlAnalysis(url);
    bool NoAgreement = false;
    bool existPort = true;
    if (urlResource.agreement != "https" || urlResource.agreement != "http")
    {
        urlResource.agreement = "http";
        NoAgreement = true;
    }
 
    if (urlResource.port == "")
    {
        existPort = false;
        if (urlResource.agreement == "https")
            urlResource.port = "443";
        else
            urlResource.port = "80";
    }
    std::string urlContent = urlResource.agreement + "://" + urlResource.host + ":" + urlResource.port + "/" + urlResource.path + "?" + urlResource.param;
 
    std::string result = "";
 
    if (NoAgreement)
    {
        result = httpsRequest(urlContent);
        if (result.empty())
        {
            if (existPort)
                urlContent = "http://" + urlResource.host + ":80" + "/" + urlResource.path + "?" + urlResource.param;
 
            result = httpRequest(urlContent);
        }
    }
    else if (urlResource.agreement == "https")
        result = httpsRequest(urlContent);
    else
        result = httpRequest(urlContent);
 
    if (analysis)
    {
        httpResponse = result;
        analysisHttpProtocol();
    }
 
    return result;
}

以上就完成了一个封装好的HTTPS请求

示例

//上面的类和函数

class HttpsAndHttpRequests : public HttpsAndHttpRequest
{
public:
    //重写那个虚函数
    void handleProtocol()
    {
        //可以进行处理
    }
};
 
int main(){
    HttpsAndHttpRequests request
    std::string response = request.ConnectWebsite("https://sober-up.cn/");
    std::cout << response << std::endl;
}

总结

这个代码还有很多可以优化的地方,为啥不优化一下?
作者能力有限,不会优化(懒不想优化,目前够用)
有不懂的地方在评论区提出,不想写注释。

具体函数介绍:C++ Linux实现HTTPS请求(支持超时设置) - 晨醒云烟 (sober-up.cn)

  • 10
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

STHUDY

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值