重构Webbench学习http代理请求实现
http proxy代理请求实现方式
https://datatracker.ietf.org/doc/html/rfc7230
https://datatracker.ietf.org/doc/html/rfc7231
https://datatracker.ietf.org/doc/html/draft-luotonen-web-proxy-tunneling-01
- 普通代理,代理服务器充当中间人,客户端 → 代理 → 服务端
- 隧道代理,基于HTTP的CONNECT方法(2014年HTTP/1.1修订版,RFC 7231)
普通代理
- 非代理http请求
- 向host建立tcp连接
- start-line:method SP request-target(相对url资源路径) SP HTTP-version CRLF
- host_header:host:hostname
- 例如目标请求url:http://test.com/books.html,request-target为/books.html,host header为test.com
- 普通代理
- start-line:method SP request-target(绝对url资源路径) SP HTTP-version CRLF
- host_header:本demo不发送host(注:RFC 7230第5.4节要求HTTP/1.1请求必须携带Host头,代理收到绝对url时会忽略其内容)
- 代理服务器根据绝对url资源路径,转化为相对url资源路径和host,代理完成中间人角色
demo(c++重构封装webbench)
http://home.tiscali.cz/~cz210552/webbench.html
- 普通代理处理demo
- 注意重构的param_http_path,param_http_host方法
// option.h
#ifndef WEBBENCH_OPTION_H_
#define WEBBENCH_OPTION_H_
#include <string>
using std::string;
/// Command-line configuration for the webbench HTTP benchmark.
///
/// An instance is filled in by param_option() and then queried through the
/// param_http_*() accessors and build_http_headers() to produce the request
/// text.
class WebbenchOption {
public:
  /// Sets every option to its default value.
  WebbenchOption();
  ~WebbenchOption();

  /// Prints the command-line help text to standard output.
  static void usage(void);

  /// Prints the current value of every option to standard output.
  void print_option() const;

  /// Parses the command line into this object; terminates the process on
  /// invalid input or when help is requested.
  void param_option(const int argc, char *argv[]);

  /// Request method token, e.g. "GET".
  const std::string &param_http_method() const;

  /// Protocol version token, e.g. "HTTP/1.1".
  const std::string &param_http_version() const;

  /// Request-target: the path part of the URL for a direct request, the full
  /// URL when a proxy is configured.
  const std::string param_http_path() const;

  /// Host name taken from the URL for a direct request; empty when a proxy
  /// is configured.
  const std::string param_http_host() const;

  /// Complete request text (start line plus headers).
  const std::string build_http_headers() const;

  /// Short-option specification passed to getopt_long().
  static const std::string shortopts;

private:
  /// Normalises option combinations after parsing.
  void fix_option();

  bool force;            ///< -f: don't wait for the server's reply.
  bool force_reload;     ///< -r: send a reload (no-cache) request.
  int benchtime;         ///< -t: benchmark duration in seconds.
  std::string proxyhost; ///< -p: proxy host; empty means no proxy.
  int proxyport;         ///< -p: proxy port.
  int clients;           ///< -c: number of concurrent clients.
  // http10 and method stay plain ints because getopt_long() writes to them
  // through the int* `flag` field of its long-option table.
  int http10;            ///< Selected HTTP version identifier.
  int method;            ///< Selected request-method identifier.
  std::string url;       ///< Target URL taken from the command line.
};
#endif
// options.cc
#include "option.h"
#include <getopt.h>
#include <cstdlib>
#include <iostream>
#include <unordered_map>
using std::cout;
using std::endl;
using std::ios_base;
// using std::npos;
using std::unordered_map;
// Request-method identifiers. They are stored in WebbenchOption::method and
// are the values getopt_long() writes for --get/--head/--options/--trace.
#define METHOD_GET 0
#define METHOD_HEAD 1
#define METHOD_OPTIONS 2
#define METHOD_TRACE 3
// HTTP-version identifiers. They are stored in WebbenchOption::http10 and are
// ordered so that a numerically larger value is a newer protocol version.
#define HTTP_09 0
#define HTTP_10 1
#define HTTP_11 2
// Method identifier -> token used in the request start line.
// Left non-const because the accessors look entries up with operator[].
static unordered_map<int, string> method_map = {{METHOD_GET, "GET"},
{METHOD_HEAD, "HEAD"},
{METHOD_OPTIONS, "OPTIONS"},
{METHOD_TRACE, "TRACE"}};
// Version identifier -> protocol token used in the request start line.
static unordered_map<int, string> http_version_map = {
{HTTP_09, "HTTP/0.9"}, {HTTP_10, "HTTP/1.0"}, {HTTP_11, "HTTP/1.1"}};
// Short options for getopt_long(): -f, -r and -h take no argument;
// -t, -p and -c each require one.
const string WebbenchOption::shortopts = "frt:p:c:h";
/// Builds an option set holding the defaults: no proxy (port 80), one client,
/// a 30 second run, GET over HTTP/0.9, and an empty URL.
WebbenchOption::WebbenchOption()
    : force(false),
      force_reload(false),
      benchtime(30),
      proxyhost(""),
      proxyport(80),
      clients(1),
      http10(HTTP_09),
      method(METHOD_GET),
      url("") {}
/// Nothing to release beyond what the members clean up themselves.
WebbenchOption::~WebbenchOption() = default;
/// Writes the command-line help to standard output, one line per option.
void WebbenchOption::usage(void) {
  // The original short flags 9/1/2 (HTTP/0.9, 1.0, 1.1) look nothing like
  // their long forms, so the protocol version is selectable by long option
  // only.
  static const char *const kHelpLines[] = {
      "webbench [option]... URL",
      " -f|--force Don't wait for reply from server.",
      " -r|--reload Send reload request - Pragma: no-cache.",
      " -t|--time <sec> Run benchmark for <sec> seconds. Default 30.",
      " -p|--proxy <server:port> Use proxy server for request.",
      " -c|--clients <n> Run <n> HTTP clients at once. Default one.",
      " --http09 Use HTTP/0.9 style requests.",
      " --http10 Use HTTP/1.0 protocol.",
      " --http11 Use HTTP/1.1 protocol.",
      " --get Use GET request method.",
      " --head Use HEAD request method.",
      " --options Use OPTIONS request method.",
      " --trace Use TRACE request method.",
      " -h|--help This information.",
  };
  for (const char *line : kHelpLines) {
    cout << line << endl;
  }
}
/// Dumps every option, one per line, to standard output.
/// Booleans are printed as true/false; the boolalpha flag is left set on cout.
void WebbenchOption::print_option() const {
  cout.setf(ios_base::boolalpha);
  cout << "webbench option:" << endl;

  // Proxy is shown as host:port, or "no" when none was configured.
  cout << "\t--proxy ";
  if (proxyhost.empty()) {
    cout << "no";
  } else {
    cout << proxyhost << ":" << proxyport;
  }
  cout << endl;

  cout << "\t--force early socket close " << force << endl
       << "\t--reload forcing reload " << force_reload << endl;
  cout << "\t--http using " << param_http_version() << endl
       << "\t--method " << param_http_method() << endl;
  cout << "\t--time " << benchtime << endl
       << "\t--clients " << clients << endl
       << "\t--url " << url << endl;
}
void WebbenchOption::param_option(const int argc, char *argv[]) {
if (argc == 1) {
usage();
exit(2);
}
// 长命令选项
int opt = 0;
string tmp;
int i;
const struct option longopts[] = {
{"help", no_argument, NULL, 'h'},
{"force", no_argument, NULL, 'f'},
{"reload", no_argument, NULL, 'r'},
{"time", required_argument, NULL, 't'},
{"proxy", required_argument, NULL, 'p'},
{"clients", required_argument, NULL, 'c'},
{"http09", no_argument, &http10, HTTP_09},
{"http10", no_argument, &http10, HTTP_10},
{"http11", no_argument, &http10, HTTP_11},
{"get", no_argument, &method, METHOD_GET},
{"head", no_argument, &method, METHOD_HEAD},
{"options", no_argument, &method, METHOD_OPTIONS},
{"trace", no_argument, &method, METHOD_TRACE},
{NULL, 0, NULL, 0}};
while ((opt = getopt_long(argc, argv, WebbenchOption::shortopts.c_str(),
longopts, NULL)) != EOF) {
switch (opt) {
case 0:
break;
case '?':
break;
case 'f':
force = true;
break;
case 'r':
force_reload = true;
break;
case 't':
benchtime = atoi(optarg);
break;
case 'p':
// server:port
tmp = optarg;
// tmp = strrchr(optarg, ':');
if (tmp == "") {
break;
}
i = tmp.rfind(":");
if (i < 0) {
cout << "Error in option --proxy " << tmp << ": Not valid proxy."
<< endl;
exit(2);
}
if (i == 0) {
cout << "Error in option --proxy " << tmp << ": Missing hostname."
<< endl;
exit(2);
}
if (i == tmp.size() - 1) {
cout << "Error in option --proxy " << tmp << " Port number is missing."
<< endl;
exit(2);
}
proxyhost = tmp.substr(0, i);
proxyport = atoi(tmp.substr(i + 1).c_str());
break;
case 'c':
clients = atoi(optarg);
break;
case 'h':
WebbenchOption::usage();
exit(0);
break;
default:
break;
}
}
// url
if (optind == argc) {
cout << "webbench: Missing URL!" << endl;
usage();
exit(2);
}
url = argv[optind];
i = url.find("://");
if (i < 0) {
cout << url << ": is not a valid URL." << endl;
exit(2);
}
i = url.find("/", url.find("://") + 3);
if (i < 0) {
cout << "Invalid URL syntax - hostname don't ends with '/'." << endl;
exit(2);
}
if (url.size() > 1500) {
cout << "URL is too long." << endl;
exit(2);
}
if (proxyhost == "") {
// 只有http协议可以直连,其他协议要代理支持
if (0 != url.find_first_of("http://")) {
cout << "Only HTTP protocol is directly supported, set --proxy for "
"others."
<< endl;
exit(2);
}
}
// option
cout << "----------cmdline option----------" << endl;
print_option();
fix_option();
// fix option
cout << "--------fix cmdline option--------" << endl;
print_option();
cout << "----------------ok----------------" << endl;
}
void WebbenchOption::fix_option() {
if (clients == 0) {
clients = 1;
}
if (benchtime == 0) {
benchtime = 30;
}
// http/0.9不支持缓冲控制
if (force_reload && proxyhost != "" && http10 < HTTP_10) {
http10 = HTTP_10;
}
// http/0.9只支持get请求
if (method == METHOD_HEAD && http10 < HTTP_10) {
http10 = HTTP_10;
}
if (method == METHOD_OPTIONS && http10 < HTTP_11) {
http10 = HTTP_11;
}
if (method == METHOD_TRACE && http10 < HTTP_11) {
http10 = HTTP_11;
}
if (proxyhost != "") {
http10 = HTTP_11;
}
}
/// Returns the request-method token ("GET", "HEAD", ...) for the selected
/// method identifier.
const string &WebbenchOption::param_http_method() const {
  // operator[] creates an empty entry if the identifier is unknown.
  const string &token = method_map[method];
  return token;
}
/// Returns the protocol token ("HTTP/0.9", "HTTP/1.0", "HTTP/1.1") for the
/// selected version identifier.
const string &WebbenchOption::param_http_version() const {
  // operator[] creates an empty entry if the identifier is unknown.
  const string &token = http_version_map[http10];
  return token;
}
/// Returns the request-target for the start line: the full URL when a proxy
/// is configured, otherwise everything from the first '/' after the host.
const string WebbenchOption::param_http_path() const {
  if (!proxyhost.empty()) {
    return url;
  }
  const string::size_type host_begin = url.find("://") + 3;
  const string::size_type path_begin = url.find("/", host_begin);
  return url.substr(path_begin);
}
/// Returns the host name for the Host header of a direct request.
///
/// The host is the text between "://" and the first ':' (port separator) or
/// '/' (path start), whichever comes first. Returns an empty string when a
/// proxy is configured or when the URL has no "://".
const string WebbenchOption::param_http_host() const {
  if (!proxyhost.empty()) {
    return "";
  }

  const string::size_type scheme_end = url.find("://");
  if (scheme_end == string::npos) {
    return "";
  }
  const string::size_type host_begin = scheme_end + 3;

  // The authority (host[:port]) ends at the first '/', or at the end of the
  // URL when there is no path.
  string::size_type authority_end = url.find('/', host_begin);
  if (authority_end == string::npos) {
    authority_end = url.size();
  }

  // A ':' only separates a port when it lies inside the authority; a colon
  // later in the path or query must not cut the host short.
  const string::size_type colon = url.find(':', host_begin);
  const string::size_type host_end =
      (colon != string::npos && colon < authority_end) ? colon
                                                        : authority_end;
  return url.substr(host_begin, host_end - host_begin);
}
// Header construction is tightly coupled to the option values.
/// Builds the complete request text from the current options.
///
/// HTTP/0.9 yields only the simple request line "<method> <target>\r\n",
/// with no version token and no headers. HTTP/1.0 and later add the version
/// token, the headers selected by the options, and a terminating blank line.
const string WebbenchOption::build_http_headers() const {
  const bool has_headers = http10 >= HTTP_10;

  // Start line. An HTTP/0.9 simple request carries no protocol version.
  string request = param_http_method() + " " + param_http_path();
  if (has_headers) {
    request += " " + param_http_version();
  }
  request += "\r\n";

  if (has_headers) {
    request += "User-Agent: WebBench\r\n";
  }
  // Host is sent for direct requests only; via a proxy the target is already
  // the absolute URL.
  if (proxyhost.empty() && has_headers) {
    request += "Host: " + param_http_host() + "\r\n";
  }
  if (force_reload && !proxyhost.empty()) {
    request += "Pragma: no-cache\r\n";
  }
  if (http10 >= HTTP_11) {
    request += "Connection: close\r\n";
  }
  // A blank line terminates the header section.
  if (has_headers) {
    request += "\r\n";
  }
  return request;
}
// webbench.cc
/*
* Simple forking WWW Server benchmark
*/
#include "option.h"
#include <iostream>
using std::cout;
using std::endl;
/*
* Return codes:
* 0 - success
* 1 - benchmark failed (server is not on-line)
* 2 - bad param
* 3 - internal error, fork failed
*/
/// Parses the command line and prints the HTTP request that would be sent.
int main(int argc, char *argv[]) {
  WebbenchOption options;
  options.param_option(argc, argv);

  const auto request = options.build_http_headers();
  cout << request << endl;
  return 0;
}
// 运行 ./webbench --http11 -p "test.com:8888" -t 60 --trace "https://httpbin.org/get"
// TRACE https://httpbin.org/get HTTP/1.1\r\nUser-Agent: WebBench\r\nConnection: close\r\n\r\n
// 运行 ./webbench --http11 -t 60 --trace "http://httpbin.org/get"
// TRACE /get HTTP/1.1\r\nUser-Agent: WebBench\r\nHost: httpbin.org\r\nConnection: close\r\n\r\n