正则表达式解析 HTTP 请求

正则表达式解析 HTTP 请求

1 代码

#pragma once
#include <cstdio>
#include <regex>
#include <string>
#include <unordered_map>
#include <vector>

// NOTE(review): a file-scope using-directive in a header leaks into every
// translation unit that includes it. It is kept so that existing code relying
// on unqualified standard-library names keeps compiling.
using namespace std;

// HTTP request-method tokens, spelled exactly as they appear on the request line.
const char methodGet[] = "GET";
const char methodPost[] = "POST";
const char methodDelete[] = "DELETE";
const char methodPUT[] = "PUT";
// The method token defined by HTTP is "OPTIONS"; the previous value "OPTION"
// could never compare equal to the method of a real request.
const char methodOption[] = "OPTIONS";

// Number of characters in one carriage-return + line-feed pair ("\r\n").
const int oneEnter = 2;

/**
 * Parses one HTTP/1.x request held in a caller-owned buffer.
 *
 * The constructor splits the request line (method, URL, protocol), the header
 * fields and the body, then breaks the URL into authority, path and query
 * parameters. Every read is bounded by the length passed to the constructor,
 * so the buffer does not have to be NUL-terminated.
 *
 * Line ends may be "\r\n", "\n" or a lone "\r". The header section ends at the
 * first empty line; a buffer that ends before that line is accepted and yields
 * an empty body. A header line without a ':' makes the parse fail.
 *
 * The parsed values are echoed to stdout with printf, as the earlier
 * implementation did.
 *
 * The object keeps the raw pointer it was given (see getBuf()); the buffer must
 * outlive any use of that pointer. All other accessors return owned copies.
 */
class Request
{
public:
	/**
	 * Parses the request immediately.
	 *
	 * @param buf  Start of the raw request bytes; may be null (parse then fails).
	 * @param len  Number of valid bytes at buf; values <= 0 mean "empty".
	 * @param ok   Set to true when the request line and header section parsed
	 *             successfully, false otherwise.
	 *
	 * The URL is decomposed even when ok ends up false, using whatever part of
	 * the URL was read before the failure.
	 */
	Request(const char* buf, int len, bool& ok)
		:m_buf(buf),
		m_len(len)
	{
		ok = rootParse();
		parseURL();
	}

	/** @return The message body: every byte after the empty line that ends the headers. */
	std::string getData() const
	{
		return m_data;
	}

	/** @return Size of the body in bytes (0 when parsing failed before the body). */
	int getDataSize() const
	{
		return m_DataSize;
	}

	/** @return The request target exactly as it appeared on the request line. */
	std::string getURL() const
	{
		return m_URL;
	}

	/** @return The method token from the request line, e.g. "GET". */
	std::string getMethod() const
	{
		return m_Method;
	}

	/** @return The protocol token from the request line, e.g. "HTTP/1.1". */
	std::string getProtocol() const
	{
		return m_Protocol;
	}

	/** @return The buffer pointer passed to the constructor (not owned). */
	const char* getBuf() const
	{
		return m_buf;
	}

	/** @return The buffer length passed to the constructor. */
	int getLen() const
	{
		return m_len;
	}

	/**
	 * @return The authority part of an absolute URL (text between "//" and the
	 *         next '/', '?' or '#', so a port is included), or an empty string
	 *         when the URL has no authority.
	 */
	std::string getHostname() const
	{
		return m_Hostname;
	}

	/** @return The path part of the URL, without query string or fragment. */
	std::string getPath() const
	{
		return m_Path;
	}

	/**
	 * Looks up a query-string parameter by exact (case-sensitive) name.
	 * @return The parameter value, or an empty string when it is absent.
	 */
	std::string findParameter(const std::string& name) const
	{
		return lookup(m_Parameters, name);
	}

	/**
	 * Looks up a header field by exact (case-sensitive) name.
	 * @return The header value, or an empty string when it is absent.
	 */
	std::string findHeader(const std::string& name) const
	{
		return lookup(m_Headers, name);
	}

private:

	/** Returns the value stored under key, or an empty string if there is none. */
	static std::string lookup(const std::unordered_map<std::string, std::string>& table,
		const std::string& key)
	{
		const auto it = table.find(key);
		return it != table.end() ? it->second : std::string();
	}

	/**
	 * Splits m_URL into m_Hostname, m_Path and m_Parameters.
	 *
	 * The URL is decomposed with the generic URI expression from RFC 3986
	 * (appendix B); the query string is then walked with a regex iterator, one
	 * "name=value" pair per match. Pairs without a name or without '=' are
	 * skipped; when a name repeats, the last value wins.
	 */
	void parseURL()
	{
		try
		{
			// Groups: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
			static const std::regex urlPattern(
				"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
			// A pair starts at the beginning of the query or right after '&'.
			static const std::regex pairPattern("(?:^|&)([^&=]+)=([^&]*)");

			std::smatch parts;
			if (std::regex_match(m_URL, parts, urlPattern))
			{
				m_Hostname = parts.str(4);
				m_Path = parts.str(5);

				const std::string query = parts.str(7);
				const std::sregex_iterator none;
				for (std::sregex_iterator it(query.begin(), query.end(), pairPattern); it != none; ++it)
				{
					m_Parameters[it->str(1)] = it->str(2);
				}
			}
		}
		catch (const std::regex_error& err)
		{
			// Fields filled before the error are kept; the rest stay empty.
			std::printf("error regex:%s\n", err.what());
		}

		for (const auto& parameter : m_Parameters)
		{
			std::printf("name %s  value %s\n", parameter.first.c_str(), parameter.second.c_str());
		}
		std::printf("Hostname %s\n", m_Hostname.c_str());
		std::printf("URI path %s\n", m_Path.c_str());
	}

	/**
	 * Parses request line, headers and body in order.
	 * @return false as soon as a mandatory element is missing or malformed.
	 */
	bool rootParse()
	{
		if (!readMethod() || !readSpace() || !readUrl() || !readSpace() || !readProtocol())
		{
			return false;
		}
		// The request line must be terminated by a line end.
		if (!readLineEnd())
		{
			return false;
		}
		if (!readHeaders())
		{
			return false;
		}
		m_DataSize = readData();
		return true;
	}

	/** True when there is no byte left to read (also covers a null or empty buffer). */
	bool atEnd() const
	{
		return m_buf == nullptr || m_Pos >= m_len;
	}

	/**
	 * Consumes exactly one line terminator: "\r\n", "\n" or a lone "\r".
	 * @return true if a terminator was consumed.
	 */
	bool readLineEnd()
	{
		if (atEnd())
		{
			return false;
		}
		if (m_buf[m_Pos] == '\n')
		{
			++m_Pos;
			return true;
		}
		if (m_buf[m_Pos] != '\r')
		{
			return false;
		}
		++m_Pos;
		if (!atEnd() && m_buf[m_Pos] == '\n')
		{
			++m_Pos;
		}
		return true;
	}

	/**
	 * Consumes a run of spaces and tabs.
	 * @return true if at least one character was consumed.
	 */
	bool readSpace()
	{
		const int start = m_Pos;
		while (!atEnd() && (m_buf[m_Pos] == ' ' || m_buf[m_Pos] == '\t'))
		{
			++m_Pos;
		}
		return m_Pos != start;
	}

	/** Reads the method token of the request line into m_Method. */
	bool readMethod()
	{
		const bool ret = readStr(m_Method);
		std::printf("%s\n", m_Method.c_str());
		return ret;
	}

	/** Reads the request target of the request line into m_URL. */
	bool readUrl()
	{
		const bool ret = readStr(m_URL);
		std::printf("%s\n", m_URL.c_str());
		return ret;
	}

	/** Reads the protocol token of the request line into m_Protocol. */
	bool readProtocol()
	{
		const bool ret = readStr(m_Protocol);
		std::printf("%s\n", m_Protocol.c_str());
		return ret;
	}

	/**
	 * Reads header lines until the empty line that ends the header section, or
	 * until the buffer is exhausted.
	 * @return false if a header line is malformed.
	 */
	bool readHeaders()
	{
		while (!atEnd())
		{
			// A terminator at the start of a line is the empty line.
			if (readLineEnd())
			{
				return true;
			}
			if (!readHeader())
			{
				return false;
			}
		}
		return true;
	}

	/**
	 * Reads one "name: value" line, including its line terminator, and stores it
	 * in m_Headers. The value runs to the end of the line (so it may contain
	 * spaces); surrounding spaces and tabs are removed. When a name repeats, the
	 * first value is kept.
	 * @return false if the name is empty, contains whitespace or a control
	 *         character, the ':' is missing, or the value contains a control
	 *         character other than tab.
	 */
	bool readHeader()
	{
		std::string name;
		while (!atEnd() && m_buf[m_Pos] != ':')
		{
			// Reaching whitespace or a line end here means the ':' is missing.
			if (isBlank(m_buf[m_Pos]) || isSpecial(m_buf[m_Pos]))
			{
				return false;
			}
			name.push_back(m_buf[m_Pos]);
			++m_Pos;
		}
		if (atEnd() || name.empty())
		{
			return false;
		}
		++m_Pos; // skip ':'
		readSpace(); // whitespace after the colon is optional

		std::string value;
		while (!atEnd() && m_buf[m_Pos] != '\r' && m_buf[m_Pos] != '\n')
		{
			if (isSpecial(m_buf[m_Pos]))
			{
				return false;
			}
			value.push_back(m_buf[m_Pos]);
			++m_Pos;
		}
		const std::string::size_type last = value.find_last_not_of(" \t");
		if (last == std::string::npos)
		{
			value.clear();
		}
		else
		{
			value.erase(last + 1);
		}

		std::printf("%s: %s\n", name.c_str(), value.c_str());
		m_Headers.emplace(name, value);
		// The terminator can only be absent when the buffer ends here.
		readLineEnd();
		return true;
	}

	/**
	 * Copies every remaining byte of the buffer into m_data.
	 * @return Number of bytes copied.
	 */
	int readData()
	{
		if (!atEnd())
		{
			m_data.assign(m_buf + m_Pos, static_cast<std::string::size_type>(m_len - m_Pos));
			m_Pos = m_len;
		}
		std::printf("%s\n", m_data.c_str());
		return static_cast<int>(m_data.size());
	}

	/** True for the characters that end a token: space, tab, CR and LF. */
	static bool isBlank(char c)
	{
		return c == ' ' || c == '\t' || c == '\r' || c == '\n';
	}

	/** True for ASCII control characters (0-31 and 127) other than tab. */
	static bool isSpecial(char c)
	{
		const unsigned char u = static_cast<unsigned char>(c);
		return (u < 32 && u != '\t') || u == 127;
	}

	/**
	 * Appends characters to dest up to the next blank character or the end of
	 * the buffer.
	 * @return false if a control character is met or nothing was read.
	 */
	bool readStr(std::string& dest)
	{
		const std::string::size_type before = dest.size();
		while (!atEnd() && !isBlank(m_buf[m_Pos]))
		{
			if (isSpecial(m_buf[m_Pos]))
			{
				return false;
			}
			dest.push_back(m_buf[m_Pos]);
			++m_Pos;
		}
		return dest.size() != before;
	}

	const char* m_buf;                 // raw request bytes, owned by the caller
	int m_len;                         // number of valid bytes at m_buf
	int m_Pos = 0;                     // index of the next unread byte
	std::string m_Method;
	std::string m_Protocol;
	unsigned int m_MajorVersion = 0;   // not populated by the parser
	unsigned int m_MinorVersion = 0;   // not populated by the parser
	std::string m_URL;
	std::string m_Hostname;
	std::string m_Path;
	std::unordered_map<std::string, std::string> m_Parameters;
	std::unordered_map<std::string, std::string> m_Headers;
	std::string m_data;
	int m_DataSize = 0;
};


2 解析

  • 正则表达式中的特殊符号要当作普通字符使用时需要转义:? 要写成 \?;而在 C++ 字符串字面量中 \ 本身也要再转义一次,因此代码里需要写两个 \,即 "\\?"。
  • URL 参数的键与值分开解析,用到了 std::sregex_iterator。最后一个值后面没有 & 作为结束标记,因此先把 URL 反转,利用 ? 的惰性匹配取出第一个 = 之前的内容,再把匹配结果反转回来,得到最后一个值。
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值