取字符串中<>内的数据,但是怎样才能让结果不包含<和>,还是不会啊
// ConsoleApplication1.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include <iostream>
#include <sstream>
#include <string>
#include <regex>
#include <Windows.h>
using namespace std;
// Scans `src` once and collects every section that starts at `extFlagBegin`
// and ends at the next `extFlagEnd`.
//
// src           text to scan
// extFlagBegin  character that opens a section
// extFlagEnd    character that closes a section
// extData       container receiving each section through push_back()
// isIncludeFlag when true, the opening and closing characters are kept in
//               the stored section; when false only the text between them
// returns       number of sections appended to extData
//
// A section that is opened but never closed is dropped.
template<typename LT, typename E>
int Extraction(std::basic_string<E> src, E extFlagBegin, E extFlagEnd, LT& extData, bool isIncludeFlag=false)
{
    int found = 0;
    bool inside = false;
    std::basic_string<E> piece;

    for (const E ch : src)
    {
        // Outside a section: only an opening delimiter is of interest.
        if (!inside)
        {
            if (ch == extFlagBegin)
            {
                inside = true;
                if (isIncludeFlag)
                {
                    piece.push_back(ch);
                }
            }
            continue;
        }

        // Inside a section: everything except the closing delimiter is payload.
        if (ch != extFlagEnd)
        {
            piece.push_back(ch);
            continue;
        }

        // Closing delimiter reached: emit the section and reset the state.
        if (isIncludeFlag)
        {
            piece.push_back(ch);
        }
        extData.push_back(piece);
        piece.clear();
        inside = false;
        ++found;
    }
    return found;
}
// Regex-based counterpart of Extraction(): collects every section of `src`
// that starts with `extFlagBegin`, ends with the next `extFlagEnd`, and
// contains no `extFlagEnd` in between.
//
// src           text to scan
// extFlagBegin  character that opens a section
// extFlagEnd    character that closes a section
// extData       container receiving each section through push_back()
// isIncludeFlag when true the delimiters are kept in the stored section,
//               otherwise only the text between them is stored
// returns       number of sections appended to extData
//
// The delimiters are escaped before being placed in the pattern, so regex
// metacharacters such as '(' / ')' or '[' / ']' are matched literally. The
// regex types are instantiated on E, so wide strings work as well.
// Throws std::regex_error if the pattern cannot be compiled.
template<typename LT, typename E>
int ExtractionRegex(std::basic_string<E> src, E extFlagBegin, E extFlagEnd, LT& extData, bool isIncludeFlag=false)
{
    typedef std::basic_string<E> String;
    typedef std::regex_token_iterator<typename String::const_iterator> TokenIterator;

    // Appends `ch` to `out`, prefixed with a backslash when it is special
    // either in an ECMAScript pattern or inside a bracket expression.
    const auto appendLiteral = [](String& out, E ch)
    {
        for (const char* meta = "^$\\.*+?()[]{}|-"; *meta != '\0'; ++meta)
        {
            if (ch == static_cast<E>(*meta))
            {
                out.push_back(static_cast<E>('\\'));
                break;
            }
        }
        out.push_back(ch);
    };

    // Pattern: <begin>[^<end>]*<end>
    String patternText;
    appendLiteral(patternText, extFlagBegin);
    patternText.push_back(static_cast<E>('['));
    patternText.push_back(static_cast<E>('^'));
    appendLiteral(patternText, extFlagEnd);
    patternText.push_back(static_cast<E>(']'));
    patternText.push_back(static_cast<E>('*'));
    appendLiteral(patternText, extFlagEnd);
    const std::basic_regex<E> pattern(patternText);

    int count = 0;
    const TokenIterator last;
    for (TokenIterator it(src.cbegin(), src.cend(), pattern); it != last; ++it)
    {
        // Every match is at least two characters long (both delimiters), so
        // trimming one character from each end is always in range.
        const String section = isIncludeFlag
            ? String(it->first, it->second)
            : String(it->first + 1, it->second - 1);
        extData.push_back(section);
        ++count;
    }
    return count;
}
// Demo and micro-benchmark driver: prints the results of both extraction
// functions for a sample string (without and with delimiters), then times
// 1000 rounds of each implementation and prints the elapsed seconds.
int _tmain(int argc, _TCHAR* argv[])
{
    const std::string str = "jay@so<hj>u<>t<hr>idg<evnnnnnni av>deo@com";
    std::vector<std::string> vt;

    // Writes every extracted section on its own line.
    auto dump = [&vt]()
    {
        for (auto pos = vt.begin(); pos != vt.end(); ++pos)
        {
            std::cout << *pos << std::endl;
        }
    };

    // Functional check: regex version first, then the hand-written scanner.
    vt.clear();
    ExtractionRegex(str, '<', '>', vt);
    dump();

    vt.clear();
    ExtractionRegex(str, '<', '>', vt, true);
    dump();

    vt.clear();
    Extraction(str, '<', '>', vt);
    dump();

    vt.clear();
    Extraction(str, '<', '>', vt, true);
    dump();

    const int ts = 1000;

    // Timing of the regex implementation.
    clock_t start = clock();
    for (int round = 0; round != ts; ++round)
    {
        vt.clear();
        ExtractionRegex(str, '<', '>', vt);
        vt.clear();
        ExtractionRegex(str, '<', '>', vt, true);
    }
    clock_t finish = clock();
    std::cout << static_cast<double>(finish - start) / CLOCKS_PER_SEC << std::endl;

    // Timing of the hand-written scanner.
    vt.clear();
    start = clock();
    for (int round = 0; round != ts; ++round)
    {
        vt.clear();
        Extraction(str, '<', '>', vt);
        vt.clear();
        Extraction(str, '<', '>', vt, true);
    }
    finish = clock();
    std::cout << static_cast<double>(finish - start) / CLOCKS_PER_SEC << std::endl;

    system("pause");
    return 0;
}
//
#include "stdafx.h"
#include <iostream>
#include <sstream>
#include <string>
#include <regex>
#include <Windows.h>
using namespace std;
// Single-pass extraction of delimited sections from `src`.
//
// A section opens at the first `extFlagBegin` seen while no section is open
// and closes at the next `extFlagEnd`. Each closed section is appended to
// `extData` with push_back(). When `isIncludeFlag` is true the stored text
// keeps both delimiters; otherwise it holds only what lies between them.
// Text of a section that is never closed is discarded.
//
// Returns the number of sections appended to `extData`.
template<typename LT, typename E>
int Extraction(std::basic_string<E> src, E extFlagBegin, E extFlagEnd, LT& extData, bool isIncludeFlag=false)
{
    typedef typename std::basic_string<E>::size_type Index;

    std::basic_string<E> current;
    bool collecting = false;
    int extracted = 0;

    for (Index i = 0, total = src.size(); i < total; ++i)
    {
        const E symbol = src[i];
        const bool opens = !collecting && symbol == extFlagBegin;
        const bool closes = collecting && symbol == extFlagEnd;

        // Delimiters are stored only on request; any other character is
        // stored only while a section is open.
        const bool keep = (opens || closes) ? isIncludeFlag : collecting;
        if (keep)
        {
            current.push_back(symbol);
        }

        if (opens)
        {
            collecting = true;
        }
        else if (closes)
        {
            extData.push_back(current);
            current.clear();
            collecting = false;
            ++extracted;
        }
    }
    return extracted;
}
// Regex-based extraction of delimited sections.
//
// Finds every occurrence of `extFlagBegin`, followed by any run of characters
// other than `extFlagEnd`, followed by `extFlagEnd`, and appends it to
// `extData` with push_back().
//
// src           text to scan
// extFlagBegin  opening delimiter
// extFlagEnd    closing delimiter
// extData       output container
// isIncludeFlag true: store the section with its delimiters;
//               false: store only the text between them
// returns       number of sections appended
//
// Both delimiters are backslash-escaped when they are regex metacharacters,
// so they are always matched literally, and the regex is built for the
// character type E rather than being fixed to char.
// Throws std::regex_error if the pattern cannot be compiled.
template<typename LT, typename E>
int ExtractionRegex(std::basic_string<E> src, E extFlagBegin, E extFlagEnd, LT& extData, bool isIncludeFlag=false)
{
    typedef std::basic_string<E> Text;
    typedef std::regex_iterator<typename Text::const_iterator> MatchIterator;

    // Returns `ch` as a pattern fragment that matches it literally.
    const auto quote = [](E ch) -> Text
    {
        Text quoted;
        for (const char* meta = "^$\\.*+?()[]{}|-"; *meta != '\0'; ++meta)
        {
            if (ch == static_cast<E>(*meta))
            {
                quoted.push_back(static_cast<E>('\\'));
                break;
            }
        }
        quoted.push_back(ch);
        return quoted;
    };

    // Converts an ASCII pattern fragment to the target character type.
    const auto widen = [](const char* ascii) -> Text
    {
        Text widened;
        for (; *ascii != '\0'; ++ascii)
        {
            widened.push_back(static_cast<E>(*ascii));
        }
        return widened;
    };

    // <begin>([^<end>]*)<end> : group 0 is the section with delimiters,
    // group 1 the section without them.
    const Text expression = quote(extFlagBegin) + widen("([^") + quote(extFlagEnd)
                          + widen("]*)") + quote(extFlagEnd);
    const std::basic_regex<E> pattern(expression);
    const int wantedGroup = isIncludeFlag ? 0 : 1;

    int count = 0;
    for (MatchIterator match(src.cbegin(), src.cend(), pattern), done; match != done; ++match)
    {
        extData.push_back((*match)[wantedGroup].str());
        ++count;
    }
    return count;
}
// Entry point: shows the output of ExtractionRegex() and Extraction() on a
// sample string (first without, then with delimiters) and afterwards prints
// how many seconds 1000 rounds of each implementation take.
int _tmain(int argc, _TCHAR* argv[])
{
    std::string str = "jay@so<hj>u<>t<hr>idg<evnnnnnni av>deo@com";
    std::vector<std::string> vt;

    // Both modes in the order they are exercised: delimiters stripped, then kept.
    const bool flagModes[] = { false, true };

    // Functional check, regex implementation.
    for (bool keepFlags : flagModes)
    {
        vt.clear();
        ExtractionRegex(str, '<', '>', vt, keepFlags);
        for (const std::string& item : vt)
        {
            std::cout << item << std::endl;
        }
    }

    // Functional check, hand-written scanner.
    for (bool keepFlags : flagModes)
    {
        vt.clear();
        Extraction(str, '<', '>', vt, keepFlags);
        for (const std::string& item : vt)
        {
            std::cout << item << std::endl;
        }
    }

    int ts = 1000;
    clock_t start, finish;

    // Benchmark: regex implementation.
    start = clock();
    for (int pass = 0; pass < ts; ++pass)
    {
        for (bool keepFlags : flagModes)
        {
            vt.clear();
            ExtractionRegex(str, '<', '>', vt, keepFlags);
        }
    }
    finish = clock();
    std::cout << (double)(finish - start) / CLOCKS_PER_SEC << std::endl;

    // Benchmark: hand-written scanner.
    vt.clear();
    start = clock();
    for (int pass = 0; pass < ts; ++pass)
    {
        for (bool keepFlags : flagModes)
        {
            vt.clear();
            Extraction(str, '<', '>', vt, keepFlags);
        }
    }
    finish = clock();
    std::cout << (double)(finish - start) / CLOCKS_PER_SEC << std::endl;

    system("pause");
    return 0;
}
提取数据后,再用copy之类的方法截掉不需要的部分?这样效率略低,但只要这不是瓶颈代码就没问题。
-
baoming9999: 回复 森哥先森 :互相讨论吧,用户的每一个操作,花费的时间不应该超过1.5秒,最好0.8秒内,1000条的数据处理还是很常见的,还要有别的处理,所以对多条数据处理时,每一个函数都要斟酌下了
-
使用 pcre:带分隔符和不带分隔符各提取 1000 次,共耗时 0.062 秒
// ConsoleApplication1.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include <iostream>
#include <sstream>
#include <string>
#include <regex>
#include <iostream>
#include <sstream>
#include <string>
#include <regex>
#include <Windows.h>
#define PCRE_STATIC
#include "pcre.h"
#pragma comment (lib, "pcre.lib")
#pragma comment (lib, "pcreposix.lib")
#include "pcre.h"
#pragma comment (lib, "pcre.lib")
#pragma comment (lib, "pcreposix.lib")
using namespace std;
class WXRegex
{
{
#define OVECCOUNT 30 /* should be a multiple of 3 */
pcre *re;
public:
WXRegex() : re(nullptr)
{
if(re != nullptr)
{
free(re);
}
}
~WXRegex()
{
}
void Compile(const char* pattern="(?<=<)[^>]*(?=>)")
{
int erroffset;
const char *error;
WXRegex() : re(nullptr)
{
if(re != nullptr)
{
free(re);
}
}
~WXRegex()
{
}
void Compile(const char* pattern="(?<=<)[^>]*(?=>)")
{
int erroffset;
const char *error;
//释放以前的
if(re != nullptr)
{
free(re);
}
if(re != nullptr)
{
free(re);
}
re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
if (re == NULL)
{
throw exception(error);
}
}
int Extraction(const char* src, std::vector<string>& out)
{
int count= 0;
if (re == NULL)
{
throw exception(error);
}
}
int Extraction(const char* src, std::vector<string>& out)
{
int count= 0;
int exec_offset = 0;
int rc=0;
int ovector[OVECCOUNT];
const char *captured_string;
int rc=0;
int ovector[OVECCOUNT];
const char *captured_string;
do {
rc = pcre_exec(re, NULL, src, strlen(src), exec_offset, 0, ovector, OVECCOUNT);
if (rc < 0)
{
break;
}
for (int i = 0; i < rc; i++)
{
pcre_get_substring( src, ovector, rc, 0, &captured_string );
out.push_back(captured_string);
}
rc = pcre_exec(re, NULL, src, strlen(src), exec_offset, 0, ovector, OVECCOUNT);
if (rc < 0)
{
break;
}
for (int i = 0; i < rc; i++)
{
pcre_get_substring( src, ovector, rc, 0, &captured_string );
out.push_back(captured_string);
}
exec_offset = ovector[1];
count++;
} while ( rc > 0 );
return count;
}
};
}
};
// Demo and benchmark for WXRegex: prints the matches that include the
// delimiters, times 1000 extractions with each of the two patterns, prints
// the matches without delimiters, and finally the elapsed seconds.
int _tmain(int argc, _TCHAR* argv[])
{
    std::string str = "<sd>< jay@>so<hj>ut<h在r>i在工城某工dg<evnnn厅nnni av>deo@com";
    WXRegex wxr;
    std::vector<std::string> out;

    // Pattern that keeps the delimiters.
    wxr.Compile("<[^>]*>");
    wxr.Extraction(str.c_str(), out);
    std::for_each(out.begin(), out.end(), [](std::string& c){ std::cout << c << std::endl; });

    const clock_t start = clock();
    for (int i = 0; i < 1000; i++)
    {
        out.clear();
        wxr.Extraction(str.c_str(), out);
    }

    // Pattern that drops the delimiters (lookbehind / lookahead).
    wxr.Compile("(?<=<)[^>]*(?=>)");
    for (int i = 0; i < 1000; i++)
    {
        out.clear();
        wxr.Extraction(str.c_str(), out);
    }
    // Stop the clock before printing so console output is not measured.
    const clock_t finish = clock();

    out.clear();
    wxr.Extraction(str.c_str(), out);
    std::for_each(out.begin(), out.end(), [](std::string& c){ std::cout << c << std::endl; });

    std::cout << (double)(finish - start) / CLOCKS_PER_SEC << std::endl;
    system("pause");
    return 0;
}