Vertica 6.x 只支持R、C++编写Vertica的UDF,由于使用内置的String 提供的函数实现不了substring_index功能,且用C++实现
#include <algorithm>
#include <string>
#include "Vertica.h"
using namespace Vertica;
using namespace std;
// 从text中获取第|delim_num|个|delim|前的全部子串
std::string getSubString(const std::string& text, const std::string& delim, int delim_num) {
// 需要0个分隔符
if (delim_num <= 0) {
return std::string();
}
// 查找第n个分隔符
size_t delim_size = delim.size();
size_t next = 0;
do {
next = text.find(delim, next);
if (next != std::string::npos) {
--delim_num;
next+=delim_size;
}
} while (next <= text.size() && delim_num > 0);
return text.substr(0, next - delim_size);
}
/*
*
*/
class SubstringIndex : public ScalarFunction
{
public:
virtual void processBlock(ServerInterface &srvInterface,
BlockReader &arg_reader,
BlockWriter &res_writer) {
if (arg_reader.getNumCols() != 3)
vt_report_error(0, "Function only accept 3 arguments, but %zu provided", arg_reader.getNumCols());
// While we have inputs to process
try{
do {//C++实现UDF特有的(Java不需要),避免出现:ERROR 5400: User-defined Scalar Function substring_index produced fewer output rows (0) than input rows (1)
string src = arg_reader.getStringRef(0).str();
string delim = arg_reader.getStringRef(1).str();
vint occur = arg_reader.getIntRef(2);
int tot = occur;
string result = getSubString(src,delim,tot);
res_writer.getStringRef().copy(result);
res_writer.next();
} while (arg_reader.next());
}catch(...){//这里需要捕捉异常,不然Vertica后台会退出,整个db就stop了,PS:C++的异常太复杂,...表示捕捉所有异常
res_writer.getStringRef().copy("OTHER");
}
}
};
class SubstringIndexFactory : public ScalarFunctionFactory
{
virtual ScalarFunction *createScalarFunction(ServerInterface &interface)
{ return vt_createFuncObj(interface.allocator, SubstringIndex); }
virtual void getPrototype(ServerInterface &interface,
ColumnTypes &argTypes,
ColumnTypes &returnType)
{
argTypes.addVarchar();
argTypes.addVarchar();
argTypes.addInt();
returnType.addVarchar();
}
virtual void getReturnType(ServerInterface &srvInterface,
const SizedColumnTypes &argTypes,
SizedColumnTypes &returnType)
{
const VerticaType &t = argTypes.getColumnType(0);
returnType.addVarchar(t.getStringLength());
}
};
RegisterFactory(SubstringIndexFactory);
dwyyproduct=> select substring_index('a/b/c','/',2);
substring_index
-----------------
a/b
(1 row)
dwyyproduct=> select substring_index('a/b/c','/',3);
substring_index
-----------------
a/b/c
(1 row)
dwyyproduct=> select substring_index('a/b/c','/',4);
substring_index
-----------------
a/b/c
(1 row)
dwyyproduct=> select substring_index('a/b/c','/',0);
substring_index
-----------------
(1 row)
与JAVA实现不同,请注意上述功能代码的注释部分!
编写完cpp,在linux下执行gcc命令
gcc substring_index.cpp Vertica.cpp -I /opt/vertica/sdk/include/ -fPIC -shared -o verticaso.so
/opt/vertica/sdk/include/ 为vertica c++ 的 .h 文件所在位置
verticaso.so 为生成的so文件
使用dbadmin进入vsql,执行
CREATE LIBRARY verticasolib AS '/tmp/verticaso.so' language 'C++';
CREATE FUNCTION substring_index AS LANGUAGE 'C++' NAME 'SubstringIndexFactory' LIBRARY verticasolib;
ALL DONE!