Postgresql15安装插件madlib2.0

madlib2.0简介

Apache MADlib

在这里插入图片描述

madlib使用指南

MADlib: Main Page

madlib安装

Installation Guide - Apache MADlib - Apache Software Foundation

准备

  • cmake 3.5.2及以上版本
  • python3.9
  • postgresql15,源码编译时必须指定 --with-python
  • postgresql插件plpython3u

源码安装cmake3.16.2

cd /opt

wget https://github.com/Kitware/CMake/releases/download/v3.16.2/cmake-3.16.2.tar.gz
tar -zxvf cmake-3.16.2.tar.gz

cd cmake-3.16.2

./configure --prefix=/usr/local/cmake

make -j 4

make install

echo 'export CMAKE_HOME=/usr/local/cmake
export PATH=$CMAKE_HOME/bin:$PATH' >> /etc/profile

source /etc/profile

postgresql15.2源码安装

Centos7.6安装postgresql15

su - postgres
psql etl -U postgres
-- Create the plpython3u extension in the database this session is connected to
create extension plpython3u;
-- Create the role madlib as a superuser that is allowed to log in
create role madlib with superuser login password '000000';
-- For tables that role madlib creates from now on, grant SELECT to PUBLIC by default
alter default privileges for role madlib grant select on tables to public;
\q
exit

madlib2.0源码安装

cd /opt

# 上传 apache-madlib-2.0.0-src.tar.gz
# 下载地址:https://dist.apache.org/repos/dist/release/madlib/2.0.0/apache-madlib-2.0.0-src.tar.gz
rz # apache-madlib-2.0.0-src.tar.gz

# 解压
tar -zxvf /opt/apache-madlib-2.0.0-src.tar.gz -C /usr/local/src

# 环境变量
# MADLIB_ROOT : 源码目录
# MADLIB_BUILD_ROOT : 构建目录
echo 'export MADLIB_ROOT=/usr/local/src/apache-madlib-2.0.0-src
export MADLIB_BUILD_ROOT=/usr/local/src/apache-madlib-2.0.0-src/build' >> /etc/profile

source /etc/profile

# 创建build目录
mkdir $MADLIB_ROOT/build

# 切到目录 $MADLIB_BUILD_ROOT 
cd $MADLIB_BUILD_ROOT

# 配置
cmake ..

在这里插入图片描述

# 编译
# 会在线下载一些文件,所以比较慢,注意,不要用并行
make
# 会发现编译报错
/usr/local/src/apache-madlib-2.0.0-src/methods/array_ops/src/pg_gp/array_ops.c:11:24: fatal error: utils/int8.h: No such file or directory
 #include "utils/int8.h"
                        ^
compilation terminated.
make[2]: *** [src/ports/postgres/15/CMakeFiles/madlib_postgresql_15.dir/__/__/__/__/methods/array_ops/src/pg_gp/array_ops.c.o] Error 1
make[1]: *** [src/ports/postgres/15/CMakeFiles/madlib_postgresql_15.dir/all] Error 2
make: *** [all] Error 2

在这里插入图片描述

# 修改源码,将第11行的 #include "utils/int8.h" 删除
sed -i 's@#include "utils/int8.h"@@g' /usr/local/src/apache-madlib-2.0.0-src/methods/array_ops/src/pg_gp/array_ops.c

# 然后重新编译
make

# 又报另外一个错
# 函数调用错误,实际pg15中的pg_md5_hash有4个参数,但是源码中调用只用了3个参数
/usr/local/src/apache-madlib-2.0.0-src/methods/sketch/src/pg_gp/sketch_support.c: In function ‘sketch_md5_bytea’:
/usr/local/src/apache-madlib-2.0.0-src/methods/sketch/src/pg_gp/sketch_support.c:322:9: error: too few arguments to function ‘pg_md5_hash’
         pg_md5_hash(datp, len, outbuf);
         ^
In file included from /usr/local/src/apache-madlib-2.0.0-src/methods/sketch/src/pg_gp/sketch_support.c:46:0:
/usr/local/pgsql/include/server/common/md5.h:29:13: note: declared here
 extern bool pg_md5_hash(const void *buff, size_t len, char *hexsum,
             ^
make[2]: *** [src/ports/postgres/15/CMakeFiles/madlib_postgresql_15.dir/__/__/__/__/methods/sketch/src/pg_gp/sketch_support.c.o] Error 1
make[1]: *** [src/ports/postgres/15/CMakeFiles/madlib_postgresql_15.dir/all] Error 2
make: *** [all] Error 2

在这里插入图片描述

# 定位到321行的else,如下图操作
vi /usr/local/src/apache-madlib-2.0.0-src/methods/sketch/src/pg_gp/sketch_support.c
# 增加
const char *errstr = NULL;
# 修改
pg_md5_hash(datp, len, outbuf, &errstr);

在这里插入图片描述
在这里插入图片描述

# 再次编译,此时已经编译成功
make

在这里插入图片描述

# Hand the MADlib source tree over to the postgres OS user.
# "owner:group" is the POSIX separator (the "owner.group" form is a deprecated
# GNU extension), and the variable is quoted so an unusual path cannot split.
chown -R postgres:postgres "$MADLIB_ROOT"

# 切换到postgres用户
su - postgres

# 开始安装
$MADLIB_ROOT/src/bin/madpack -s madlib -c madlib/000000@localhost:5432/etl install --platform postgres
# 报错
m4:/usr/local/src/apache-madlib-2.0.0-src/src/ports/postgres/modules/validation/cross_validation.sql_in:14: cannot open `SQLCommon.m4': No such file or directory
madpack.py: INFO : Installing MADlib:
madpack.py: ERROR : Failed executing /tmp/madlib.Niatpe/madlib_install.sql
madpack.py: ERROR : Check the log at /tmp/madlib.Niatpe/madlib_install.sql.log
madpack.py: INFO : MADlib install unsuccessful.
madpack.py: INFO : All changes are rolled back.
INFO: Log files saved in /tmp/madlib.Niatpe

在这里插入图片描述

# 查找文件 SQLCommon.m4
find $MADLIB_ROOT -name SQLCommon.m4
/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/madpack/SQLCommon.m4

# 切到该目录
cd /usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/madpack/

# 重新安装
$MADLIB_ROOT/src/bin/madpack -s madlib -c madlib/000000@localhost:5432/etl install --platform postgres

# 依然报错
madpack.py: INFO : Detected PostgreSQL version 15.2.
madpack.py: INFO : *** Installing MADlib ***
madpack.py: INFO : MADlib tools version    = 2.0.0 (/usr/local/src/apache-madlib-2.0.0-src/src/bin/../madpack/madpack.py)
madpack.py: INFO : MADlib database version = None (host=localhost:5432, db=etl, schema=madlib)
madpack.py: INFO : Testing PL/Python environment...
madpack.py: INFO : > PL/Python environment OK (version: 3.9.17)
madpack.py: INFO : > Preparing objects for the following modules:
madpack.py: INFO : > - bayes
madpack.py: INFO : > - crf
madpack.py: INFO : > - elastic_net
madpack.py: INFO : > - linalg
madpack.py: INFO : > - pmml
madpack.py: INFO : > - prob
madpack.py: INFO : > - svm
madpack.py: INFO : > - tsa
madpack.py: INFO : > - conjugate_gradient
madpack.py: INFO : > - knn
madpack.py: INFO : > - lda
madpack.py: INFO : > - stats
madpack.py: INFO : > - utilities
madpack.py: INFO : > - assoc_rules
madpack.py: INFO : > - convex
madpack.py: INFO : > - dbscan
madpack.py: INFO : > - deep_learning
madpack.py: INFO : > - glm
madpack.py: INFO : > - graph
madpack.py: INFO : > - linear_systems
madpack.py: INFO : > - mxgboost
madpack.py: INFO : > - recursive_partitioning
madpack.py: INFO : > - regress
madpack.py: INFO : > - sample
madpack.py: INFO : > - summary
madpack.py: INFO : > - kmeans
madpack.py: INFO : > - pca
madpack.py: INFO : > - validation
madpack.py: INFO : Installing MADlib:
madpack.py: ERROR : Failed executing /tmp/madlib.QJOwWI/madlib_install.sql
madpack.py: ERROR : Check the log at /tmp/madlib.QJOwWI/madlib_install.sql.log
madpack.py: INFO : MADlib install unsuccessful.
madpack.py: INFO : All changes are rolled back.
INFO: Log files saved in /tmp/madlib.QJOwWI

在这里插入图片描述

# 查看日志
cat /tmp/madlib.QJOwWI/madlib_install.sql.log | tail -4
CREATE OR REPLACE FUNCTION madlib.vcrf_top1_label(mArray DOUBLE PRECISION[], rArray DOUBLE PRECISION[], nlabel int)
returns int[] as 'libmadlib.so' language c strict;
psql:/tmp/madlib.QJOwWI/madlib_install.sql:464: ERROR:  could not access file "libmadlib.so": No such file or directory

在这里插入图片描述

# 查找该文件
find $MADLIB_ROOT -name libmadlib.so
/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/lib/libmadlib.so

# Add the directory that contains libmadlib.so to dynamic_library_path.
# - Use ALTER SYSTEM rather than editing postgresql.auto.conf by hand: that
#   file is owned by ALTER SYSTEM and manual edits can be overwritten.
# - Keep $libdir (the default value) in the list so libraries that are
#   referenced by bare name still resolve to PostgreSQL's own library directory.
psql -U postgres -d etl -c "ALTER SYSTEM SET dynamic_library_path = '/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/lib:\$libdir';"

# 重启postgresql
pg_ctl restart -mf

# 再次重新安装
$MADLIB_ROOT/src/bin/madpack -s madlib -c madlib/000000@localhost:5432/etl install --platform postgres

# 依然报错
madpack.py: INFO : Detected PostgreSQL version 15.2.
server signaled
madpack.py: INFO : *** Installing MADlib ***
madpack.py: INFO : MADlib tools version    = 2.0.0 (/usr/local/src/apache-madlib-2.0.0-src/src/bin/../madpack/madpack.py)
madpack.py: INFO : MADlib database version = None (host=localhost:5432, db=etl, schema=madlib)
madpack.py: INFO : Testing PL/Python environment...
madpack.py: INFO : > PL/Python environment OK (version: 3.9.17)
madpack.py: INFO : > Preparing objects for the following modules:
madpack.py: INFO : > - bayes
madpack.py: INFO : > - crf
madpack.py: INFO : > - elastic_net
madpack.py: INFO : > - linalg
madpack.py: INFO : > - pmml
madpack.py: INFO : > - prob
madpack.py: INFO : > - svm
madpack.py: INFO : > - tsa
madpack.py: INFO : > - conjugate_gradient
madpack.py: INFO : > - knn
madpack.py: INFO : > - lda
madpack.py: INFO : > - stats
madpack.py: INFO : > - utilities
madpack.py: INFO : > - assoc_rules
madpack.py: INFO : > - convex
madpack.py: INFO : > - dbscan
madpack.py: INFO : > - deep_learning
madpack.py: INFO : > - glm
madpack.py: INFO : > - graph
madpack.py: INFO : > - linear_systems
madpack.py: INFO : > - mxgboost
madpack.py: INFO : > - recursive_partitioning
madpack.py: INFO : > - regress
madpack.py: INFO : > - sample
madpack.py: INFO : > - summary
madpack.py: INFO : > - kmeans
madpack.py: INFO : > - pca
madpack.py: INFO : > - validation
madpack.py: INFO : Installing MADlib:
madpack.py: ERROR : Failed executing /tmp/madlib.pSbRNZ/madlib_install.sql
madpack.py: ERROR : Check the log at /tmp/madlib.pSbRNZ/madlib_install.sql.log
madpack.py: INFO : MADlib install unsuccessful.
madpack.py: INFO : All changes are rolled back.
INFO: Log files saved in /tmp/madlib.pSbRNZ

在这里插入图片描述

# 查看日志 报错:函数不存在
cat /tmp/madlib.pSbRNZ/madlib_install.sql.log | tail -5
madlib.array_union(anyarray) (
    SFUNC = array_cat,
    STYPE = anyarray
);
psql:/tmp/madlib.pSbRNZ/madlib_install.sql:571: ERROR:  function array_cat(anyarray, anyarray) does not exist

在这里插入图片描述

# 自己建函数 array_cat(anyarray, anyarray)
psql etl -U madlib
-- Compatibility shim: the MADlib install script creates an aggregate with
-- SFUNC = array_cat over anyarray, and this server has no array_cat with the
-- (anyarray, anyarray) signature (see the install error above this step).
-- The built-in is called schema-qualified so the call can never be resolved
-- to one of the public.array_cat wrappers instead of the built-in.
-- SECURITY DEFINER is dropped: concatenating two arrays needs no elevated
-- privileges, and a definer function without a pinned search_path is unsafe.
-- The function is deliberately not STRICT so a NULL aggregate state is passed
-- through to the built-in unchanged.
CREATE OR REPLACE FUNCTION public.array_cat(anyarray, anyarray)
 RETURNS anyarray
 LANGUAGE sql
 IMMUTABLE
AS $function$
    SELECT pg_catalog.array_cat($1, $2);
$function$
;
\q

# 再次安装
$MADLIB_ROOT/src/bin/madpack -s madlib -c madlib/000000@localhost:5432/etl install --platform postgres

# 依然报错
madpack.py: INFO : Detected PostgreSQL version 15.2.
madpack.py: INFO : *** Installing MADlib ***
madpack.py: INFO : MADlib tools version    = 2.0.0 (/usr/local/src/apache-madlib-2.0.0-src/src/bin/../madpack/madpack.py)
madpack.py: INFO : MADlib database version = None (host=localhost:5432, db=etl, schema=madlib)
madpack.py: INFO : Testing PL/Python environment...
madpack.py: INFO : > PL/Python environment OK (version: 3.9.17)
madpack.py: INFO : > Preparing objects for the following modules:
madpack.py: INFO : > - bayes
madpack.py: INFO : > - crf
madpack.py: INFO : > - elastic_net
madpack.py: INFO : > - linalg
madpack.py: INFO : > - pmml
madpack.py: INFO : > - prob
madpack.py: INFO : > - svm
madpack.py: INFO : > - tsa
madpack.py: INFO : > - conjugate_gradient
madpack.py: INFO : > - knn
madpack.py: INFO : > - lda
madpack.py: INFO : > - stats
madpack.py: INFO : > - utilities
madpack.py: INFO : > - assoc_rules
madpack.py: INFO : > - convex
madpack.py: INFO : > - dbscan
madpack.py: INFO : > - deep_learning
madpack.py: INFO : > - glm
madpack.py: INFO : > - graph
madpack.py: INFO : > - linear_systems
madpack.py: INFO : > - mxgboost
madpack.py: INFO : > - recursive_partitioning
madpack.py: INFO : > - regress
madpack.py: INFO : > - sample
madpack.py: INFO : > - summary
madpack.py: INFO : > - kmeans
madpack.py: INFO : > - pca
madpack.py: INFO : > - validation
madpack.py: INFO : Installing MADlib:
madpack.py: ERROR : Failed executing /tmp/madlib.IB61Bf/madlib_install.sql
madpack.py: ERROR : Check the log at /tmp/madlib.IB61Bf/madlib_install.sql.log
madpack.py: INFO : MADlib install unsuccessful.
madpack.py: INFO : All changes are rolled back.
INFO: Log files saved in /tmp/madlib.IB61Bf 

在这里插入图片描述

# 查看日志 报错:函数不存在
cat /tmp/madlib.IB61Bf/madlib_install.sql.log | tail -7
CREATE AGGREGATE madlib.agg_array_concat(REAL[]) (
   SFUNC = array_cat,
   PREFUNC = array_cat,
   STYPE = REAL[]
   );
psql:/tmp/madlib.IB61Bf/madlib_install.sql:15416: WARNING:  aggregate attribute "prefunc" not recognized
psql:/tmp/madlib.IB61Bf/madlib_install.sql:15416: ERROR:  function array_cat(real[], real[]) does not exist

在这里插入图片描述

# 自己建函数 array_cat(real[], real[])
psql etl -U madlib
-- Compatibility shim: the MADlib install script creates
-- madlib.agg_array_concat(REAL[]) with SFUNC = array_cat, which needs an
-- array_cat(real[], real[]) that this server does not provide.
-- The built-in MUST be called as pg_catalog.array_cat: an unqualified
-- array_cat($1, $2) with two real[] arguments is an exact match for this very
-- function, so the wrapper would call itself until the stack depth limit is hit.
-- SECURITY DEFINER is dropped: no elevated privileges are needed, and a
-- definer function without a pinned search_path is unsafe.
-- The function is deliberately not STRICT so a NULL aggregate state is passed
-- through to the built-in unchanged.
CREATE OR REPLACE FUNCTION public.array_cat(real[], real[])
 RETURNS real[]
 LANGUAGE sql
 IMMUTABLE
AS $function$
    SELECT pg_catalog.array_cat($1, $2);
$function$
;
\q

# 再次安装
$MADLIB_ROOT/src/bin/madpack -s madlib -c madlib/000000@localhost:5432/etl install --platform postgres

# 依然报错
madpack.py: INFO : Detected PostgreSQL version 15.2.
madpack.py: INFO : *** Installing MADlib ***
madpack.py: INFO : MADlib tools version    = 2.0.0 (/usr/local/src/apache-madlib-2.0.0-src/src/bin/../madpack/madpack.py)
madpack.py: INFO : MADlib database version = None (host=localhost:5432, db=etl, schema=madlib)
madpack.py: INFO : Testing PL/Python environment...
madpack.py: INFO : > PL/Python environment OK (version: 3.9.17)
madpack.py: INFO : > Preparing objects for the following modules:
madpack.py: INFO : > - bayes
madpack.py: INFO : > - crf
madpack.py: INFO : > - elastic_net
madpack.py: INFO : > - linalg
madpack.py: INFO : > - pmml
madpack.py: INFO : > - prob
madpack.py: INFO : > - svm
madpack.py: INFO : > - tsa
madpack.py: INFO : > - conjugate_gradient
madpack.py: INFO : > - knn
madpack.py: INFO : > - lda
madpack.py: INFO : > - stats
madpack.py: INFO : > - utilities
madpack.py: INFO : > - assoc_rules
madpack.py: INFO : > - convex
madpack.py: INFO : > - dbscan
madpack.py: INFO : > - deep_learning
madpack.py: INFO : > - glm
madpack.py: INFO : > - graph
madpack.py: INFO : > - linear_systems
madpack.py: INFO : > - mxgboost
madpack.py: INFO : > - recursive_partitioning
madpack.py: INFO : > - regress
madpack.py: INFO : > - sample
madpack.py: INFO : > - summary
madpack.py: INFO : > - kmeans
madpack.py: INFO : > - pca
madpack.py: INFO : > - validation
madpack.py: INFO : Installing MADlib:
madpack.py: ERROR : Failed executing /tmp/madlib.rjqeeK/madlib_install.sql
madpack.py: ERROR : Check the log at /tmp/madlib.rjqeeK/madlib_install.sql.log
madpack.py: INFO : MADlib install unsuccessful.
madpack.py: INFO : All changes are rolled back.
INFO: Log files saved in /tmp/madlib.rjqeeK

在这里插入图片描述

# 查看日志 报错:函数不存在
cat /tmp/madlib.rjqeeK/madlib_install.sql.log | tail -7
CREATE AGGREGATE madlib.agg_array_concat(SMALLINT[]) (
   SFUNC = array_cat,
   PREFUNC = array_cat,
   STYPE = SMALLINT[]
   );
psql:/tmp/madlib.rjqeeK/madlib_install.sql:15423: WARNING:  aggregate attribute "prefunc" not recognized
psql:/tmp/madlib.rjqeeK/madlib_install.sql:15423: ERROR:  function array_cat(smallint[], smallint[]) does not exist

在这里插入图片描述

# 自己建函数 array_cat(smallint[], smallint[])
psql etl -U madlib
-- Compatibility shim: the MADlib install script creates
-- madlib.agg_array_concat(SMALLINT[]) with SFUNC = array_cat, which needs an
-- array_cat(smallint[], smallint[]) that this server does not provide.
-- The built-in MUST be called as pg_catalog.array_cat: an unqualified
-- array_cat($1, $2) with two smallint[] arguments is an exact match for this
-- very function, so the wrapper would call itself until the stack depth limit
-- is hit.
-- SECURITY DEFINER is dropped: no elevated privileges are needed, and a
-- definer function without a pinned search_path is unsafe.
-- The function is deliberately not STRICT so a NULL aggregate state is passed
-- through to the built-in unchanged.
CREATE OR REPLACE FUNCTION public.array_cat(smallint[], smallint[])
 RETURNS smallint[]
 LANGUAGE sql
 IMMUTABLE
AS $function$
    SELECT pg_catalog.array_cat($1, $2);
$function$
;
\q

# 再次安装
$MADLIB_ROOT/src/bin/madpack -s madlib -c madlib/000000@localhost:5432/etl install --platform postgres

# 安装成功
madpack.py: INFO : Detected PostgreSQL version 15.2.
madpack.py: INFO : *** Installing MADlib ***
madpack.py: INFO : MADlib tools version    = 2.0.0 (/usr/local/src/apache-madlib-2.0.0-src/src/bin/../madpack/madpack.py)
madpack.py: INFO : MADlib database version = None (host=localhost:5432, db=etl, schema=madlib)
madpack.py: INFO : Testing PL/Python environment...
madpack.py: INFO : > PL/Python environment OK (version: 3.9.17)
madpack.py: INFO : > Preparing objects for the following modules:
madpack.py: INFO : > - bayes
madpack.py: INFO : > - crf
madpack.py: INFO : > - elastic_net
madpack.py: INFO : > - linalg
madpack.py: INFO : > - pmml
madpack.py: INFO : > - prob
madpack.py: INFO : > - svm
madpack.py: INFO : > - tsa
madpack.py: INFO : > - conjugate_gradient
madpack.py: INFO : > - knn
madpack.py: INFO : > - lda
madpack.py: INFO : > - stats
madpack.py: INFO : > - utilities
madpack.py: INFO : > - assoc_rules
madpack.py: INFO : > - convex
madpack.py: INFO : > - dbscan
madpack.py: INFO : > - deep_learning
madpack.py: INFO : > - glm
madpack.py: INFO : > - graph
madpack.py: INFO : > - linear_systems
madpack.py: INFO : > - mxgboost
madpack.py: INFO : > - recursive_partitioning
madpack.py: INFO : > - regress
madpack.py: INFO : > - sample
madpack.py: INFO : > - summary
madpack.py: INFO : > - kmeans
madpack.py: INFO : > - pca
madpack.py: INFO : > - validation
madpack.py: INFO : Installing MADlib:
madpack.py: INFO : > Created madlib schema
madpack.py: INFO : > Created madlib.MigrationHistory table
madpack.py: INFO : > Wrote version info in MigrationHistory table
madpack.py: INFO : MADlib 2.0.0 installed successfully in madlib schema.

在这里插入图片描述

# 如果依然报错函数错误,也可能是没有函数array_union(pg_catalog.anyarray)
psql etl -U madlib
-- Aggregate over arrays of any element type: the state is an array of the
-- same type, and every input row is folded into it with public.array_cat.
create or replace aggregate public.array_union (pg_catalog.anyarray) (
    stype = anyarray,
    sfunc = public.array_cat
);
\q
# 权限
psql etl -U postgres
-- Take superuser back from the madlib role now that installation is done
alter role madlib with nosuperuser;
-- Give role madlib every privilege on schema madlib
grant all on schema madlib to madlib;
-- Let everyone (PUBLIC) reference objects in schema madlib
grant usage on schema madlib to public;
-- For functions that role madlib creates from now on (in any schema), do not
-- give PUBLIC the default EXECUTE privilege; it has to be granted explicitly
alter default privileges for role madlib revoke execute on functions from public;
-- Intended to do the same for functions created in schema madlib.
-- NOTE(review): no FOR ROLE is given, so this applies to objects created by
-- the role running the statement; the PostgreSQL docs also say a per-schema
-- default cannot revoke what is granted globally — confirm this has an effect
alter default privileges in schema madlib revoke execute on functions from public;
-- For tables created in schema madlib from now on, grant SELECT to PUBLIC by default
-- NOTE(review): also without FOR ROLE, so it covers only tables created by the
-- role running the statement — confirm that is the intended role
alter default privileges in schema madlib grant select on tables to public;

测试

-- Smoke test for the MADlib installation: build a small table with two
-- grouping columns, draw a 50% stratified sample from it, and list the result.
DROP TABLE IF EXISTS test;
CREATE TABLE test(
    id1 INTEGER,
    id2 INTEGER,
    gr1 INTEGER,
    gr2 INTEGER
);
-- Explicit column list so the fixture keeps working if the table definition
-- is later reordered or extended.
INSERT INTO test (id1, id2, gr1, gr2) VALUES
(1,0,1,1),
(2,0,1,1),
(3,0,1,1),
(4,0,1,1),
(5,0,1,1),
(6,0,1,1),
(7,0,1,1),
(8,0,1,1),
(9,0,1,1),
(9,0,1,1),
(9,0,1,1),
(9,0,1,1),
(0,1,1,2),
(0,2,1,2),
(0,3,1,2),
(0,4,1,2),
(0,5,1,2),
(0,6,1,2),
(10,10,2,2),
(20,20,2,2),
(30,30,2,2),
(40,40,2,2),
(50,50,2,2),
(60,60,2,2),
(70,70,2,2);
DROP TABLE IF EXISTS out;
SELECT madlib.stratified_sample(
    'test',    -- Source table
    'out',     -- Output table
    0.5,       -- Sample proportion
    'gr1,gr2', -- Strata definition
    'id1,id2', -- Columns to output
    false      -- Sample without replacement
);
-- Ad hoc inspection of the sample, ordered so repeated runs are easy to compare
SELECT * FROM out ORDER BY gr1,gr2,id1,id2;
  • 报错:ERROR: ImportError: cannot import name ‘stratified_sample’ from ‘sample’ (unknown location)
    在这里插入图片描述
  • 查看源码

在这里插入图片描述

# 发现/usr/local/src/apache-madlib-2.0.0-src/src/lib/python目录都不存在
cd /usr/local/src/apache-madlib-2.0.0-src/src/lib/python
-bash: cd: /usr/local/src/apache-madlib-2.0.0-src/src/lib/python: No such file or directory

# 发现不是py文件
ls /usr/local/src/apache-madlib-2.0.0-src/src/modules/sample
random_process.cpp  random_process.hpp  sample.hpp  weighted_sample.cpp  weighted_sample.hpp  WeightedSample_impl.hpp  WeightedSample_proto.hpp

在这里插入图片描述

find / -name python


find $MADLIB_ROOT -name sample
/usr/local/src/apache-madlib-2.0.0-src/build/third_party/src/EP_boost/libs/geometry/doc/src/docutils/tools/doxygen_xml2qbk/sample
/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/greenplum/modules/sample
/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/CMakeFiles/madlib_postgresql_15.dir/__/__/__/modules/sample
/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/modules/sample
/usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/modules/sample
/usr/local/src/apache-madlib-2.0.0-src/src/ports/postgres/modules/sample
/usr/local/src/apache-madlib-2.0.0-src/src/modules/sample


在这里插入图片描述

在这里插入图片描述

su - postgres
mkdir -p /usr/local/src/apache-madlib-2.0.0-src/src/lib
# 创建软连接
ln -s /export/anaconda3/lib/python3.9 /usr/local/src/apache-madlib-2.0.0-src/src/lib/python

# 重命名
mv /usr/local/src/apache-madlib-2.0.0-src/src/modules /usr/local/src/apache-madlib-2.0.0-src/src/modules_bak
# 创建软连接
ln -s /usr/local/src/apache-madlib-2.0.0-src/build/src/ports/postgres/15/modules /usr/local/src/apache-madlib-2.0.0-src/src/modules
  • 重新测试
-- Re-run the stratified sampling call after the directory fixes, then list the sampled rows
DROP TABLE IF EXISTS out;
SELECT madlib.stratified_sample(
    'test',    -- Source table
    'out',     -- Output table
    0.5,       -- Sample proportion
    'gr1,gr2', -- Strata definition
    'id1,id2', -- Columns to output
    false      -- Sample without replacement
);

SELECT * FROM out ORDER BY gr1,gr2,id1,id2;

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

sqlboy-yuzhenc

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值