#退出vsql时执行\q
-- 创建ODM用户资源池,主要做外部表查询
create resource pool pool_rcs MAXMEMORYSIZE '50%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 12;
-- 创建DW用户资源池,主要做大表加载、关联、汇总
create resource pool pool_name_dw MAXMEMORYSIZE '90%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY AUTO;
-- 创建APP用户资源池,主要做查询、表关联、指标运算
create resource pool pool_name_app MAXMEMORYSIZE '80%' EXECUTIONPARALLELISM AUTO PRIORITY 0 QUEUETIMEOUT NONE PLANNEDCONCURRENCY 24;
-- 创建用户
create user "rcs" identified by 'huawei' resource pool pool_rcs;
-- 创建schema
create schema if not exists name.schema_rcs authorization rcs;
-- 跨用户赋予表查询权限
grant all on 表名 to dev_name_dw
#创建用户资源池
create resource pool pool_rcs maxmemorysize '50%' executionparallelism auto priority 0 queuetimeout none plannedconcurrency 12;
#判断schema是否存在
vsql --echo-all -d name -U oraclename -Atq -w name -c "select count(*) from schemata where upper(schema_name)='RCS_TEST'; " retail_queries.out 2>&1
#判断用户是否存在
vsql --echo-all -d name -U oraclename -Atq -w name -c "select count(*) from users where upper(user_name)='RCS'; " retail_queries.out 2>&1
#删除数据库用户
vsql -d name -U oraclename -Atq -w name -c "drop user if exists rcs cascade;"
#创建用户和schema
vsql -d name -U oraclename -Atq -w name -c "
create user rcs identified by 'huawei' resource pool pool_rcs;
create schema if not exists name.schema_rcs authorization rcs;" --name指数据库实例名, schema_rcs指schema名
#创建数据库的表
vsql -d name -U rcs -Atq -w huawei -c "
drop table if exists schema_rcs.fct_cl_user_serv_d;
drop table if exists schema_rcs.fct_user_info_m;
drop table if exists schema_rcs.fct_tnes_cell_detail_m;
"
vsql -d name -U rcs -Atq -w huawei -c "
create table schema_rcs.FCT_CL_USER_SERV_D(
year_id numeric(4),
month_id numeric(6) not null,
day_id numeric(8) not null,
prov_id numeric(12),
prov_name varchar(20),
city_id numeric(12),
city_name varchar(20),
city_cd varchar(20),
usr_nbr numeric(11),
imsi numeric(15),
rat varchar(20),
click_cnt numeric(10),
duration numeric(22,3),
flux_up numeric(22,3),
flux_down numeric(22,3),
flux_all numeric(22,3))
order by year_id,month_id,day_id,prov_id,prov_name,city_id,city_name,city_cd,usr_nbr
segmented by hash(usr_nbr) all nodes partition by day_id;
--创建表的同时,创建了FCT_CL_USER_SERV_D_super的Projections.
create table schema_rcs.DIM_REGION_INFO(
region_id numeric(10),
region_p_id numeric(10),
region_lvl numeric(1),
region_code varchar(20),
region_name varchar(50),
region_sname varchar(50),
region_desc varchar(254),
is_node numeric(1),
order_num numeric(10),
status_flg numeric(1),
left_up_longitiude numeric(10,6),
left_up_latitude numeric(10,6),
left_down_longitiude numeric(10,6),
left_down_latitude numeric(10,6),
right_up_longitiude numeric(10,6),
right_up_latitude numeric(10,6),
right_down_longitiude numeric(10,6),
right_down_latitude numeric(10,6),
constraint PK_DIM_REGION_INFO primary key (REGION_ID)
)unsegmented all nodes ;
--创建表的同时,创建了DIM_REGION_INFO_node0001和DIM_REGION_INFO_node0002的Projections.
create table schema_rcs.DIM_DETAIL_SVCTYPE(
first_busstype_id numeric(10),
first_busstype_name varchar(128),
second_busstype_id numeric(10),
second_busstype_name varchar(128),
url numeric(10),
if_app numeric(10),
constraint PK_DIM_DETAIL_SVCTYPE primary key (SECOND_BUSSTYPE_ID)
)
unsegmented all nodes;
"
#插入数据库信息
vsql -d name -U rcs -Atq -w huawei -c "
insert into "schema_rcs"."dim_tmn_brand_info"("tmn_brand_id","tmn_brand_name","remark") values (100217,'新脉',null);
insert into "schema_rcs"."dim_tmn_brand_info"("tmn_brand_id","tmn_brand_name","remark") values (100352,'拓展无限',null);
insert into "schema_rcs"."dim_tmn_brand_info"("tmn_brand_id","tmn_brand_name","remark") values (100476,'摩托罗拉[山寨]',null);
insert into "schema_rcs"."dim_tmn_brand_info"("tmn_brand_id","tmn_brand_name","remark") values (100609,'华为',null);
insert into "schema_rcs"."dim_tmn_brand_info"("tmn_brand_id","tmn_brand_name","remark") values (100647,'杂牌',null);
insert into "schema_rcs"."dim_tmn_brand_info"("tmn_brand_id","tmn_brand_name","remark") values (100873,'悍马',null);
commit;
"
#将配置文件中的内容导进去
/home/vertica/dim_cont_web_station文件的内容如下:
1000002|80000001000880||||80000001|80000001||LBE手机安全大师|LBE手机|http://www.lbesec.com|www.lbesec.com||0||1||1|||2012-02-13||
1000012|81600007000213||||81600007|81600007||奇虎网|奇虎网|http://img2.qihoo.com|img2.qihoo.com||0||1||1|||2012-02-13||
1000014|80000004006011||||80000004|80000004||521交换链|521交换|http://www.521lm.com|www.521lm.com||0||1||1|||2012-02-13||
1000016|80000004006012||||80000004|80000004||大联盟广告|大联盟广告|http://www.ubcpm.com|www.ubcpm.com||0||1||1|||2012-02-13||
1000017|80000001000881||||80000001|80000001||手机智能软件站|手机智能软|http://nokiarj.cn|nokiarj.cn||0||1||1|||2012-02-13||
vsql -d name -U oraclename -Atq -w name -c "
copy "schema_rcs"."dim_cont_web_station" from '/home/vertica/dim_cont_web_station' delimiter '|';" --将文件内容以|分割后,导入到"schema_rcs"."dim_cont_web_station"表里面
Vertica 数据库中的Projections说明:
Projections 是vertica的物理存储层
projection由一张或多张表的列组成
每个projection存储的数据相互独立
vertica优化器自动选择最优的projection进行关联处理
列在创建projection的时候,会自动压缩,并可以通过DBD工具,选择最优的压缩方式
ORDER BY 字段不仅可以影响projection的压缩比,在关联处理时,
如果关联字段同时又是两个关联对象的排序字段,vertica会自动采用merge join方式,极大的提升性能.
分布键设置:
对于经常需要关联的字段,建议设置成分布键,通过hash分布的方式,将数据分割到多个节点上
如果两张表的分布键一致,并且通过分布键进行关联,则不会产生网络开销
# 将文件中的内容导入到数据库表中
copy "name_app"."fct_bus_usrcnts" from '/home/oraclename/new_record/fct_bus_usrcnts' delimiter '|';
copy "name_app"."FCT_TMNL_USRCNTS" from '/home/oraclename/new_record/FCT_TMNL_USRCNTS' delimiter '|';
copy "name_app"."FCT_USER_SERV_INFO_M" from '/home/oraclename/new_record/FCT_USER_SERV_INFO_M' delimiter '|';
#查看vertica版本号
oraclename@linux116:/opt/vertica/examples/VMart_Schema> vsql -d name -U oraclename -Atq -w name
name=> select version();
Vertica Analytic Database v6.0.1-4
#使用copy direct生成数据
\set t_pwd `pwd`
\set input_file '''':t_pwd'/Date_Dimension.tbl'''
COPY Date_Dimension FROM :input_file DELIMITER '|' NULL '' DIRECT;
#查询满足条件时,才输出表中信息
#brand_id = 6满足时,exists才存在
select * from DIM_BRAND_INFO where exists (select 1 from DIM_BRAND_INFO where brand_id = 6);
#join测试
create table test1(id integer,name varchar2(30));
create table test2(id integer,age int);
insert into test1 values(1,'test1');
insert into test1 values(2,'test2');
insert into test1 values(3,'test3');
insert into test1 values(4,'test4');
insert into test2 values(1,25);
insert into test2 values(2,26);
insert into test2 values(8,30);
insert into test2 values(9,39);
commit;
#join或inner join配置共有id的内容
name=> select * from name_app.test1 inner join name_app.test2 on name_app.test1.id = name_app.test2.id;
1|test1|1|25
2|test2|2|26
#left join 以左边的表为基准,输出信息
name=> select * from name_app.test1 left join name_app.test2 on name_app.test1.id = name_app.test2.id order by name_app.test1.id;
1|test1|1|25
2|test2|2|26
3|test3||
4|test4||
#left join 以右边的表为基准,输出信息
name=> select * from name_app.test1 right join name_app.test2 on name_app.test1.id = name_app.test2.id order by name_app.test1.id;
||9|39
||8|30
1|test1|1|25
2|test2|2|26
name=> select * from name_app.test1 full join name_app.test2 on name_app.test1.id = name_app.test2.id order by name_app.test1.id;
||9|39
||8|30
1|test1|1|25
2|test2|2|26
3|test3||
4|test4||
Install Vertica Steps
Step1 Check Environment
(1) 安装Linux
(2) 分配swap分区
(3) 为catalog和data分区配置I/O调度
(4) 检查磁盘空间
(5) 检查root是否能成功登录
(6) 设置主机配置和默认时区(环回网卡,子网网卡,禁掉防火墙)
(7) 确保NTP服务正常启动
远程导入vertica数据:
CONNECT TO VERTICA name USER oraclename PASSWORD 'name' ON '10.41.20.116',5433;
export TO VERTICA name.name_dw.tw_re_pm_ucell_perf_ucell_d FROM name_dw1.tw_re_pm_ucell_perf_ucell_d