tpc-ds doris性能测试

基于tpc-ds 和 doris的测试

生成1G数据

./dsdgen -SCALE 1GB -DIR /data8/ryh/doris/apache-doris-be-1.1.5-bin-x86_64/data1G/ -CHILD 2 -TERMINATE

数据导入

使用tpc-ds生成数据后,可以使用以下方式导入

cat customer_demographics.dat | sed 's/.$//' | curl --location-trusted -u root -H "label:customer_demographics" -H "timeout:1200" -H "column_separator:|" -T - http://127.0.0.1:8230/api/test_db/customer_demographics/_stream_load



cat inventory.dat | sed 's/.$//' | curl --location-trusted -u root -H "label:inventory" -H "timeout:1200" -H "column_separator:|" -T - http://127.0.0.1:8230/api/test_db/inventory/_stream_load

建表语句

这里提供一些基于doris格式的tpc-ds建表语句:

CREATE TABLE dbgen_version (
    `dv_version` varchar(16),
    `dv_create_date` DATETIME,
    `dv_create_time` DATETIME,
    `dv_cmdline_args` varchar(200)
) 
DUPLICATE KEY(`dv_version`) 
DISTRIBUTED BY HASH(`dv_version`) BUCKETS 1 
PROPERTIES (
    "replication_num" = "3"
);

CREATE TABLE customer_address (
    `ca_address_sk` INT NOT NULL ,
    `ca_address_id` CHAR(16) NOT NULL ,
    `ca_street_number` CHAR(10) NULL ,
    `ca_street_name` VARCHAR(60) NULL ,
    `ca_street_type` CHAR(15) NULL ,
    `ca_suite_number` CHAR(10) NULL ,
    `ca_city` VARCHAR(60) NULL ,
    `ca_county` VARCHAR(30) NULL ,
    `ca_state` CHAR(2) NULL ,
    `ca_zip` CHAR(10) NULL ,
    `ca_country` VARCHAR(20) NULL ,
    `ca_gmt_offset` DECIMAL(5,2) NULL ,
    `ca_location_type` CHAR(20) NULL
) 
DUPLICATE KEY(`ca_address_sk`) 
DISTRIBUTED BY HASH(`ca_address_sk`) BUCKETS 1 
PROPERTIES (
    "replication_num" = "3"
);

CREATE TABLE customer_demographics (
    `cd_demo_sk` INT NOT NULL,
    `cd_gender` CHAR(1) NULL,
    `cd_marital_status` CHAR(1) NULL,
    `cd_education_status` CHAR(20) NULL,
    `cd_purchase_estimate` INT NULL,
    `cd_credit_rating` CHAR(10) NULL,
    `cd_dep_count` INT NULL,
    `cd_dep_employed_count` INT NULL,
    `cd_dep_college_count` INT NULL
)
DUPLICATE KEY(`cd_demo_sk`)
DISTRIBUTED BY HASH(`cd_demo_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);

CREATE TABLE date_dim (
    `d_date_sk` INT NOT NULL,
    `d_date_id` CHAR(16) NOT NULL,
    `d_date` DATETIME NULL,
    `d_month_seq` INT NULL,
    `d_week_seq` INT NULL,
    `d_quarter_seq` INT NULL,
    `d_year` INT NULL,
    `d_dow` INT NULL,
    `d_moy` INT NULL,
    `d_dom` INT NULL,
    `d_qoy` INT NULL,
    `d_fy_year` INT NULL,
    `d_fy_quarter_seq` INT NULL,
    `d_fy_week_seq` INT NULL,
    `d_day_name` CHAR(9) NULL,
    `d_quarter_name` CHAR(6) NULL,
    `d_holiday` CHAR(1) NULL,
    `d_weekend` CHAR(1) NULL,    
    `d_following_holiday` CHAR(1) NULL,
    `d_first_dom` INT NULL,
    `d_last_dom` INT NULL,
    `d_same_day_ly` INT NULL,
    `d_same_day_lq` INT NULL,
    `d_current_day` CHAR(1) NULL,
    `d_current_week` CHAR(1) NULL,
    `d_current_month` CHAR(1) NULL,
    `d_current_quarter` CHAR(1) NULL,
    `d_current_year` CHAR(1) NULL
)
DUPLICATE KEY(`d_date_id`)
DISTRIBUTED BY HASH(`d_date_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);
CREATE TABLE warehouse (
    `w_warehouse_sk` INT NOT NULL,
    `w_warehouse_id` CHAR(16) NOT NULL,
    `w_warehouse_name` VARCHAR(20) NULL,
    `w_warehouse_sq_ft` INT NULL,
    `w_street_number` CHAR(10) NULL,
    `w_street_name` VARCHAR(60) NULL,
    `w_street_type` CHAR(15) NULL,
    `w_suite_number` CHAR(10) NULL,
    `w_city` VARCHAR(60) NULL,
    `w_county` VARCHAR(30) NULL,
    `w_state` CHAR(2) NULL,
    `w_zip` CHAR(10) NULL,    
    `w_country` VARCHAR(20) NULL,
    `w_gmt_offset` DECIMAL(5,2) NULL
)
DUPLICATE KEY(`w_warehouse_id`)
DISTRIBUTED BY HASH(`w_warehouse_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);

CREATE TABLE ship_mode (
    `sm_ship_mode_sk` INT NOT NULL,
    `sm_ship_mode_id` CHAR(16) NOT NULL,
    `sm_type` CHAR(30) NULL,
    `sm_code` CHAR(10) NULL,
    `sm_carrier` CHAR(20) NULL,
    `sm_contract` CHAR(20) NULL
)
DUPLICATE KEY(`sm_ship_mode_id`)
DISTRIBUTED BY HASH(`sm_ship_mode_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);
CREATE TABLE time_dim (
    `t_time_sk` INT NOT NULL,
    `t_time_id` CHAR(16) NOT NULL,
    `t_time` INT NULL,
    `t_hour` INT NULL,
    `t_minute` INT NULL,
    `t_second` INT NULL,
    `t_am_pm` CHAR(2) NULL,
    `t_shift` CHAR(20) NULL,
    `t_sub_shift` CHAR(20) NULL,
    `t_meal_time` CHAR(20) NULL
)
DUPLICATE KEY(`t_time_id`)
DISTRIBUTED BY HASH(`t_time_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);

CREATE TABLE reason (
    `r_reason_sk` INT NOT NULL,
    `r_reason_id` CHAR(16) NOT NULL,
    `r_reason_desc` CHAR(100) NULL
)
DUPLICATE KEY(`r_reason_id`)
DISTRIBUTED BY HASH(`r_reason_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);

CREATE TABLE income_band (
    `ib_income_band_sk` INT NOT NULL,
    `ib_lower_bound` INT NULL,
    `ib_upper_bound` INT NULL
)
DUPLICATE KEY(`ib_income_band_sk`)
DISTRIBUTED BY HASH(`ib_income_band_sk`) BUCKETS 1
PROPERTIES (
    “replication_num” =3);

查询语句生成

在使用的时候会出现以下报错

ERROR: Substitution'_END' is used before being initialized at line 59 in ../query_templates/query3.tpl

我们需要添加这个在每个 query0.tpl 文件,一共99个文件
在这里插入图片描述

./dsqgen -input ../query_templates/templates.lst -directory ../query_templates -dialect oracle -scale 1GB -OUTPUT_DIR /data/tpc-ds/data
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值