HDP Test Notes

This post looks at partitioning and compression in the big-data storage systems around HDP: Phoenix, Hive, HBase, and Greenplum. It creates different kinds of tables (Greenplum partitioned tables, Phoenix pre-split tables, Hive ORC and Parquet tables) and compares storage efficiency under different compression algorithms (SNAPPY, LZO, ZLIB, BZIP2, and Greenplum's RLE_TYPE and ZSTD). It also covers compression tests for HBase with Phoenix, Greenplum's compression options, and concurrency tests of insert, update, and delete performance in Hive, Phoenix, and HBase.

【GP Partitioning】
create table catalog_sales_test(
    "id" BIGINT,
    "t1" smallint,
    "t2" integer,
    "t3" bigint,
    "t4" decimal(6,2),
    "t5" numeric(7,3),
    "t6" double precision,
    "t7" varchar(255),
    "t8" char(10),
    "t9" text,
    "t10" time,
    "t11" date,
    "t12" TIMESTAMP
) distributed by (id)
partition by range("t11")
(
    -- note: with 'exclusive' ends, 2020-01-31 and 2020-04-30 land in the default partition
    partition p1 start ('2020-01-01') inclusive end ('2020-01-31') exclusive,
    partition p2 start ('2020-04-01') inclusive end ('2020-04-30') exclusive,
    default partition default_p
);
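
The summary above also mentions Greenplum's RLE_TYPE and ZSTD compression, which these notes do not show. A minimal sketch, assuming Greenplum 6 or later (for ZSTD) and reusing the table/column names above purely for illustration:

create table catalog_sales_test_ao(
    "id"  BIGINT,
    -- per-column ENCODING overrides the table-level compresstype
    "t7"  varchar(255) encoding (compresstype=zstd, compresslevel=5),
    "t11" date encoding (compresstype=rle_type)
)
with (appendonly=true, orientation=column, compresstype=zstd)
distributed by (id);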
        


【Phoenix Partitioning】
create table if not exists test.catalog_sales_test(
  id varchar(255) primary key,
  column1 tinyint,
  column2 smallint,
  column3 integer,
  column4 bigint,
  column5 float,
  column6 double,
  column7 DECIMAL,
  column8 TIMESTAMP,
  column9 DATE,
  column10 varchar(255)
) SPLIT ON ('2020-04-24','2020-04-25','2020-04-26');
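
SPLIT ON pre-creates HBase regions at the given row-key boundaries, so it only helps if the id values really start with those date strings. A quick sanity check (the sample row key below is made up):

-- hypothetical row whose key falls into the '2020-04-24' region
UPSERT INTO test.catalog_sales_test (id, column1) VALUES ('2020-04-24_000001', 1);
-- the plan should show the scan split into chunks at the region boundaries
EXPLAIN SELECT COUNT(*) FROM test.catalog_sales_test;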


--------------------------------------------------------------------------


【Hive】Compression ratio test: no compression 8.940s

create table if not exists test_orc_snappy(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
stored AS orc tblproperties ("orc.compress"="SNAPPY");

create table if not exists test_orc_lzo(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
stored AS orc tblproperties ("orc.compress"="LZO");

create table if not exists test_parquet_zlib(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
-- Parquet's tblproperty is "parquet.compression"; its zlib-based codec is named GZIP
stored AS PARQUET tblproperties ("parquet.compression"="GZIP");


create table if not exists test_txt(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
stored AS textfile;


load data inpath '/exportcsv.csv' into table test_txt;
insert into table test_orc_snappy select * from test_txt;
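
The remaining compressed tables can presumably be populated from the same text data in the same way:

insert into table test_orc_lzo select * from test_txt;
insert into table test_parquet_zlib select * from test_txt;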

hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_orc_snappy;
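
For the actual compression-ratio comparison, the same size check can be run against the other tables (paths assumed to follow the same warehouse layout as above):

hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_orc_lzo;
hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_parquet_zlib;
hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_txt;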

hdfs dfs -put /root/exportcsv.csv /exportcsv.csv
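
The summary also lists BZIP2; ORC and Parquet do not offer a BZIP2 codec, so for a text-format table this is normally done by compressing the job output rather than via a tblproperty. A sketch under that assumption (test_txt_bzip2 is a made-up name):

SET hive.exec.compress.output=true;
SET mapreduce.output.fileoutputformat.compress=true;
SET mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec;
create table if not exists test_txt_bzip2 like test_txt;
insert into table test_txt_bzip2 select * from test_txt;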

Databases: tpcds_text_400, tpcds_bin_orc_400
Table: store_sales
1151988104
1151988104

---------------------------------------------------------------------------------------

【HBase + Phoenix】Compression ratio test: no compression 18.278s

/usr/hdp/3.1.4.0-315/phoenix/bin/sqlline.py host121:2181

java -cp /testCompress/maven_javase-1.0-SNAPSHOT.jar com.test.maven.
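
The notes above only record the sqlline connection and the test-driver command; the compression itself is set on the underlying HBase column family. A minimal sketch of a Phoenix table that enables SNAPPY at the HBase level (table name reuses the Phoenix section above and is illustrative):

create table if not exists test.catalog_sales_test_snappy(
  id varchar(255) primary key,
  column1 tinyint,
  column10 varchar(255)
) COMPRESSION='SNAPPY';
-- describing the table in the hbase shell should then show COMPRESSION => 'SNAPPY' on the column family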
