HDP Test Notes

This post looks at partitioning and compression in the big-data storage systems around HDP: Phoenix, Hive, HBase, and Greenplum. It creates different kinds of tables (Greenplum partitioned tables, Phoenix pre-split tables, Hive ORC and Parquet tables) and compares storage efficiency under different compression algorithms (SNAPPY, LZO, ZLIB, BZIP2, and Greenplum's RLE_TYPE and ZSTD). It also covers compression tests for HBase with Phoenix, Greenplum's compression options, and concurrency tests of insert, update, and delete performance in Hive, Phoenix, and HBase.

【GP Partitioning】
create table catalog_sales_test(
    "id" BIGINT,
    "t1" smallint,
    "t2" integer,
    "t3" bigint,
    "t4" decimal(6,2),
    "t5" numeric(7,3),
    "t6" double precision,
    "t7" varchar(255),
    "t8" char(10),
    "t9" text,
    "t10" time,
    "t11" date,
    "t12" TIMESTAMP
) distributed by (id)
partition by range("t11")
(
    -- note: with 'exclusive' ends, 2020-01-31 and 2020-04-30 land in the default partition
    partition p1 start ('2020-01-01') inclusive end ('2020-01-31') exclusive,
    partition p2 start ('2020-04-01') inclusive end ('2020-04-30') exclusive,
    default partition default_p
);
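
The summary above also mentions Greenplum's RLE_TYPE and ZSTD compression, which these notes do not show. A minimal sketch, assuming Greenplum 6 or later (for ZSTD) and reusing the table/column names above purely for illustration:

create table catalog_sales_test_ao(
    "id"  BIGINT,
    -- per-column ENCODING overrides the table-level compresstype
    "t7"  varchar(255) encoding (compresstype=zstd, compresslevel=5),
    "t11" date encoding (compresstype=rle_type)
)
with (appendonly=true, orientation=column, compresstype=zstd)
distributed by (id);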
        


【Phoenix Partitioning】
create table if not exists test.catalog_sales_test(
  id varchar(255) primary key,
  column1 tinyint,
  column2 smallint,
  column3 integer,
  column4 bigint,
  column5 float,
  column6 double,
  column7 DECIMAL,
  column8 TIMESTAMP,
  column9 DATE,
  column10 varchar(255)
) SPLIT ON ('2020-04-24','2020-04-25','2020-04-26');
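
SPLIT ON pre-creates HBase regions at the given row-key boundaries, so it only helps if the id values really start with those date strings. A quick sanity check (the sample row key below is made up):

-- hypothetical row whose key falls into the '2020-04-24' region
UPSERT INTO test.catalog_sales_test (id, column1) VALUES ('2020-04-24_000001', 1);
-- the plan should show the scan split into chunks at the region boundaries
EXPLAIN SELECT COUNT(*) FROM test.catalog_sales_test;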


--------------------------------------------------------------------------


【Hive】Compression ratio test: no compression 8.940s

create table if not exists test_orc_snappy(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
stored AS orc tblproperties ("orc.compress"="SNAPPY");

create table if not exists test_orc_lzo(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
stored AS orc tblproperties ("orc.compress"="LZO");

create table if not exists test_parquet_zlib(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
-- Parquet's tblproperty is "parquet.compression"; its zlib-based codec is named GZIP
stored AS PARQUET tblproperties ("parquet.compression"="GZIP");


create table if not exists test_txt(
  column1 INT,
  column2 BIGINT,
  column3 STRING,
  column4 BIGINT,
  column5 FLOAT,
  column6 STRING,
  column7 STRING,
  column8 TIMESTAMP,
  column9 STRING,
  column10 BOOLEAN
)
row format delimited fields terminated by ','
stored AS textfile;


load data inpath '/exportcsv.csv' into table test_txt;
insert into table test_orc_snappy select * from test_txt;
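
The remaining compressed tables can presumably be populated from the same text data in the same way:

insert into table test_orc_lzo select * from test_txt;
insert into table test_parquet_zlib select * from test_txt;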

hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_orc_snappy;
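
For the actual compression-ratio comparison, the same size check can be run against the other tables (paths assumed to follow the same warehouse layout as above):

hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_orc_lzo;
hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_parquet_zlib;
hadoop fs -du -s -h /warehouse/tablespace/managed/hive/test.db/test_txt;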

hdfs dfs -put /root/exportcsv.csv /exportcsv.csv
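
The summary also lists BZIP2; ORC and Parquet do not offer a BZIP2 codec, so for a text-format table this is normally done by compressing the job output rather than via a tblproperty. A sketch under that assumption (test_txt_bzip2 is a made-up name):

SET hive.exec.compress.output=true;
SET mapreduce.output.fileoutputformat.compress=true;
SET mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec;
create table if not exists test_txt_bzip2 like test_txt;
insert into table test_txt_bzip2 select * from test_txt;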

Databases: tpcds_text_400, tpcds_bin_orc_400
Table: store_sales
1151988104
1151988104

---------------------------------------------------------------------------------------

【HBase + Phoenix】Compression ratio test: no compression 18.278s

/usr/hdp/3.1.4.0-315/phoenix/bin/sqlline.py host121:2181

java -cp /testCompress/maven_javase-1.0-SNAPSHOT.jar com.test.maven.
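
The notes above only record the sqlline connection and the test-driver command; the compression itself is set on the underlying HBase column family. A minimal sketch of a Phoenix table that enables SNAPPY at the HBase level (table name reuses the Phoenix section above and is illustrative):

create table if not exists test.catalog_sales_test_snappy(
  id varchar(255) primary key,
  column1 tinyint,
  column10 varchar(255)
) COMPRESSION='SNAPPY';
-- describing the table in the hbase shell should then show COMPRESSION => 'SNAPPY' on the column family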
