hawk跑TPC-DS
创建文件夹,把TPC工具放入
cd /tpcds/v2.1.0/tools/
./dsdgen -DIR /opt/3t_data -SCALE 3000-parallel 20 -child 20 -TERMINATE N
[root@node2 /]# mkdir tpcds_3t [root@node2 /]# ls bin boot cgroups_test dev etc hadoop home lib lib64 lost+found media mnt opt proc root sbin selinux srv sys tmp tpcds_3t usr var [root@node2 /]# cd tpcds_3t [root@node2 tpcds_3t]# ls DSTools.zip [root@node2 tpcds_3t]# |
解压工具包,进入tools编译
[root@node2 tpcds_3t]# unzip DSTools.zip ----- [root@node2 tpcds_3t]# ls DSTools.zip TPCDSVersion1.3.1 [root@node2 tpcds_3t]# cd TPCDSVersion1.3.1/ [root@node2 TPCDSVersion1.3.1]# ls answer_sets dbgen2 query_templates query_variants specification tools [root@node2 TPCDSVersion1.3.1]# cd tools [root@node2 tools]# make |
[root@node2 tools]# ./dsqgen –help
|
多线程生成数据,后台运行
nohup ./dsdgen -DIR /opt/3t_data -SCALE 3000 -parallel 30 -child 1 -TERMINATE N & |
查看后台进程
Jobs –l |
修改query_template下query1-99模板,在行尾加define _END = "";
#!/bin/bash
COUNTER=1 while [ $COUNTER -lt 100 ] do echo $COUNTER echo "define _END = \"\";">>query$COUNTER.tpl COUNTER=`expr $COUNTER + 1` done |
生成查询语句
./dsqgen -output_dir /opt/tpc_3t_queries/ -input /tpcds_3t/TPCDSVersion1.3.1/query_templates/templates.lst -scale 3000 -dialect ansi -directory /tpcds_3t/TPCDSVersion1.3.1/query_templates -rngseed 05092045000 |
[root@node2 tools]# su gpadmin [gpadmin@node2 tools]$ psql psql (8.2.15) Type "help" for help.
gpadmin=# |
gpadmin=# create database tpcds_3t; CREATE DATABASE gpadmin=# \l List of databases Name | Owner | Encoding | Access privileges -----------+---------+----------+------------------- gpadmin | gpadmin | UTF8 | postgres | gpadmin | UTF8 | template0 | gpadmin | UTF8 | template1 | gpadmin | UTF8 | tpcds | gpadmin | UTF8 | tpch | gpadmin | UTF8 | (6 rows) gpadmin=# \c tpcds You are now connected to database "tpcds" as user "gpadmin". tpcds=# |
生成表
tpcds=# \d List of relations Schema | Name | Type | Owner | Storage --------+-----------------------+-------+---------+------------- public | customer_address | table | gpadmin | append only public | customer_demographics | table | gpadmin | append only public | date_dim | table | gpadmin | append only public | dbgen_version | table | gpadmin | append only public | income_band | table | gpadmin | append only public | inventory | table | gpadmin | append only public | item | table | gpadmin | append only public | promotion | table | gpadmin | append only public | reason | table | gpadmin | append only public | ship_mode | table | gpadmin | append only public | store_returns | table | gpadmin | append only public | store_sales | table | gpadmin | append only public | time_dim | table | gpadmin | append only public | warehouse | table | gpadmin | append only public | web_page | table | gpadmin | append only public | web_site | table | gpadmin | append only (16 rows) |
拷贝yaml文件到数据路径
[root@node2 ds_data]# pwd /opt/ds_data [root@node2 ds_data]# ls –s |
批量修改yaml文件(数据库名、端口号,数据路径,数据文件名等)
[root@node2 ds_data]# sed -i 's/5432/5430/g' *.yaml |
加载表
[root@node2 ds_data]# gpload -f call_center.yaml 2016-05-06 16:14:39|INFO|gpload session started 2016-05-06 16:14:39 2016-05-06 16:14:39|INFO|setting schema 'public' for table 'call_center' 2016-05-06 16:14:39|INFO|started gpfdist -p 8081 -P 8082 -f "data1g/call_center.dat" -t 30 2016-05-06 16:14:46|INFO|running time: 6.75 seconds 2016-05-06 16:14:46|INFO|rows Inserted = 6 2016-05-06 16:14:46|INFO|rows Updated = 0 2016-05-06 16:14:46|INFO|data formatting errors = 0 2016-05-06 16:14:46|INFO|gpload succeeded [root@node2 ds_data]# |
批量加载脚本
#!/bin/bash for f in *.yaml do gpload -f $f done |
加载后查看表大小
select relname, pg_size_pretty(pg_relation_size(relname)) from pg_stat_user_tables where schemaname = 'public' order by pg_relation_size(relname) desc;
|
生成99条sql的日志文件
COUNTER=1 while [ $COUNTER -lt 100 ] do echo $COUNTER touch query$COUNTER.log chown gpadmin query$COUNTER.log COUNTER=`expr $COUNTER + 1` done |
在每一条sql之前加入\timing
[root@node2 query_templates]# sed -i -e '1i\\\timing' query* |
执行sql批处理
time for f in query* do log=${f}".log" echo $log psql -d tpcds -f $f > $log; done |
[gpadmin@node2 query_templates]$ ./sql.sh |
合并测试结果
[root@node2 query_templates]# cat query*.log > 1g_result.log |
运行完成后清除缓存
free –m echo 3 > /proc/sys/vm/drop_caches |
表加载,加载机发送速率约120MB,接收速率约50MB(这样至少要8个小时,为什么不切割加?)