1.下载数据集
数据集链接为TPC-H Homepage
在这里填好自己的信息后确认提交,之后会有一个下载链接发送到邮箱里,点击下载即可。
2.生成数据集
将压缩包解压后,打开压缩包下的dbgen文件夹,使用Microsoft Visual Studio打开该目录下的tpch.sln文件。如图所示:
点击生成后,去dbgen下的Debug目录里找到dbgen.exe,将其复制到dbgen目录下,然后右键在该目录打开命令行。(这个1是生成1g容量数据的意思)
./dbgen -vf -s 1
生成成功的话,将会看到以下几个文件
需要注意的事,生成的该文件每一行都会有一个“|”符号,使其无法直接导入到pgsql数据库中,因此我准备了下面的代码,先处理一下这些数据
# Python脚本用于处理.tbl文件,去除每行末尾的'|'字符
# 定义源文件和目标文件的路径
input_file = r"E:\PublicDataset\TPC-H V3.0.1\rawdata\supplier.tbl"
output_file = r"E:\PublicDataset\TPC-H V3.0.1\data\supplier.tbl"
# 读取输入文件并处理每一行
with open(input_file, "r") as f:
lines = f.readlines()
processed_lines = []
for line in lines:
# 去掉行末尾的"|"符号
print(line)
processed_line = line.rstrip("|\n")
print(processed_line)
processed_lines.append(processed_line + "\n")
# 将处理后的结果写入输出文件
with open(output_file, "w") as f:
f.writelines(processed_lines)
# 打印处理后的结果
将处理完成的数据就可以直接导入到pgsql中
3.导入到pgsql
导入的目录自己需要按自己的位置修改一下
CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL,
N_NAME CHAR(25) NOT NULL,
N_REGIONKEY INTEGER NOT NULL,
N_COMMENT VARCHAR(152));
CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL,
R_NAME CHAR(25) NOT NULL,
R_COMMENT VARCHAR(152));
CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL,
P_NAME VARCHAR(55) NOT NULL,
P_MFGR CHAR(25) NOT NULL,
P_BRAND CHAR(10) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER CHAR(10) NOT NULL,
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
P_COMMENT VARCHAR(23) NOT NULL );
CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL,
S_NAME CHAR(25) NOT NULL,
S_ADDRESS VARCHAR(40) NOT NULL,
S_NATIONKEY INTEGER NOT NULL,
S_PHONE CHAR(15) NOT NULL,
S_ACCTBAL DECIMAL(15,2) NOT NULL,
S_COMMENT VARCHAR(101) NOT NULL);
CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL,
PS_SUPPKEY INTEGER NOT NULL,
PS_AVAILQTY INTEGER NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT VARCHAR(199) NOT NULL );
CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_NATIONKEY INTEGER NOT NULL,
C_PHONE CHAR(15) NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT CHAR(10) NOT NULL,
C_COMMENT VARCHAR(117) NOT NULL);
CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL,
O_CUSTKEY INTEGER NOT NULL,
O_ORDERSTATUS CHAR(1) NOT NULL,
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
O_ORDERDATE DATE NOT NULL,
O_ORDERPRIORITY CHAR(15) NOT NULL,
O_CLERK CHAR(15) NOT NULL,
O_SHIPPRIORITY INTEGER NOT NULL,
O_COMMENT VARCHAR(79) NOT NULL);
CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL,
L_PARTKEY INTEGER NOT NULL,
L_SUPPKEY INTEGER NOT NULL,
L_LINENUMBER INTEGER NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR(1) NOT NULL,
L_LINESTATUS CHAR(1) NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL);
copy CUSTOMER from 'C:\Program Files\PostgreSQL\9.5\dbgen\customer1.tbl' DELIMITER '|';
copy SUPPLIER from 'C:\Program Files\PostgreSQL\9.5\dbgen\supplier1.tbl' DELIMITER '|';
copy NATION from 'C:\Program Files\PostgreSQL\9.5\dbgen\nation1.tbl' DELIMITER '|';
copy REGION from 'C:\Program Files\PostgreSQL\9.5\dbgen\region1.tbl' DELIMITER '|';
copy PART from 'C:\Program Files\PostgreSQL\9.5\dbgen\part1.tbl' DELIMITER '|';
copy PARTSUPP from 'C:\Program Files\PostgreSQL\9.5\dbgen\partsupp1.tbl' DELIMITER '|';
copy ORDERS from 'C:\Program Files\PostgreSQL\9.5\dbgen\orders1.tbl' DELIMITER '|';
copy LINEITEM from 'C:\Program Files\PostgreSQL\9.5\dbgen\lineitem1.tbl' DELIMITER '|';
为表建立主键和约束
-- For table REGION
ALTER TABLE REGION
ADD PRIMARY KEY (R_REGIONKEY);
-- For table NATION
ALTER TABLE NATION
ADD PRIMARY KEY (N_NATIONKEY);
ALTER TABLE NATION
ADD FOREIGN KEY (N_REGIONKEY) references REGION;
COMMIT WORK;
-- For table PART
ALTER TABLE PART
ADD PRIMARY KEY (P_PARTKEY);
COMMIT WORK;
-- For table SUPPLIER
ALTER TABLE SUPPLIER
ADD PRIMARY KEY (S_SUPPKEY);
ALTER TABLE SUPPLIER
ADD FOREIGN KEY (S_NATIONKEY) references NATION;
COMMIT WORK;
-- For table PARTSUPP
ALTER TABLE PARTSUPP
ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY);
COMMIT WORK;
-- For table CUSTOMER
ALTER TABLE CUSTOMER
ADD PRIMARY KEY (C_CUSTKEY);
ALTER TABLE CUSTOMER
ADD FOREIGN KEY (C_NATIONKEY) references NATION;
COMMIT WORK;
-- For table LINEITEM
ALTER TABLE LINEITEM
ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER);
COMMIT WORK;
-- For table ORDERS
ALTER TABLE ORDERS
ADD PRIMARY KEY (O_ORDERKEY);
COMMIT WORK;
-- For table PARTSUPP
ALTER TABLE PARTSUPP
ADD FOREIGN KEY (PS_SUPPKEY) references SUPPLIER;
COMMIT WORK;
ALTER TABLE PARTSUPP
ADD FOREIGN KEY (PS_PARTKEY) references PART;
COMMIT WORK;
-- For table ORDERS
ALTER TABLE ORDERS
ADD FOREIGN KEY (O_CUSTKEY) references CUSTOMER;
COMMIT WORK;
-- For table LINEITEM
ALTER TABLE LINEITEM
ADD FOREIGN KEY (L_ORDERKEY) references ORDERS;
COMMIT WORK;
ALTER TABLE LINEITEM
ADD FOREIGN KEY (L_PARTKEY,L_SUPPKEY) references PARTSUPP;
COMMIT WORK;