srcTables:
"src", "src1", "srcbucket", "srcbucket2", "src_json", "src_thrift",
"src_sequencefile", "srcpart"
src、src1两个表都只有两列(key string, value string):
src :
create table src(key string, value string);
load data local inpath 'src/data/files/kv1.txt' into table src;
src1:
data/files/kv3.txt
create table src1(key string, value string);
load data local inpath 'src/data/files/kv3.txt' into table src1;
srcbucket:
CREATE TABLE srcbucket(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/home/tianzhao/apache/hive-trunk-snapshot/data/files/srcbucket0.txt' INTO TABLE srcbucket;
LOAD DATA LOCAL INPATH '/home/tianzhao/apache/hive-trunk-snapshot/data/files/srcbucket1.txt' INTO TABLE srcbucket;
srcbucket2:
"CREATE TABLE srcbucket2(key int, value string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE"
"srcbucket20.txt", "srcbucket21.txt","srcbucket22.txt", "srcbucket23.txt"
src_sequencefile:
data/files/kv1.seq
src_thrift:
data/files/complex.seq
create table src_thrift(aint int, aString string,lint array<int>, lString array<string>, lintString array<struct<myint:int, mString:string, underscore_int:int>>, mStringString map<string, string>) row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' with serdeproperties("serialization.class"="org.apache.hadoop.hive.serde2.thrift.test.Complex", "serialization.format"="org.apache.thrift.protocol.TBinaryProtocol") stored as sequencefile;
load数据是:
hive> load data local inpath 'src/data/files/complex.seq' into table src_thrift;
src_json:
data/files/json.txt
srcpart:
create table srcpart(key string, value string) partitioned by(ds string, hr string);
LOAD DATA LOCAL INPATH 'src/data/files/kv1.txt' OVERWRITE INTO TABLE srcpart PARTITION (ds='2008-04-08',hr='11');
LOAD DATA LOCAL INPATH 'src/data/files/kv1.txt' OVERWRITE INTO TABLE srcpart PARTITION (ds='2008-04-08',hr='12');
LOAD DATA LOCAL INPATH 'src/data/files/kv1.txt' OVERWRITE INTO TABLE srcpart PARTITION (ds='2008-04-09',hr='11');
LOAD DATA LOCAL INPATH 'src/data/files/kv1.txt' OVERWRITE INTO TABLE srcpart PARTITION (ds='2008-04-09',hr='12');
期望结果(expected output):ql/src/test/results/clientpositive
实际输出(actual output):build/ql/test/logs/clientpositive
输入数据:ql/../data/files
warehouse目录:build/test/data/warehouse
URL="jdbc:derby:;databaseName=../build/test/junit_metastore_db;create=true" driver="org.apache.derby.jdbc.EmbeddedDriver" userName="APP"
11/06/09 09:24:09 INFO metastore.HiveMetaStore: 0: get_table : db=default tbl=src_sequencefile
11/06/09 09:24:10 INFO metastore.HiveMetaStore: 0: drop_table : db=default tbl=src_sequencefile
11/06/09 09:24:10 INFO metastore.HiveMetaStore: 0: get_table : db=default tbl=src_sequencefile
11/06/09 09:24:10 INFO metastore.warehouse: deleting pfile:/home/tianzhao/apache/hive-trunk-snapshot/build/test/data/warehouse/src_sequencefile
11/06/09 09:24:10 INFO metastore.warehouse: Deleted the diretory pfile:/home/tianzhao/apache/hive-trunk-snapshot/build/test/data/warehouse/src_sequencefile
创建了这些表之后就可以启动hive,在hive的cli里面运行hive-0.6.0/src/ql/src/test/queries下面的testcase里面的HiveSQL了。
例如:
创建src表,加载数据到表src
create table src(key string, value string);
load data local inpath 'src/data/files/kv1.txt' into table src;
运行hive-0.6.0/src/ql/src/test/queries/clientpositive/groupby1.q里面的HiveSQL。
输出的结果可以跟hive-0.6.0/src/ql/src/test/results/clientpositive/groupby1.q.out里面的进行比较。