[cll@hadoop164 cxf]$ hdfs dfs -mkdir -p /csv-pig/tab1
tab1.csv:
1,true,123.123,2012-10-24 08:55:00
2,false,1243.5,2012-10-25 13:40:00
3,false,24453.325,2008-08-22 09:33:21.123
4,false,243423.325,2007-05-12 22:32:21.33454
5,true,243.325,1953-04-22 09:11:33
[cll@hadoop164 cxf]$ hdfs dfs -put tab1.csv /csv-pig/tab1
DROP TABLE IF EXISTS tab1;
-- EXTERNAL: the data files live outside Impala's warehouse directory and are
-- NOT deleted when this table is dropped. The files must already exist under
-- the path named in the LOCATION clause (loaded above with `hdfs dfs -put`).
CREATE EXTERNAL TABLE tab1
(
  id    INT,
  col_1 BOOLEAN,
  col_2 DOUBLE,
  col_3 TIMESTAMP
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/csv-pig/tab1';
-- If the table needs to be partitioned, use clauses like the ones below.
-- NOTE(review): this is an incomplete fragment — these clauses belong inside a
-- CREATE [EXTERNAL] TABLE statement (presumably for table `expweixin`, given
-- the ALTER below), in this order: column list, PARTITIONED BY, ROW FORMAT,
-- STORED AS, LOCATION. Also note the LOCATION line is missing its terminating
-- semicolon before the ALTER TABLE statement — confirm and fix when assembling
-- the full CREATE statement.
PARTITIONED BY (
startdate STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
WITH SERDEPROPERTIES ('serialization.format'='|', 'field.delim'='|')
STORED AS TEXTFILE
LOCATION '/csv-pig/gbiupsdns'
-- Register one partition value (startdate='2014103002') so queries can see
-- data placed under that partition's directory.
ALTER TABLE expweixin ADD PARTITION (startdate='2014103002');
-- If there are permission problems, fix ownership of the data directory:
# Recursively change ownership of the data directory to user impala, group
# hive — run as the hdfs superuser. NOTE(review): presumably this is so the
# impalad daemons can read the files; verify required ownership for your setup.
sudo -u hdfs hdfs dfs -chown -R impala:hive /csv-pig/expweixin
# Run the DDL script above against the impalad coordinator at 172.16.230.152.
impala-shell -i 172.16.230.152 -f table_setup.sql