Run on the Hadoop side:
[cloudera@quickstart ~]$ cat /tmp/pxf_hive_datafile.txt
Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67
San Francisco,Sept,156,6846.34
Paris,Nov,159,7134.56
San Francisco,Jan,113,5397.89
Prague,Dec,333,9894.77
Bangalore,Jul,271,8320.55
Beijing,Dec,100,4248.41
[cloudera@quickstart ~]$ HADOOP_USER_NAME=hdfs hive
Logging initialized using configuration in file:/etc/hive/conf.dist/hive-log4j.properties
WARNING: Hive CLI is deprecated and migration to Beeline is recommended.
hive> CREATE TABLE sales_info (location string, month string,
> number_of_orders int, total_sales double)
> ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
> STORED AS textfile;
OK
hive> LOAD DATA LOCAL INPATH '/tmp/pxf_hive_datafile.txt'
> INTO TABLE sales_info;
Loading data to table default.sales_info
Table default.sales_info stats: [numFiles=1, totalSize=250]
OK
hive> SELECT * FROM sales_info;
OK
Prague Jan 101 4875.33
Rome Mar 87 1557.39
Bangalore May 317 8936.99
Beijing Jul 411 11600.67
San Francisco Sept 156 6846.34
Paris Nov 159 7134.56
San Francisco Jan 113 5397.89
Prague Dec 333 9894.77
Bangalore Jul 271 8320.55
Beijing Dec 100 4248.41
Time taken: 0.37 seconds, Fetched: 10 row(s)
hive>
Find the Hive table's storage location:
hive> DESCRIBE EXTENDED sales_info;
OK
location string
month string
number_of_orders int
total_sales double
Detailed Table Information Table(tableName:sales_info, dbName:default, owner:hdfs, createTime:1576406293, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:location, type:string, comment:null), FieldSchema(name:month, type:string, comment:null), FieldSchema(name:number_of_orders, type:int, comment:null), FieldSchema(name:total_sales, type:double, comment:null)], location:hdfs://txcloudhdfs/apps/hive/warehouse/sales_info, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=,, field.delim=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{transient_lastDdlTime=1576406349, totalSize=250, numRows=0, rawDataSize=0, numFiles=1}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
Time taken: 0.067 seconds, Fetched: 6 row(s)
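The HDFS path is the location field buried inside the detailed table information above: hdfs://txcloudhdfs/apps/hive/warehouse/sales_info. If you only need the path, DESCRIBE FORMATTED prints it on a dedicated Location: line, which is easier to scan (the exact output layout varies by Hive version):

hive> DESCRIBE FORMATTED sales_info;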
On the GP side:
CREATE EXTERNAL TABLE salesinfo_hiveprofile(location text, month text, num_orders int, total_sales float8)
LOCATION ('pxf://default.sales_info?PROFILE=Hive')
FORMAT 'custom' (FORMATTER='pxfwritable_import');
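The LOCATION URI names the Hive table as <db-name>.<table-name> and selects the built-in Hive profile. On newer PXF versions a named server configuration can also be appended; a sketch of the general URI form (the SERVER value here is an assumption about the local PXF setup, not something this walkthrough requires):

pxf://<hive-db-name>.<hive-table-name>?PROFILE=Hive&SERVER=default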
test=> select * from salesinfo_hiveprofile;
ERROR: remote component error (500) from '127.0.0.1:5888': type Exception report message Failed connecting to Hive MetaStore service: Could not connect to meta store using any of the URIs provided. Most recent failure: org.apache.thrift.transport.TTransportException: java.net.UnknownHostException: bizcent-bigdata-app-07 description The server encountered an internal error that prevented it from fulfilling this request. exception java.lang.RuntimeException: Failed connecting to Hive MetaStore service: Could not connect to meta store using any of the URIs provided. Most recent failure: org.apache.thrift.transport.TTransportException: java.net.UnknownHostException: bizcent-bigdata-app-07 (libchurl.c:920) (seg0 slice1 10.253.14.136:7000 pid=31481) (libchurl.c:920)
CONTEXT: External table salesinfo_hiveprofile
test=>
The host was not configured: the Greenplum hosts cannot resolve the Hive Metastore hostname bizcent-bigdata-app-07.
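PXF picked up the Metastore URI from hive-site.xml, but name resolution fails on the Greenplum side. One fix is to add the mapping to /etc/hosts on every Greenplum host (the IP below is a hypothetical placeholder; substitute the real Metastore address):

# run as root on every Greenplum host; 10.0.0.7 is a made-up address
echo "10.0.0.7  bizcent-bigdata-app-07" >> /etc/hosts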
test=# \dS+ salesinfo_hiveprofile;
External table "public.salesinfo_hiveprofile"
Column | Type | Modifiers | Storage | Stats target | Description
-------------+------------------+-----------+----------+--------------+-------------
location | text | | extended | |
month | text | | extended | |
num_orders | integer | | plain | |
total_sales | double precision | | plain | |
Type: readable
Encoding: UTF8
Format type: custom
Format options: formatter 'pxfwritable_import'
External options: {}
External location: pxf://default.sales_info?PROFILE=Hive
Execute on: all segments
test=# select * from salesinfo_hiveprofile;
ERROR: remote component error (500) from '127.0.0.1:5888': type Exception report message javax.servlet.ServletException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): User: gpadmin is not allowed to impersonate gpadmin description The server encountered an internal error that prevented it from fulfilling this request. exception javax.servlet.ServletException: javax.servlet.ServletException: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException): User: gpadmin is not allowed to impersonate gpadmin (libchurl.c:920) (seg0 slice1 10.253.14.136:7000 pid=11487) (libchurl.c:920)
CONTEXT: External table salesinfo_hiveprofile
test=#
Change the HDFS configuration (proxy-user settings):
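The impersonation error means the HDFS proxy-user rules do not allow gpadmin (the user PXF runs as) to impersonate the Greenplum session user. The usual fix is to add proxy-user properties for gpadmin to core-site.xml on the Hadoop side and restart the NameNode; the wildcard values below are a permissive sketch and should be narrowed for production:

<property>
  <name>hadoop.proxyuser.gpadmin.hosts</name>
  <value>*</value>
</property>
<property>
  <name>hadoop.proxyuser.gpadmin.groups</name>
  <value>*</value>
</property>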
test=> select * from salesinfo_hiveprofile;
location | month | num_orders | total_sales
---------------+-------+------------+-------------
Prague | Jan | 101 | 4875.33
Rome | Mar | 87 | 1557.39
Bangalore | May | 317 | 8936.99
Beijing | Jul | 411 | 11600.67
San Francisco | Sept | 156 | 6846.34
Paris | Nov | 159 | 7134.56
San Francisco | Jan | 113 | 5397.89
Prague | Dec | 333 | 9894.77
Bangalore | Jul | 271 | 8320.55
Beijing | Dec | 100 | 4248.41
(10 rows)
test=> \dS+ salesinfo_hiveprofile
External table "stest.salesinfo_hiveprofile"
Column | Type | Modifiers | Storage | Stats target | Description
-------------+------------------+-----------+----------+--------------+-------------
location | text | | extended | |
month | text | | extended | |
num_orders | integer | | plain | |
total_sales | double precision | | plain | |
Type: readable
Encoding: UTF8
Format type: custom
Format options: formatter 'pxfwritable_import'
External options: {}
External location: pxf://default.sales_info?PROFILE=Hive
Execute on: all segments
test=>
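With the connection fixed, the external table behaves like an ordinary read-only table in queries; for example, a hypothetical aggregation over the Hive data:

test=> SELECT location, sum(total_sales) AS sales
test-> FROM salesinfo_hiveprofile GROUP BY location ORDER BY sales DESC;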