1、建表
CREATE TABLE tmp.regex_log (
log_date STRING,
log_time STRING,
log_type STRING,
log_info STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "(^[0-9|-]{0,}) ([^ ]*) (INFO|WARNING|ERROR) ([^*]*)$",
"output.format.string" = "%1$s %2$s %3$s %4$s"
)
STORED AS TEXTFILE;
2、准备数据
cat /opt/my.log
2022-07-28 16:13:03 INFO session.SessionState: Created HDFS directory: /tmp/hive/root/689ec2d7-b603-4ad1-863d-cb7d4b0bba04
2022-07-28 16:13:03 INFO session.SessionState: Created local directory: /tmp/root/689ec2d7-b603-4ad1-863d-cb7d4b0bba04
2022-07-28 16:13:03 INFO session.SessionState: Created HDFS directory: /tmp/hive/root/689ec2d7-b603-4ad1-863d-cb7d4b0bba04/_tmp_space.db
2022-07-28 16:13:03 INFO conf.HiveConf: Using the default value passed in for log id: 689ec2d7-b603-4ad1-863d-cb7d4b0bba04
2022-07-28 16:13:03 INFO session.SessionState: Updating thread name to 689ec2d7-b603-4ad1-863d-cb7d4b0bba04 main
2022-07-28 16:13:03 WARNING Hive CLI is deprecated and migration to Beeline is recommended.
2022-07-28 16:14:38 INFO conf.HiveConf: Using the default value passed in for log id: 689ec2d7-b603-4ad1-863d-cb7d4b0bba04
2022-07-28 16:14:39 INFO ql.Driver: Compiling command(queryId=root_20220728161438_9466e8ce-c2bc-4608-ade9-99915b4b1e4a): select '123' as id, explode(array(1,3,5))
2022-07-28 16:14:40 INFO hive.metastore: HMS client filtering is enabled.
2022-07-28 16:14:40 INFO hive.metastore: Trying to connect to metastore with URI thrift://KF-CFT-Hadoop03:9083
2022-07-28 16:14:40 INFO hive.metastore: Opened a connection to metastore, current connections: 1
2022-07-28 16:14:40 INFO hive.metastore: Connected to metastore.
2022-07-28 16:14:41 ERROR ql.Driver: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions
3、导入
load data local inpath '/opt/my.log' overwrite into table tmp.regex_log;
4、查询
select * from tmp.regex_log;
+---------------------+---------------------+---------------------+----------------------------------------------------+
| regex_log.log_date | regex_log.log_time | regex_log.log_type | regex_log.log_info |
+---------------------+---------------------+---------------------+----------------------------------------------------+
| 2022-07-28 | 16:13:03 | INFO | session.SessionState: Created HDFS directory: /tmp/hive/root/689ec2d7-b603-4ad1-863d-cb7d4b0bba04 |
| 2022-07-28 | 16:13:03 | INFO | session.SessionState: Created local directory: /tmp/root/689ec2d7-b603-4ad1-863d-cb7d4b0bba04 |
| 2022-07-28 | 16:13:03 | INFO | session.SessionState: Created HDFS directory: /tmp/hive/root/689ec2d7-b603-4ad1-863d-cb7d4b0bba04/_tmp_space.db |
| 2022-07-28 | 16:13:03 | INFO | conf.HiveConf: Using the default value passed in for log id: 689ec2d7-b603-4ad1-863d-cb7d4b0bba04 |
| 2022-07-28 | 16:13:03 | INFO | session.SessionState: Updating thread name to 689ec2d7-b603-4ad1-863d-cb7d4b0bba04 main |
| 2022-07-28 | 16:13:03 | WARNING | Hive CLI is deprecated and migration to Beeline is recommended. |
| 2022-07-28 | 16:14:38 | INFO | conf.HiveConf: Using the default value passed in for log id: 689ec2d7-b603-4ad1-863d-cb7d4b0bba04 |
| 2022-07-28 | 16:14:39 | INFO | ql.Driver: Compiling command(queryId=root_20220728161438_9466e8ce-c2bc-4608-ade9-99915b4b1e4a): select '123' as id, explode(array(1,3,5)) |
| 2022-07-28 | 16:14:40 | INFO | hive.metastore: HMS client filtering is enabled. |
| 2022-07-28 | 16:14:40 | INFO | hive.metastore: Trying to connect to metastore with URI thrift://KF-CFT-Hadoop03:9083 |
| 2022-07-28 | 16:14:40 | INFO | hive.metastore: Opened a connection to metastore, current connections: 1 |
| 2022-07-28 | 16:14:40 | INFO | hive.metastore: Connected to metastore. |
| 2022-07-28 | 16:14:41 | ERROR | ql.Driver: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions |
+---------------------+---------------------+---------------------+----------------------------------------------------+
5、修改
alter table tmp.regex_log set SERDEPROPERTIES ('input.regex'='(^[0-9|-]{0,}) ([^ ]*) (INFO|WARNING|ERROR) ([^*]*)$');