1.首先建立一个文本表(text表),默认create table xx as select * from xxx ; 就生成了xx文本表。
这里s1_date 是已经建好的文本表,不再累述。
2. 查看文本表s1_date在hdfs存储的位置。
[slave20:21000] > describe formatted s1_date;
Query: describe formatted s1_date
+---------------------------------+------------------------------------------------------------+----------------------+
| name | type | comment |
+---------------------------------+------------------------------------------------------------+----------------------+
| # col_name | data_type | comment |
| | NULL | NULL |
| eci | int | NULL |
| s1_cnt_erabcreate_req | bigint | NULL |
| s1_cnt_erabcreate_succ | bigint | NULL |
| s1_cnt_hoout_req | bigint | NULL |
| s1_cnt_hoout_succ | bigint | NULL |
| s1_cnt_erabrelease_req | bigint | NULL |
| s1_cnt_uerelease | bigint | NULL |
| s1_cnt_uerelease_abnormal | bigint | NULL |
| user_cnt_tcp_downretrans | bigint | NULL |
| user_octet_up | bigint | NULL |
| user_octet_down | bigint | NULL |
| user_totaldelay_http_page30 | bigint | NULL |
| user_totaloctet_http_page30 | bigint | NULL |
| user_totaldelay_http_pageless30 | bigint | NULL |
| user_cnt_http_pageless30 | bigint | NULL |
| user_totaloctet_http_page500 | bigint | NULL |
| user_totaldelay_http_page500 | bigint | NULL |
| user_totaloctet_http_page2 | bigint | NULL |
| user_totaldelay_http_page2 | bigint | NULL |
| user_cnt_tcp_upretrans | bigint | NULL |
| | NULL | NULL |
| # Detailed Table Information | NULL | NULL |
| Database: | mobile | NULL |
| Owner: | impala | NULL |
| CreateTime: | Tue Dec 29 21:30:40 CST 2015 | NULL |
| LastAccessTime: | UNKNOWN | NULL |
| Protect Mode: | None | NULL |
| Retention: | 0 | NULL |
| Location: | hdfs://myha/user/hive/warehouse/mobile.db/s1_date | NULL |
| Table Type: | MANAGED_TABLE | NULL |
| Table Parameters: | NULL | NULL |
| | transient_lastDdlTime | 1451395840 |
| | NULL | NULL |
| # Storage Information | NULL | NULL |
| SerDe Library: | org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe | NULL |
| InputFormat: | org.apache.hadoop.mapred.TextInputFormat | NULL |
| OutputFormat: | org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat | NULL |
| Compressed: | No | NULL |
| Num Buckets: | 0 | NULL |
| Bucket Columns: | [] | NULL |
| Sort Columns: | [] | NULL |
+---------------------------------+------------------------------------------------------------+----------------------+
Fetched 43 row(s) in 0.15s
3. 回到master节点,查看该路径下有多少个文件,是否正确。
[slave20:21000] > exit;
[root@MASTER01~]# hadoop fs -ls /user/hive/warehouse/mobile.db/s1_date/
Found 48 items
drwxrwxrwx - impala supergroup 0 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/.impala_insert_staging
-rw-r--r-- 2 impala supergroup 28040 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a3_1538064594_data.0.
-rw-r--r-- 2 impala supergroup 27948 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a4_1769399956_data.0.
-rw-r--r-- 2 impala supergroup 26547 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a5_376761693_data.0.
-rw-r--r-- 2 impala supergroup 27712 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a6_748289471_data.0.
-rw-r--r-- 2 impala supergroup 26024 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a7_376761693_data.0.
-rw-r--r-- 2 impala supergroup 25695 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a8_2090998726_data.0.
-rw-r--r-- 2 impala supergroup 27486 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53a9_1028851036_data.0.
-rw-r--r-- 2 impala supergroup 26543 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53aa_1410957694_data.0.
-rw-r--r-- 2 impala supergroup 29005 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53ab_1984115561_data.0.
-rw-r--r-- 2 impala supergroup 23728 2015-12-29 21:31 /user/hive/warehouse/mobile.db/s1_date/cc4fd90699199b86-7baa2858d61b53ac_1346909925_data.0.
4.查看该表的记录条数
[root@MASTER01 ~]# impala-shell -i slave30
Starting Impala Shell without Kerberos authentication
Connected to slave30:21000
Server version: impalad version 2.0.1-cdh5 RELEASE (build cc09df07d93c79351052eef2f4e5baf52373dc33)
Welcome to the Impala shell. Press TAB twice to see a list of available commands.
Copyright (c) 2012 Cloudera, Inc. All rights reserved.
(Shell build version: Impala Shell v2.0.1-cdh5 (cc09df0) built on Wed Nov 19 10:57:34 PST 2014)
[slave30:21000] > use mobile;
Query: use mobile
[slave30:21000] > select count(*) from s1_date;
Query: select count(*) from s1_date
+----------+
| count(*) |
+----------+
| 9226 |
5.在本地建个s1_date 目录,并进入目录
[root@MASTER01 ~]# mkdir s1_date;
[root@MASTER01 ~]# cd s1_date/
[root@MASTER01 s1_date]# ls
6.从HDFS拷贝到本地目录 s1_date
[root@MASTER01 s1_date]# hadoop fs -copyToLocal /user/hive/warehouse/mobile.db/s1_date/* .
[root@MASTER01 s1_date]# pwd
/root/s1_date
[root@MASTER01 s1_date]# ls
cc4fd90699199b86-7baa2858d61b53a3_1538064594_data.0. cc4fd90699199b86-7baa2858d61b53b3_533285813_data.0. cc4fd90699199b86-7baa2858d61b53c3_1880186907_data.0.
cc4fd90699199b86-7baa2858d61b53a4_1769399956_data.0. cc4fd90699199b86-7baa2858d61b53b4_781871636_data.0. cc4fd90699199b86-7baa2858d61b53c4_1108168201_data.0.
cc4fd90699199b86-7baa2858d61b53a5_376761693_data.0. cc4fd90699199b86-7baa2858d61b53b5_1226722329_data.0. cc4fd90699199b86-7baa2858d61b53c5_1865579449_data.0.
cc4fd90699199b86-7baa2858d61b53a6_748289471_data.0. cc4fd90699199b86-7baa2858d61b53b6_1892136031_data.0. cc4fd90699199b86-7baa2858d61b53c6_568737959_data.0.
cc4fd90699199b86-7baa2858d61b53a7_376761693_data.0. cc4fd90699199b86-7baa2858d61b53b7_1139362409_data.0. cc4fd90699199b86-7baa2858d61b53c7_491335232_data.0.
cc4fd90699199b86-7baa2858d61b53a8_2090998726_data.0. cc4fd90699199b86-7baa2858d61b53b8_969914465_data.0. cc4fd90699199b86-7baa2858d61b53c8_1413328270_data.0.
cc4fd90699199b86-7baa2858d61b53a9_1028851036_data.0. cc4fd90699199b86-7baa2858d61b53b9_419715216_data.0. cc4fd90699199b86-7baa2858d61b53c9_855410744_data.0.
7.查看拷贝出来的文件个数 ,是否与表条数相同
[root@MASTER01 s1_date]# cat * |wc -l
9226
8.把所有数据放入csv中,下载即可(如果分隔符不对,需要重新替换分隔符,才能正常打开)
[root@MASTER01 s1_date]# cat * > s1_date.csv
[root@MASTER01 s1_date]# cat s1_date.csv |wc -l
9226
[root@MASTER01 s1_date]# sz s1_date.csv
*B00000000000000
重新保存,用csv打开