采集OIDD数据

[ods@master ~]$ mkdir ctyun
[ods@master ~]$ ls
ctyun  students.txt
[ods@master ~]$ cd ctyun/
[ods@master ctyun]$ ls
[ods@master ctyun]$ pwd
/home/ods/ctyun
[ods@master ctyun]$ mkdir oidd
[ods@master ctyun]$ cd oidd/
[ods@master oidd]$ ls
[ods@master oidd]$ pwd
/home/ods/ctyun/oidd
[ods@master oidd]$ ls
part-00000  part-00002  part-00004  part-00006  part-00008
part-00001  part-00003  part-00005  part-00007  part-00009
[ods@master oidd]$ cd ..
[ods@master ctyun]$ ls
oidd
[ods@master ctyun]$ cd ~
[ods@master ~]$ ls
ctyun  students.txt
[ods@master ~]$ mkdir flume
[ods@master ~]$ ls
ctyun  flume  students.txt
[ods@master ~]$ cd flume/
[ods@master flume]$ ls
[ods@master flume]$ vim flume-oss-oidd-to-hdfs.properties

# Flume agent definition: watch a local spool directory of OIDD part files and
# write their contents to HDFS under a per-day partition directory.
# "a" is the agent name; it must match the -n argument passed to flume-ng.
# Name the source component r1
a.sources = r1
# Name the channel component c1
a.channels = c1
# Name the sink component k1
a.sinks = k1 

# Spooling-directory source settings
a.sources.r1.type = spooldir 
# Local directory the source watches for files to ingest
a.sources.r1.spoolDir = /home/ods/ctyun/oidd
a.sources.r1.fileHeader = true 
# Attach a timestamp interceptor so each event carries a timestamp header;
# the %Y%m%d escapes in hdfs.path below are resolved from that header.
a.sources.r1.interceptors = i1 
a.sources.r1.interceptors.i1.type = timestamp

# Sink type and target location
a.sinks.k1.type = hdfs
# Output directory; day_id=%Y%m%d yields one directory per day (e.g. day_id=20220531)
a.sinks.k1.hdfs.path = /daas/motl/ods/ods_oidd/day_id=%Y%m%d
# File name prefix
a.sinks.k1.hdfs.filePrefix = oidd
# Roll (close the current file and start a new one) once it reaches this size, in bytes
a.sinks.k1.hdfs.rollSize = 70240000
# Roll after this many events have been written to the file
a.sinks.k1.hdfs.rollCount = 600000
# Roll after this much time has elapsed (seconds)
a.sinks.k1.hdfs.rollInterval = 60
# File type DataStream: events are written out as-is, without compression or SequenceFile wrapping
a.sinks.k1.hdfs.fileType = DataStream
# Write records as text
a.sinks.k1.hdfs.writeFormat = text
# File name suffix
a.sinks.k1.hdfs.fileSuffix = .txt

# Channel settings
a.channels.c1.type = memory 
# Maximum number of events held in the channel
a.channels.c1.capacity = 10000
# Maximum number of events per transaction (put by the source or taken by the sink)
a.channels.c1.transactionCapacity = 1000

# Wiring: connect the source and the sink to the channel
a.sources.r1.channels = c1 
a.sinks.k1.channel = c1

[ods@master flume]$ nohup flume-ng agent -n a -f ~/flume/flume-oss-oidd-to-hdfs.properties -Dflume.root.logger=DEBUG,console &
[1] 93842
[ods@master flume]$ nohup: 忽略输入并把输出追加到"nohup.out"

[ods@master flume]$ ls
flume-oss-oidd-to-hdfs.properties  nohup.out
[ods@master flume]$ tail -f nohup.out 
[ods@master ~]$ cd "flume"
[ods@master flume]$ hdfs dfs -ls /daas/motl/ods/
Found 2 items
drwxr-x---   - ods ods          0 2022-05-31 20:05 /daas/motl/ods/ods_oidd
drwxr-x---+  - ods ods          0 2022-05-31 15:26 /daas/motl/ods/student
[ods@master flume]$ hdfs dfs -ls /daas/motl/ods/ods_oidd
Found 1 items
drwxr-x---   - ods ods          0 2022-05-31 20:10 /daas/motl/ods/ods_oidd/day_id=20220531
[ods@master flume]$ hdfs dfs -ls /daas/motl/ods/ods_oidd/day_id=20220531
Found 12 items
-rw-r--r--   1 ods ods   70705242 2022-05-31 20:06 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730040.txt
-rw-r--r--   1 ods ods   70705200 2022-05-31 20:06 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730041.txt
-rw-r--r--   1 ods ods   70705318 2022-05-31 20:06 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730042.txt
-rw-r--r--   1 ods ods   70705238 2022-05-31 20:07 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730043.txt
-rw-r--r--   1 ods ods   70705178 2022-05-31 20:07 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730044.txt
-rw-r--r--   1 ods ods   70705306 2022-05-31 20:08 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730045.txt
-rw-r--r--   1 ods ods   70705226 2022-05-31 20:08 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730046.txt
-rw-r--r--   1 ods ods   70705190 2022-05-31 20:09 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730047.txt
-rw-r--r--   1 ods ods   70705222 2022-05-31 20:09 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730048.txt
-rw-r--r--   1 ods ods   70705198 2022-05-31 20:10 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730049.txt
-rw-r--r--   1 ods ods   70705306 2022-05-31 20:10 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730050.txt
-rw-r--r--   1 ods ods       8512 2022-05-31 20:10 /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730051.txt.tmp
[ods@master flume]$ hdfs dfs -du -h /daas/motl/ods/ods_oidd/day_id=20220531
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730040.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730041.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730042.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730043.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730044.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730045.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730046.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730047.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730048.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730049.txt
67.4 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730050.txt
11.5 M  /daas/motl/ods/ods_oidd/day_id=20220531/oidd.1653998730051.txt
[ods@master flume]$ cd ..
[ods@master ~]$ ls
ctyun  flume  students.txt
[ods@master ~]$ cd ctyun/oidd/
[ods@master oidd]$ ls
part-00000.COMPLETED  part-00005.COMPLETED
part-00001.COMPLETED  part-00006.COMPLETED
part-00002.COMPLETED  part-00007.COMPLETED
part-00003.COMPLETED  part-00008.COMPLETED
part-00004.COMPLETED  part-00009.COMPLETED
[root@master ~]# chmod 777 -R /usr/local/soft/hive-1.2.1
[root@master ~]# hive --service metastore

Starting Hive Metastore Server
[ods@master oidd]$ hive

Logging initialized using configuration in jar:file:/usr/local/soft/hive-1.2.1/lib/hive-common-1.2.1.jar!/hive-log4j.properties
hive> show databases;
OK
dal
default
dim
dwi
dws
ods
Time taken: 0.593 seconds, Fetched: 6 row(s)
hive> use ods;
OK
Time taken: 0.03 seconds
hive> show tables;
OK
student
Time taken: 0.024 seconds, Fetched: 1 row(s)
hive> drop table student;
OK
Time taken: 0.897 seconds
hive> CREATE EXTERNAL TABLE IF NOT EXISTS ods.ods_oidd(
    >     mdn string comment '手机号码'  
    >     ,start_time string comment '业务时间'  
    >     ,county_id string comment '区县编码'  
    >     ,longi string comment '经度'  
    >     ,lati string comment '纬度'  
    >     ,bsid string comment '基站标识'  
    >     ,grid_id string comment '网格号'  
    >     ,biz_type string comment '业务类型'  
    >     ,event_type string comment '事件类型'  
    >     ,data_source string comment '数据源'  
    > ) 
    > comment  'oidd'
    > PARTITIONED BY (
    >     day_id string comment '天分区'  
    > ) 
    > ROW FORMAT DELIMITED 
    >     FIELDS TERMINATED BY '\t' 
    > STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' 
    >     OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'  
    > location '/daas/motl/ods/ods_oidd'; 
OK
Time taken: 0.159 seconds
hive> select *from ods_oidd limit 10;
OK
Time taken: 0.238 seconds
hive> alter table ods.ods_oidd add if not exists partition(day_id='20220531');
OK
Time taken: 0.125 seconds
hive> select *from ods_oidd limit 10;
OK
F333EF56BDAB6AFF4445A6148C04EF24A3C531E0	20180503112611,20180503111824	8340103	117.288	31.868	FA938536F29F85238F68611CF3D59B57	117285031865040	3	2	oidd	20220531
F333EF56BDAB6AFF4445A6148C04EF24A3C531E0	20180503192731,20180503191841	8340111	117.283	31.723	FBEA793A755CAF6570D93E085D89ECEC	117280031720040	2	1	oidd	20220531
F333EF56BDAB6AFF4445A6148C04EF24A3C531E0	20180503145344,20180503144344	8340111	117.298	31.843	8AFD901C281AB4DFCE6D4EA13D4547E6	117295031840040	3	3	oidd	20220531
F333EF56BDAB6AFF4445A6148C04EF24A3C531E0	20180503180245,20180503175145	8340103	117.283	31.873	E6531E4A97F81E6921B9BB98DB98A12B	117280031870040	2	3	oidd	20220531
314A7A2616D6AA63D9F00C42668C18C9E14ABBDF	20180503174532,20180503173932	8340121	117.253	31.998	EE238C703C4F16504BE19C3AE6C509E0	117250031995040	3	3	oidd	20220531
314A7A2616D6AA63D9F00C42668C18C9E14ABBDF	20180503103626,20180503102726	8340121	117.233	31.958	A08CD92D9B456E5817F552F32558D320	117230031955040	3	2	oidd	20220531
314A7A2616D6AA63D9F00C42668C18C9E14ABBDF	20180503101726,20180503101226	8340121	117.248	31.998	8FB0056D782EBA15DC1833758128855D	117245031995040	1	2	oidd	20220531
314A7A2616D6AA63D9F00C42668C18C9E14ABBDF	20180503110550,20180503105650	8340103	117.283	31.893	8D14BC6E8FFF02384B09733C12087ED2	117280031890040	2	1	oidd	20220531
314A7A2616D6AA63D9F00C42668C18C9E14ABBDF	20180503104141,20180503103841	8340103	117.268	31.958	30D5197264FD4B5D4D116AA0455C76BB	117265031955040	3	3	oidd	20220531
314A7A2616D6AA63D9F00C42668C18C9E14ABBDF	20180503155136,20180503154236	8340103	117.288	31.893	45207462663B005B5F52AEBB050C26BE	117285031890040	3	2	oidd	20220531
Time taken: 0.056 seconds, Fetched: 10 row(s)
hive> MSCK REPAIR TABLE ods.ods_oidd;
OK
Time taken: 0.068 seconds

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值