#########################################################
##主要作用是监听目录中的新增文件,采集到数据之后,输出到hdfs
#########################################################
a1.sources = r1
a1.sinks = k1
a1.channels = c1
#对于source的配置描述 监听目录中的新增文件
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /home/centos/logs/nginx/access.log
#对于sink的配置描述 使用log日志做数据的消费
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = hdfs://bdha/input/data-clean/nginx/%Y/%m/%d
a1.sinks.k1.hdfs.filePrefix = nginx
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.inUseSuffix = .tmp
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollCount = 100
a1.sinks.k1.hdfs.serializer = TEXT
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.minBlockReplicas = 1
a1.sinks.k1.hdfs.useLocalTimeStamp = true
#对于channel的配置描述 使用内存缓冲区域做数据的临时缓存
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 1000
#通过channel c1将source r1和sink k1关联起来
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
清洗完的脚本定时追加到MySQL
#!/bin/sh
###############
###############
SQOOP_BIN=/home/centos/sqoop/bin/sqoop
START_DATE=date -d "1 day ago" +%Y-%m-%d
echo “START_DATE=” S T A R T D A T E E N D D A T E = ‘ d a t e + e c h o