hadoop hdfs整合 flume分析nginx日志

安装 ssh 和 rsync(用于节点间远程登录与拷贝)

yum install openssh-server openssh-clients

yum install rsync

 

创建hadoop 用户

groupadd -g 102 hadoopgroup  # 创建用户组
useradd -d /mnt/hadoop -u 10201  -g 102  hadoop  #创建用户

passwd hadoop

生成密钥

  $ ssh-keygen -t rsa
  $ cd .ssh
  $ cp id_rsa.pub authorized_keys
  $ cat authorized_keys

slave拷贝公钥

  1. scp ~/.ssh/id_rsa.pub hadoop@master:~/.ssh/id_rsa.pub.slave0

拷贝flume

scp -r /mnt/hadoop3/apache-flume-1.9.0-bin/ hadoop@app2:/mnt/hadoop3

tar -zxvf apache-flume-1.9.0-bin.tar.gz

如果拷贝的是压缩包,先用上面的命令解压;然后在 /etc/profile 文件中增加以下设置:

export FLUME_HOME=/mnt/hadoop3/apache-flume-1.9.0-bin
export PATH=$PATH:$FLUME_HOME/bin
 

最重要的一步:修改 $FLUME_HOME/conf/flume-conf.properties 文件:

# The configuration file needs to define the sources, 
# the channels and the sinks.
# Sources, channels and sinks are defined per agent, 
# in this case called 'a1'

a1.sources = s1
a1.channels = c1
a1.sinks = k1

# For each one of the sources, the type is defined

a1.sources.s1.type = exec

a1.sources.s1.command = tail -F  /logs/gdnyjy.org/access.log

# The channel can be defined as follows.
a1.sources.s1.channels = c1

# Each sink's type must be defined

a1.sinks.k1.type = hdfs

a1.sinks.k1.hdfs.path =  hdfs://app1:9000/flume/log/nginx=%Y%m%d/hour=%H

a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 0

a1.sinks.k1.hdfs.rollSize = 131072000
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.useLocalTimeStamp = true

#Specify the channel the sink should use
a1.sinks.k1.channel = c1

# Each channel's type is defined.

a1.channels.c1.type = memory

# Other config values specific to each type of channel(sink or source)
# can be defined as well
# In this case, it specifies the capacity of the memory channel

a1.channels.c1.capacity = 1000

a1.channels.c1.transactionCapacity = 100 


 

 

flume-ng agent -n a1 -c conf -f $FLUME_HOME/conf/flume-conf.properties

 

 

# 解决 HDFS 上生成的文件过小(小文件过多)的问题:
# 用下面的设置替换上文 k1 sink 中对应的配置,改为按时间(每 180 秒)滚动文件,
# 并关闭按大小、按事件数的滚动。

a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.minBlockReplicas=1
#a1.sinks.k1.hdfs.fileType=DataStream
#a1.sinks.k1.hdfs.writeFormat=Text
a1.sinks.k1.hdfs.rollInterval=180
a1.sinks.k1.hdfs.rollSize=0
a1.sinks.k1.hdfs.rollCount=0
a1.sinks.k1.hdfs.idleTimeout=0

 

启动kafka

./bin/kafka-server-start.sh -daemon config/server.properties
 

 

 

 

 

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值