Storm+Kafka实时计算框架搭建
标签(空格分隔): storm kafka 大数据 云计算
本篇文章是本人根据安装、配置storm实时计算框架时的执行过的命令整理出来的,中间没有配插图,不太适合初学者参考配置环境,请勿吐槽。
虚拟机环境配置
1. 网络配置
1. 配置IP
vi /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
HWADDR=00:0C:29:4A:1C:67
TYPE=Ethernet
UUID=7467a4d4-994c-4100-bee3-b2c27659c591
ONBOOT=yes
NM_CONTROLLED=yes
BOOTPROTO=static
IPADDR=192.168.137.100
NETMASK=255.255.255.0
GATEWAY=192.168.137.1
DNS1=192.168.137.1
2. 配置主机IP映射,可以通过主机名称访问主机
vi /etc/hosts
192.168.137.100 h0
192.168.137.101 h1
192.168.137.102 h2
192.168.137.103 h3
192.168.137.104 h4
192.168.137.105 h5
192.168.137.106 h6
192.168.137.107 h7
192.168.137.108 h8
192.168.137.109 h9
3. 防火墙配置
#临时关闭防火墙
service iptables stop
#永久关闭防火墙
chkconfig iptables off
#关闭Selinux
vi /etc/selinux/config
4. ssh互访
#创建ssh公钥,打通ssh互相访问
ssh-keygen -trsa -P ''
cd .ssh
cat id_rsa.pub >authorized_keys
chmod 600 authorized_keys
软件环境配置
1. 下载安装软件环境
#下载安装wget、gcc、gcc-c++、libtool等软件
yum install -y wget gcc gcc-c++ libtool
2. 下载、安装Python
wget https://www.python.org/ftp/python/3.0/Python-3.0.tar.bz2
tar -xvf Python-3.0.tar.bz2
cd Python-3.0
./configure
make && make install
#刷新共享库缓存,使新安装的库生效(执行成功时没有任何输出);可再运行 python -V 验证python是否安装成功
ldconfig
3. 安装JDK
tar -zvxf jdk-7u65-linux-x64.tar.gz
环境变量配置参考 .bashrc
vi ~/.bashrc
source ~/.bashrc
java -version
# .bashrc
# User specific aliases and functions
alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
set -o vi
export JAVA_HOME=/usr/java/jdk1.7.0_75
export MAVEN_HOME=/usr/apache/apache-maven-3.2.1
export ANT_HOME=/usr/apache/apache-ant-1.9.4
export HADOOP_HOME=/usr/cdh/hadoop-2.5.0-cdh5.3.0
export HADOOP_MAPRED_HOME=/usr/cdh/hadoop-2.5.0-cdh5.3.0
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HDFS_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HIVE_HOME=/usr/cdh/hive-0.13.1-cdh5.3.0
export SQOOP_HOME=
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$MAVEN_HOME/bin:$SQOOP_HOME/bin:$HIVE_HOME/bin:$ANT_HOME/bin
4. 下载、安装zookeeper
- 下载解压zookeeper
wget http://apache.dataguru.cn/zookeeper/stable/zookeeper-3.4.6.tar.gz
tar -zvxf zookeeper-3.4.6.tar.gz
- 配置zookeeper
#配置myid,很重要:myid的值必须与zoo.cfg中本机对应的server.N编号一致(例如h0对应server.0则myid为0)
mkdir /data/
chmod 777 /data
mkdir /data/zookeeper/
echo '1' > /data/zookeeper/myid
#配置zoo.cfg
cd zookeeper-3.4.6/conf
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg
#zoo.cfg配置,重点增加以下配置
dataDir=/data/zookeeper
server.0=h0:2888:3888
server.1=h1:2888:3888
server.2=h2:2888:3888
- 启动zookeeper
#启动
zkServer.sh start
#查看zookeeper状态
zkServer.sh status
5. 下载、安装storm
- 下载解压storm
wget http://apache.dataguru.cn/storm/apache-storm-0.9.5/apache-storm-0.9.5.tar.gz
tar -zxvf apache-storm-0.9.5.tar.gz
- 配置storm
cd apache-storm-0.9.5/conf
vi storm.yaml
#storm.yaml 中增加以下配置,注意空格
storm.local.dir: "/data/storm/data"
storm.zookeeper.servers:
- "h0"
- "h1"
- "h2"
nimbus.host: "h0"
- 启动storm
#启动nimbus,主节点(相当于hadoop中的jobtracker),只启动一个即可
nohup storm nimbus>nimbus.out&
#启动supervisor,工作节点(相当于hadoop中的tasktracker),可以多启动几个
nohup storm supervisor>supervisor.out&
#启动web监控界面,默认访问端口是8080(如果在storm.yaml中未修改的话),访问http://h0:8080
nohup storm ui>ui.out&
6. 下载、安装Kafka
- 下载解压kafka
wget http://apache.dataguru.cn/kafka/0.8.2.1/kafka_2.10-0.8.2.1.tgz
tar -zxvf kafka_2.10-0.8.2.1.tgz
- 配置kafka
cd kafka_2.10-0.8.2.1/config/
cp server.properties server1.properties
cp server.properties server2.properties
#分别修改server1.properties 和server2.properties 的配置
----------------------------------------------------
config/server1.properties:
broker.id=1
port=9093
log.dir=/tmp/kafka-logs-1
config/server2.properties:
broker.id=2
port=9094
log.dir=/tmp/kafka-logs-2
----------------------------------------------------
- kafka相关操作
#h0上启动kafka服务,启动两个broker
nohup kafka-server-start.sh $KAFKA_HOME/config/server1.properties &
nohup kafka-server-start.sh $KAFKA_HOME/config/server2.properties &
#查询所有topic
kafka-topics.sh --list --zookeeper h0:2181
#创建一个topic
kafka-topics.sh --create --zookeeper h0:2181 --replication-factor 1 --partitions 1 --topic test
#启动消息生产者
kafka-console-producer.sh --broker-list h0:9092 --topic test
#启动消息消费者
kafka-console-consumer.sh --zookeeper h0:2181 --topic test --from-beginning
7. 下载安装flume
wget http://mirrors.hust.edu.cn/apache/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz
tar -zxvf apache-flume-1.6.0-bin.tar.gz
1. flume简单配置,其他配置可以到用户手册中查找
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
2. 启动flume命令
flume-ng agent -c $FLUME_CONF_DIR -f $FLUME_CONF_DIR/hdfs.conf --name a1 -Dflume.root.logger=INFO,console
hadoop安装配置
- 在hadoop配置文件目录(即core-site.xml所在的etc/hadoop目录)下新增slaves文件,文件内容如下
h0
h1
h2
在hadoop配置文件目录(即core-site.xml所在的etc/hadoop目录)下新增master文件,文件内容为
h0
core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://h0:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/hadoop/tmp</value>
</property>
</configuration>
- hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/hadoop/name</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data/hadoop/data</value>
<final>true</final>
</property>
</configuration>
- mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapred.system.dir</name>
<value>file:/data/hadoop/mapred/system</value>
<final>true</final>
</property>
<property>
<name>mapred.local.dir</name>
<value>file:/data/hadoop/mapred/local</value>
<final>true</final>
</property>
<property>
<name>mapred.job.tracker</name>
<value>http://h0:9001</value>
</property>
</configuration>
- yarn-site.xml
<?xml version="1.0"?>
<configuration>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>h0:18025</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>h0:18040</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>h0:18030</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>h0:18141</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>h0:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
格式化namenode
hadoop namenode -format
启动hadoop
#启动hadoop
start-dfs.sh
#启动yarn
start-yarn.sh
hadoop命令测试
hadoop fs -mkdir /test
hadoop fs -ls /