1. Copy the dependency jars into Flume's lib directory
(1) Copy every jar under Hive's lib directory into flume/lib (the least effort):
cp /opt/modules/hive-1.2.1/lib/* /opt/modules/flume-1.7.0-bin/lib
(2) Copy the jars under Hive's hcatalog/share/hcatalog:
cp /opt/modules/hive-1.2.1/hcatalog/share/hcatalog/* /opt/modules/flume-1.7.0-bin/lib
(3) Copy the MySQL driver jar:
cp /opt/modules/hive-1.2.1/lib/mysql-connector-java-5.1.27-bin.jar /opt/modules/flume-1.7.0-bin/lib
(4) Copy the relevant Hadoop jars
-> hadoop-core-2.6.0-mr1-cdh5.10.2.jar (not found)
-> hadoop-mapreduce-client-core-2.6.0-cdh5.10.2.jar (the equivalent jar from the local Hadoop 2.7.3 install is copied below)
cp /opt/modules/apache/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.7.3.jar /opt/modules/flume-1.7.0-bin/lib
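To confirm the required classes are now on Flume's classpath, a quick look at flume/lib is enough (a sketch; the grep pattern is only illustrative):
ls /opt/modules/flume-1.7.0-bin/lib | grep -E 'hive|hcatalog|mysql-connector|mapreduce-client-core'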
2. Flume agent configuration: flume-hive.properties
#name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = bigdata.ibeifeng.com
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = hive
a1.sinks.k1.hive.metastore = thrift://bigdata.ibeifeng.com:9083
a1.sinks.k1.hive.database = flume_test
a1.sinks.k1.hive.table = flume_user
a1.sinks.k1.serializer = DELIMITED
a1.sinks.k1.serializer.delimiter = "\t"
a1.sinks.k1.serializer.serdeSeparator = '\t'
a1.sinks.k1.serializer.fieldnames = user_id,user_name,user_age
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
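If the target table is partitioned, the same sink can route events into partitions, and batching can be tuned; a hedged sketch of the extra keys (names from the Flume 1.7 Hive Sink options, values here are only illustrative):
# Optional: write into a time-partitioned table and tune batching
# a1.sinks.k1.hive.partition = %y-%m-%d
# a1.sinks.k1.useLocalTimeStamp = true
# a1.sinks.k1.batchSize = 15000
# a1.sinks.k1.hive.txnsPerBatchAsk = 100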
3. Testing
(1) Start HDFS
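For example, assuming the Hadoop home used in step 1:
/opt/modules/apache/hadoop-2.7.3/sbin/start-dfs.sh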
(2) Start the Hive metastore
bin/hive --service metastore &
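To check that the metastore is actually listening on the port the sink points at (9083), one way, assuming netstat is available:
netstat -tlnp | grep 9083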
(3) Enter the Hive CLI and create the table
bin/hive
Create the database and table (the table must be created inside the flume_test database that the sink writes to):
create database flume_test;
use flume_test;
create table flume_user(
user_id int,
user_name string,
user_age int
) CLUSTERED BY (user_id) INTO 2 BUCKETS
stored as orc;
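A quick sanity check, inside the same Hive session, that the table landed in the database the sink expects:
show tables;
desc formatted flume_user;
Note that the Hive sink streams data in through Hive transactions; depending on the Hive setup, the metastore may also need ACID/transaction support enabled, which is worth checking if the sink later fails to commit.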
(4) Start Flume
bin/flume-ng agent --name a1 --conf conf --conf-file conf/flume-hive.properties
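To watch the sink work in real time, the agent can also be started with console logging (a standard flume-ng option):
bin/flume-ng agent --name a1 --conf conf --conf-file conf/flume-hive.properties -Dflume.root.logger=INFO,console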
(5) Start telnet
telnet bigdata.ibeifeng.com 44444
Enter:
22 ss 23
OK
45 ggg 34
OK
(The data appears in Hive, so the setup works.)
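To double-check from the shell (a sketch; rows may show up only after the current transaction batch commits):
bin/hive -e "select * from flume_test.flume_user;"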