Hadoop、Hive、Hbase、
Flume
等QQ交流群:138615359(已满),请加入新群:149892483
尊重原创,转载请注明: 转载自过往记忆(http://www.iteblog.com/)
本文链接地址: 《Flume-ng与Mysql整合开发》(http://www.iteblog.com/archives/1109)
2014 Spark亚太峰会会议资料下载、
《Hadoop从入门到上手企业开发视频下载[70集]》、
《炼数成金-Spark大数据平台视频百度网盘免费下载》、
《Spark 1.X 大数据平台V2百度网盘下载[完整版]》、
《深入浅出Hive视频教程百度网盘免费下载》
IT英文电子书免费下载频道上线啦,共收录4300+本IT方面的电子书,欢迎访问
http://books.iteblog.com
我们知道,Flume可以和许多的系统进行整合,包括了Hadoop、Spark、Kafka、Hbase等等;当然,强悍的Flume也是可以和Mysql进行整合,将分析好的日志存储到Mysql(当然,你也可以存放到pg、oracle等等关系型数据库)。
不过我这里想多说一些:Flume是分布式收集日志的系统;既然都分布式了,数据量应该很大,为什么你要将Flume分析出来的数据用Mysql进行储存?能否在下面评论处留下你的使用场景呢?
其实,Flume和Mysql进行整合开发的过程也是相当的简单的。代码如下:
package com.iteblog.flume;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

/**
 * A Flume sink that batches event bodies and inserts them into a MySQL table.
 *
 * <p>Each event body is decoded as a UTF-8 string and written into the
 * {@code content} column of the configured table. Inserts are batched
 * (up to {@code batchSize} events per transaction) and committed together
 * with the Flume channel transaction.
 *
 * <p>Required configuration keys: {@code hostname}, {@code port},
 * {@code databaseName}, {@code tableName}, {@code user}, {@code password}.
 * Optional: {@code batchSize} (default 100).
 *
 * <p>Originally from: http://www.iteblog.com/archives/1109
 */
public class MysqlSink extends AbstractSink implements Configurable {

    private static final Logger LOG = LoggerFactory.getLogger(MysqlSink.class);

    private String hostname;
    private String port;
    private String databaseName;
    private String tableName;
    private String user;
    private String password;
    private PreparedStatement preparedStatement;
    private Connection conn;
    private int batchSize;

    public MysqlSink() {
        LOG.info("MysqlSink start...");
    }

    /**
     * Reads and validates the sink configuration from the Flume context.
     * All connection parameters are mandatory; batchSize defaults to 100.
     */
    @Override
    public void configure(Context context) {
        hostname = context.getString("hostname");
        Preconditions.checkNotNull(hostname, "hostname must be set!!");
        port = context.getString("port");
        Preconditions.checkNotNull(port, "port must be set!!");
        databaseName = context.getString("databaseName");
        Preconditions.checkNotNull(databaseName, "databaseName must be set!!");
        tableName = context.getString("tableName");
        Preconditions.checkNotNull(tableName, "tableName must be set!!");
        user = context.getString("user");
        Preconditions.checkNotNull(user, "user must be set!!");
        password = context.getString("password");
        Preconditions.checkNotNull(password, "password must be set!!");
        batchSize = context.getInteger("batchSize", 100);
        // checkArgument, not checkNotNull: a boxed Boolean is never null,
        // so the original checkNotNull(batchSize > 0, ...) could never fire.
        Preconditions.checkArgument(batchSize > 0,
                "batchSize must be a positive number!!");
    }

    /**
     * Opens the JDBC connection and prepares the insert statement.
     * Fails fast (throws) instead of calling System.exit, so a broken sink
     * does not take down the whole Flume agent and the failure is visible
     * to Flume's lifecycle supervisor.
     */
    @Override
    public void start() {
        super.start();
        try {
            // Load the MySQL JDBC driver; a missing driver jar is fatal
            // for this sink, so propagate instead of swallowing.
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("MySQL JDBC driver not found on classpath", e);
        }

        String url = "jdbc:mysql://" + hostname + ":" + port + "/" + databaseName;

        try {
            conn = DriverManager.getConnection(url, user, password);
            // Commit manually, once per processed batch.
            conn.setAutoCommit(false);
            // tableName comes from the agent configuration (trusted), so
            // concatenation is acceptable here; the value is parameterized.
            preparedStatement = conn.prepareStatement("insert into " + tableName +
                    " (content) values (?)");
        } catch (SQLException e) {
            throw new RuntimeException("Unable to open connection to " + url, e);
        }
    }

    /** Releases the prepared statement and the JDBC connection. */
    @Override
    public void stop() {
        super.stop();
        if (preparedStatement != null) {
            try {
                preparedStatement.close();
            } catch (SQLException e) {
                LOG.error("Error closing prepared statement", e);
            }
        }

        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                LOG.error("Error closing connection", e);
            }
        }
    }

    /**
     * Drains up to batchSize events from the channel, inserts their bodies
     * into MySQL as one JDBC batch, and commits the channel transaction.
     * Returns BACKOFF when the channel runs dry before the batch is full.
     */
    @Override
    public Status process() throws EventDeliveryException {
        Status result = Status.READY;
        Channel channel = getChannel();
        Transaction transaction = channel.getTransaction();
        Event event;
        String content;

        List<String> actions = Lists.newArrayList();
        transaction.begin();
        try {
            for (int i = 0; i < batchSize; i++) {
                event = channel.take();
                if (event != null) {
                    // Decode explicitly as UTF-8; the no-arg String(byte[])
                    // constructor depends on the platform default charset.
                    content = new String(event.getBody(), StandardCharsets.UTF_8);
                    actions.add(content);
                } else {
                    // Channel is empty: tell Flume to back off.
                    result = Status.BACKOFF;
                    break;
                }
            }

            if (actions.size() > 0) {
                preparedStatement.clearBatch();
                for (String temp : actions) {
                    preparedStatement.setString(1, temp);
                    preparedStatement.addBatch();
                }
                preparedStatement.executeBatch();

                // JDBC commit first; if it throws, the channel transaction
                // is rolled back below and the events are redelivered.
                conn.commit();
            }
            transaction.commit();
        } catch (Throwable e) {
            try {
                transaction.rollback();
            } catch (Exception e2) {
                LOG.error("Exception in rollback. Rollback might not have been " +
                        "successful.", e2);
            }
            LOG.error("Failed to commit transaction. " +
                    "Transaction rolled back.", e);
            Throwables.propagate(e);
        } finally {
            transaction.close();
        }

        return result;
    }
}
pom文件中的依赖:
01 | <dependencies> |
02 | <dependency> |
03 | <groupId>org.apache.flume</groupId> |
04 | <artifactId>flume-ng-core</artifactId> |
05 | </dependency> |
06 |
07 | <dependency> |
08 | <groupId>org.apache.flume</groupId> |
09 | <artifactId>flume-ng-configuration</artifactId> |
10 | </dependency> |
11 |
12 | <dependency> |
13 | <groupId>mysql</groupId> |
14 | <artifactId>mysql-connector-java</artifactId> |
15 | <version> 5.1 . 25 </version> |
16 | </dependency> |
17 |
18 | <dependency> |
19 | <groupId>org.slf4j</groupId> |
20 | <artifactId>slf4j-api</artifactId> |
21 | </dependency> |
22 |
23 | <dependency> |
24 | <groupId>org.slf4j</groupId> |
25 | <artifactId>slf4j-log4j12</artifactId> |
26 | <scope>test</scope> |
27 | </dependency> |
28 | </dependencies> |
运行程序时,先在Mysql中创建一个表
1 | mysql> create table mysqltest( |
2 | -> id int ( 11 ) NOT NULL AUTO_INCREMENT, |
3 | -> content varchar( 50000 ) NOT NULL, |
4 | -> PRIMARY KEY (`id`) |
5 | -> ) ENGINE=InnoDB AUTO_INCREMENT= 4 DEFAULT CHARSET=utf8; |
6 | Query OK, 0 rows affected, 1 warning ( 0.05 sec) |
然后在flume中创建以下配置
01 | # User: 过往记忆 |
02 | # Date: 14 - 9 - 4 |
03 | # Time: 下午 13 : 16 |
04 | # blog: http: //www.iteblog.com |
05 | # 本文地址:http: //www.iteblog.com/archives/1109 |
06 | # 过往记忆博客,专注于hadoop、hive、spark、shark、flume的技术博客,大量的干货 |
07 | # 过往记忆博客微信公共帐号:iteblog_hadoop |
08 |
09 | agent.sinks.mysqlSink.type = com.iteblog.flume.MysqlSink |
10 | agent.sinks.mysqlSink.hostname=localhost |
11 | agent.sinks.mysqlSink.port= 3306 |
12 | agent.sinks.mysqlSink.databaseName=ngmonitor |
13 | agent.sinks.mysqlSink.tableName=mysqltest |
14 | agent.sinks.mysqlSink.user=root |
15 | agent.sinks.mysqlSink.password= 123456 |
16 | agent.sinks.mysqlSink.channel = c1 |
用下面的命令就可以启动:
1 | bin/flume-ng agent -c conf/ -f conf/mysql_test.conf -n agent |
再看下Mysql中的情况:
1 | mysql> select count(*) from mysqltest; |
2 | +----------+ |
3 | | count(*) | |
4 | +----------+ |
5 | | 98300 | |
6 | +----------+ |
好了,开发完成!上面的程序还可以改进,可以用Mybatis进行编写,将Flume处理逻辑和业务的处理逻辑分离开,这样下次只需要处理业务,Flume那块都不需要我们去考虑了,大大降低了编程的难度。具体怎么开发我就不说了,有需要请线下讨论。
本博客文章除特别声明,全部都是原创!尊重原创,转载请注明: 转载自过往记忆(http://www.iteblog.com/)
本文链接地址: 《Flume-ng与Mysql整合开发》(http://www.iteblog.com/archives/1109)