直接上代码:
pom.xml文件:
<!-- Storm + JDBC integration.
     NOTE: storm-jdbc must use the same version as storm-core; the original mixed
     1.0.1 with 1.1.1, which risks NoSuchMethodError/IncompatibleClassChangeError
     at runtime. Aligned to 1.1.1. -->
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-jdbc</artifactId>
    <version>1.1.1</version>
</dependency>
<!-- MySQL JDBC driver used by HikariCP's MysqlDataSource. -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.31</version>
</dependency>
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>1.1.1</version>
    <exclusions>
        <!-- storm-core bundles a log4j2 SLF4J binding; exclude it so it does not
             clash with the application's own SLF4J binding. -->
        <exclusion>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
        </exclusion>
    </exclusions>
</dependency>
spout:
package com.yjp.stormjdbc.demo1.spout;

import com.google.common.collect.Lists;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichSpout;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.List;
import java.util.Map;
import java.util.Random;

/**
 * Demo spout that endlessly emits one of three fixed user rows, chosen at random.
 * Output fields: (user_id, user_name, create_date).
 */
public class UserSpout implements IRichSpout {

    boolean isDistributed;
    SpoutOutputCollector collector;

    // Reuse one Random instance; the original allocated a new Random on every
    // nextTuple() call, which is wasteful on a hot path.
    private final Random rand = new Random();

    // Fixed sample rows; create_date is the JVM start-up wall clock.
    public static final List<Values> rows = Lists.newArrayList(
            new Values(1, "peter", System.currentTimeMillis()),
            new Values(2, "bob", System.currentTimeMillis()),
            new Values(3, "alice", System.currentTimeMillis()));

    public UserSpout() {
    }

    public UserSpout(boolean isDistributed) {
        this.isDistributed = isDistributed;
    }

    public boolean isDistributed() {
        return this.isDistributed;
    }

    @SuppressWarnings("rawtypes")
    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void close() {
    }

    @Override
    public void activate() {
    }

    @Override
    public void deactivate() {
    }

    @Override
    public void nextTuple() {
        // BUG FIX: nextInt(rows.size() - 1) could only yield index 0 or 1, so the
        // last row ("alice") was never emitted. nextInt(bound) is already
        // exclusive of the bound, so pass rows.size().
        final Values row = rows.get(rand.nextInt(rows.size()));
        System.out.println("row:" + row);
        this.collector.emit(row);
        // Thread.yield();
    }

    @Override
    public void ack(Object msgId) {
    }

    @Override
    public void fail(Object msgId) {
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("user_id", "user_name", "create_date"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
抽象类:将一些公用的方法封装
package com.yjp.stormjdbc.demo1.topology;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.yjp.stormjdbc.demo1.spout.UserSpout;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.common.ConnectionProvider;
import org.apache.storm.jdbc.common.HikariCPConnectionProvider;
import org.apache.storm.jdbc.common.JdbcClient;
import org.apache.storm.jdbc.mapper.JdbcLookupMapper;
import org.apache.storm.jdbc.mapper.JdbcMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcLookupMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
import org.apache.storm.tuple.Fields;

import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Base class for the storm-jdbc demo topologies. Sets up the demo schema and
 * seed data, builds the shared mappers/connection provider, then submits the
 * topology returned by {@link #getTopology()} (locally when no args are given,
 * otherwise to a remote cluster).
 */
public abstract class AbstractUserTopology {

    // Schema + seed data executed before the topology starts; alternatively
    // create these tables in the database up front.
    private static final List<String> setupSqls = Lists.newArrayList(
            "drop table if exists user",
            "drop table if exists department",
            "drop table if exists user_department",
            "create table if not exists user (user_id integer, user_name varchar(100), dept_name varchar(100), create_date date)",
            "create table if not exists department (dept_id integer, dept_name varchar(100))",
            "create table if not exists user_department (user_id integer, dept_id integer)",
            "insert into department values (1, 'R&D')",
            "insert into department values (2, 'Finance')",
            "insert into department values (3, 'HR')",
            "insert into department values (4, 'Sales')",
            "insert into user_department values (1, 1)",
            "insert into user_department values (2, 2)",
            "insert into user_department values (3, 3)",
            "insert into user_department values (4, 4)"
    );

    protected UserSpout userSpout;
    protected JdbcMapper jdbcMapper;
    protected JdbcLookupMapper jdbcLookupMapper;
    // Thread-safe pool wrapper; ConnectionProvider has three methods:
    // prepare(), getConnection(), cleanup().
    protected ConnectionProvider connectionProvider;

    protected static final String TABLE_NAME = "user";
    protected static final String JDBC_CONF = "jdbc.conf";
    protected static final String SELECT_QUERY =
            "select dept_name from department, user_department where department.dept_id = user_department.dept_id"
                    + " and user_department.user_id = ?";

    /**
     * Prepares the database, the mappers, and the connection provider, then
     * submits the topology. Usage (optional):
     * [dataSourceClassName] [dataSource.url] [user] [password] [topology name]
     * — currently hard-coded below for the demo.
     */
    public void execute(String[] args) throws Exception {
        Map map = Maps.newHashMap();
        map.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        map.put("dataSource.url", "jdbc:mysql://localhost:3306/login");
        map.put("dataSource.user", "root");
        map.put("dataSource.password", "root");

        Config config = new Config();
        config.put(JDBC_CONF, map);

        ConnectionProvider connectionProvider = new HikariCPConnectionProvider(map);
        // Initialize the connection pool.
        connectionProvider.prepare();
        // Query timeout in seconds.
        int queryTimeoutSecs = 60;
        // Client wrapping the pool for direct SQL execution.
        JdbcClient jdbcClient = new JdbcClient(connectionProvider, queryTimeoutSecs);
        for (String sql : setupSqls) {
            System.err.println("sql:" + sql);
            jdbcClient.executeSql(sql);
        }

        // Demo lookup: one query parameter column (name, value, SQL type).
        List<Column> list = new ArrayList<>();
        list.add(new Column("dept_id", 1, Types.INTEGER));
        List<List<Column>> lists =
                jdbcClient.select("select dept_id,dept_name from department where dept_id=?", list);
        // Number of rows returned.
        Long count = lists.stream().count();
        System.err.println(count);

        this.userSpout = new UserSpout();
        // SimpleJdbcMapper reads the table's metadata (column names/types) via
        // connectionProvider during construction, so cleanup() below is safe.
        this.jdbcMapper = new SimpleJdbcMapper(TABLE_NAME, connectionProvider);
        // Shut down the setup-time pool.
        connectionProvider.cleanup();

        // Bolt output: the three spout fields plus dept_name resolved by the lookup.
        Fields outputFields = new Fields("user_id", "user_name", "dept_name", "create_date");
        // Lookup query parameter: user_id, taken from the incoming tuple.
        List<Column> queryParamColumns = Lists.newArrayList(new Column("user_id", Types.INTEGER));
        this.jdbcLookupMapper = new SimpleJdbcLookupMapper(outputFields, queryParamColumns);
        // Fresh, not-yet-prepared provider for the topology's bolts.
        this.connectionProvider = new HikariCPConnectionProvider(map);

        String topoName = "test";
        // Optional fifth argument overrides the topology name.
        // BUG FIX: the original read args[4] when args.length == 4
        // (ArrayIndexOutOfBoundsException) and then unconditionally submitted the
        // topology a second time via StormSubmitter after the if/else.
        if (args.length > 4) {
            topoName = args[4];
        }
        if (args.length >= 4) {
            StormSubmitter.submitTopology(topoName, config, getTopology());
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("JDBCTopo", config, getTopology());
            Thread.sleep(10000);
            cluster.shutdown();
        }
    }

    public abstract StormTopology getTopology();
}普通topology的插入:
package com.yjp.stormjdbc.demo1.topology;

import com.google.common.collect.Lists;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.jdbc.bolt.JdbcInsertBolt;
import org.apache.storm.jdbc.bolt.JdbcLookupBolt;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.mapper.JdbcMapper;
import org.apache.storm.jdbc.mapper.SimpleJdbcMapper;
import org.apache.storm.topology.TopologyBuilder;

import java.sql.Types;
import java.util.List;

/**
 * Plain (non-Trident) topology: spout -> department lookup bolt -> insert bolt.
 */
public class UserPersistanceTopology extends AbstractUserTopology {

    private static final String USER_SPOUT = "USER_SPOUT";
    private static final String LOOKUP_BOLT = "LOOKUP_BOLT";
    private static final String PERSISTANCE_BOLT = "PERSISTANCE_BOLT";

    public static void main(String[] args) throws Exception {
        // execute() builds the mappers/provider and submits the topology itself.
        // BUG FIX: the original also called getTopology() on a SECOND, fresh
        // instance whose inherited fields were never initialized — useless and
        // NPE-prone; removed.
        new UserPersistanceTopology().execute(args);
    }

    @Override
    public StormTopology getTopology() {
        // Looks up dept_name and emits all four outputFields.
        JdbcLookupBolt departmentLookupBolt =
                new JdbcLookupBolt(connectionProvider, SELECT_QUERY, this.jdbcLookupMapper);

        // "must specify column schema when providing custom query": map the
        // table's column names/types by hand for the insert.
        List<Column> schemaColumns = Lists.newArrayList(
                new Column("create_date", Types.DATE),
                new Column("dept_name", Types.VARCHAR),
                new Column("user_id", Types.INTEGER),
                new Column("user_name", Types.VARCHAR));
        JdbcMapper mapper = new SimpleJdbcMapper(schemaColumns);
        // Insert bolt bound to an explicit parameterized INSERT; schemaColumns
        // controls which tuple fields feed which placeholders.
        JdbcInsertBolt userPersistanceBolt = new JdbcInsertBolt(connectionProvider, mapper)
                .withInsertQuery("insert into user (create_date, dept_name, user_id, user_name) values (?,?,?,?)");

        // userSpout ==> lookup bolt ==> insert bolt
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(USER_SPOUT, this.userSpout, 1);
        builder.setBolt(LOOKUP_BOLT, departmentLookupBolt, 1).shuffleGrouping(USER_SPOUT);
        builder.setBolt(PERSISTANCE_BOLT, userPersistanceBolt, 1).shuffleGrouping(LOOKUP_BOLT);
        return builder.createTopology();
    }
}
tridentTopology的插入:
package com.yjp.stormjdbc.demo1.topology;

import com.google.common.collect.Lists;
import com.yjp.stormjdbc.demo1.spout.UserSpout;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.mapper.SimpleJdbcLookupMapper;
import org.apache.storm.jdbc.trident.state.JdbcQuery;
import org.apache.storm.jdbc.trident.state.JdbcState;
import org.apache.storm.jdbc.trident.state.JdbcStateFactory;
import org.apache.storm.jdbc.trident.state.JdbcUpdater;
import org.apache.storm.trident.Stream;
import org.apache.storm.trident.TridentState;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.tuple.Fields;

import java.sql.Types;

/**
 * Trident variant: enriches spout tuples with dept_name via a JDBC state query,
 * then batch-persists the four-field tuples into the user table.
 */
public class UserPersistanceTridentTopology extends AbstractUserTopology {

    public static void main(String[] args) throws Exception {
        // execute() submits the topology itself.
        // BUG FIX: the original also called getTopology() on a SECOND, fresh
        // instance whose inherited fields were never initialized — useless and
        // NPE-prone; removed.
        new UserPersistanceTridentTopology().execute(args);
    }

    @Override
    public StormTopology getTopology() {
        TridentTopology topology = new TridentTopology();

        // A JDBC-backed Trident state needs a table name (or insert query), a
        // JdbcMapper, and a ConnectionProvider.
        JdbcState.Options options = new JdbcState.Options()
                .withConnectionProvider(connectionProvider) // not-yet-prepared provider from the base class
                .withMapper(this.jdbcMapper)                // column mapping derived from the table metadata
                // SELECT_QUERY resolves dept_name by user_id; the parameter value
                // comes from the incoming tuple's user_id field.
                .withJdbcLookupMapper(new SimpleJdbcLookupMapper(
                        new Fields("dept_name"),
                        Lists.newArrayList(new Column("user_id", Types.INTEGER))))
                .withTableName(TABLE_NAME) // target table for persistence; omit to skip writing
                // .withInsertQuery("...")  // alternatively, an explicit INSERT instead of a table name
                .withSelectQuery(SELECT_QUERY);

        JdbcStateFactory jdbcStateFactory = new JdbcStateFactory(options);
        Stream stream = topology.newStream("userSpout", new UserSpout());
        TridentState state = topology.newStaticState(jdbcStateFactory);

        // State query widens the stream from three fields to four:
        // (user_id, user_name, create_date) + dept_name.
        stream = stream.stateQuery(state,
                new Fields("user_id", "user_name", "create_date"),
                new JdbcQuery(),
                new Fields("dept_name"));

        // Batch-persist the enriched tuples into the configured table.
        stream.partitionPersist(jdbcStateFactory,
                new Fields("user_id", "user_name", "dept_name", "create_date"),
                new JdbcUpdater(),
                new Fields());
        System.err.println("执行完成");
        return topology.build();
    }
}整合到此结束, 项目中有需求的话直接将部分代码copy就可以了
努力吧,皮卡丘