Real-time calculation of order totals under multi-level distribution (Flume + Kafka + Storm + MySQL)

This post describes a way to compute order totals in real time under a multi-level distribution model: Flume tails a log file and ships its contents to Kafka, Storm consumes the Kafka data and performs the real-time computation, and the results are written to MySQL over JDBC. It covers the Flume configuration, the Storm KafkaSpout, KafkaBolt, MultiBolt and ComputingBolt implementations, and notes on duplicate consumption and storm-jdbc pitfalls.

1. Flume configuration

Create a file and have Flume tail it; Flume then plays the role of the Kafka producer and ships the file's contents on to the consumer. The configuration:

############### name the source, sink and channel ###############
kafka_agent.sources = kafka_source
kafka_agent.sinks = kafka_sink
kafka_agent.channels = kafka_channel

############## configure the source #####################
# attach to the file
kafka_agent.sources.kafka_source.type = exec
kafka_agent.sources.kafka_source.command = tail -F /usr/local/flume1.8/test/MultilevelComputing.log
kafka_agent.sources.kafka_source.shell = /bin/bash -c

############### configure the sink ######################
# attach to Kafka
kafka_agent.sinks.kafka_sink.type = org.apache.flume.sink.kafka.KafkaSink
# which topic to write to
kafka_agent.sinks.kafka_sink.kafka.topic = Multilevel
# broker address
kafka_agent.sinks.kafka_sink.kafka.bootstrap.servers = htkj101:9092
# batch size: flush after every single message
kafka_agent.sinks.kafka_sink.kafka.flumeBatchSize = 1
# delivery guarantee: acks = -1 waits for all in-sync replicas
kafka_agent.sinks.kafka_sink.kafka.producer.acks = -1
kafka_agent.sinks.kafka_sink.kafka.producer.linger.ms = 1
kafka_agent.sinks.kafka_sink.kafka.producer.compression.type = snappy

############################ configure the channel ###################
# use a file channel as the temporary buffer; safer than a memory channel
kafka_agent.channels.kafka_channel.type = file
kafka_agent.channels.kafka_channel.checkpointDir = /home/uplooking/data/flume/checkpoint
kafka_agent.channels.kafka_channel.dataDirs = /home/uplooking/data/flume/data

########################### wire the three components together #########################
kafka_agent.sources.kafka_source.channels = kafka_channel
kafka_agent.sinks.kafka_sink.channel = kafka_channel
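The Multilevel topic has to exist first; with Kafka 0.10 it can be created along these lines (the ZooKeeper address is an assumption):

bin/kafka-topics.sh --create --zookeeper htkj101:2181 --replication-factor 1 --partitions 1 --topic Multilevel

Save the configuration above as, say, conf/kafka_agent.conf (the file name is an assumption) and start the agent; the value of --name must match the kafka_agent prefix used in the file:

bin/flume-ng agent --conf conf --conf-file conf/kafka_agent.conf --name kafka_agent -Dflume.root.logger=INFO,console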

2. Storm consumes the Kafka data and computes in real time

2.1 pom.xml

<dependencies>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-core</artifactId>
        <version>1.1.0</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>log4j-over-slf4j</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.10.0.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.11</artifactId>
        <version>0.10.0.1</version>
        <exclusions>
            <exclusion>
                <groupId>org.apache.zookeeper</groupId>
                <artifactId>zookeeper</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-api</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>log4j-over-slf4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.logging.log4j</groupId>
                <artifactId>log4j-slf4j-impl</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-kafka-client</artifactId>
        <version>1.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.storm</groupId>
        <artifactId>storm-jdbc</artifactId>
        <version>1.1.1</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.31</version>
    </dependency>
</dependencies>

2.2 KafkaSpout

Pulls the data from Kafka; this class also builds the topology, and all the configuration happens here.

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.utils.Utils;

public class KafkaTopo {

    public static void main(String[] args) {
        // create the TopologyBuilder
        TopologyBuilder topologyBuilder = new TopologyBuilder();

        // Kafka connection info
        String bootstrapServers = "htkj101:9092,htkj102:9093,htkj103:9094";
        // topic
        String topic = "Multilevel";

        /*
         * Construct the KafkaSpoutConfig builder:
         *   bootstrapServers:   Kafka broker addresses (ip:port)
         *   StringDeserializer: deserializer for the record key
         *   StringDeserializer: deserializer for the record value
         *   topic:              topic name
         */
        KafkaSpoutConfig.Builder<String, String> kafkaSpoutConfigBuilder =
                new KafkaSpoutConfig.Builder<>(
                        bootstrapServers,
                        StringDeserializer.class,
                        StringDeserializer.class,
                        topic);

        // use the builder to construct the KafkaSpoutConfig and set its properties
        KafkaSpoutConfig<String, String> kafkaSpoutConfig = kafkaSpoutConfigBuilder
                // consumer group id
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, topic.toLowerCase() + "_storm_group")
                // session timeout; must lie within
                // [group.min.session.timeout.ms, group.max.session.timeout.ms] = [6000, 300000],
                // default 10000
                .setProp(ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, "100000")
                // maximum bytes fetched per partition
                .setProp(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG, "1048576")
                // maximum time the client waits for a request response, default 30000
                .setProp(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, "300000")
                // expected interval between heartbeats to the consumer coordinator;
                // heartbeats keep the session alive and drive rebalancing when consumers
                // join or leave (default 3000, usually under 1/3 of session.timeout.ms)
                .setProp(ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, "30000")
                // offset commit period: 15 s (default 30 s)
                .setOffsetCommitPeriodMs(15000)
                // max records per poll; keep it small enough to finish within the session timeout
                .setMaxPollRecords(20)
                // where to start consuming
                .setFirstPollOffsetStrategy(KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST)
                .build();

        // spout: read from Kafka
        topologyBuilder.setSpout("kafkaSpout", new KafkaSpout<>(kafkaSpoutConfig));
        // bolt: parse the raw records coming from Kafka
        topologyBuilder.setBolt("KafkaSpoutBolt", new KafkaBolt())
                .localOrShuffleGrouping("kafkaSpout");
        // bolt: apply the business rules to the parsed tuples
        topologyBuilder.setBolt("MultiBolt", new MultiBolt())
                .fieldsGrouping("KafkaSpoutBolt",
                        new Fields("orderSn", "cateId", "goodsAmount", "parentId", "CEOId"));
        // bolt: accumulate the amounts and write them to MySQL
        topologyBuilder.setBolt("ComputingBolt", new ComputingBolt())
                .fieldsGrouping("MultiBolt", new Fields("CEOId", "parentId", "goodsAmount"));

        Config config = new Config();
        // supervisor/worker communication timeout (seconds);
        // beyond it the supervisor restarts the worker
        config.put("supervisor.worker.timeout.secs", 600000);
        // storm/zookeeper session timeout
        config.put("storm.zookeeper.session.timeout", 1200000000);
        // debug mode for fuller log output; only for local LocalCluster runs
        config.setDebug(true);

        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("KafKaTopo", config, topologyBuilder.createTopology());
        Utils.sleep(Long.MAX_VALUE);
        localCluster.shutdown();
    }
}
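The LocalCluster run above is for development only. To deploy to a real cluster, the usual approach (a sketch, not from the original post) is to package the jar and replace the LocalCluster block with StormSubmitter from storm-core:

// Sketch: cluster submission instead of LocalCluster.
// StormSubmitter.submitTopology throws checked exceptions
// (AlreadyAliveException, InvalidTopologyException, AuthorizationException),
// so main must declare or handle them.
StormSubmitter.submitTopology("KafKaTopo", config, topologyBuilder.createTopology());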

2.3 KafkaBolt

This bolt takes the data pulled from Kafka and splits it into fields. With storm-kafka-client's default record translator, the spout emits (topic, partition, offset, key, value), so the message payload sits at field index 4.

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class KafkaBolt extends BaseBasicBolt {

    @Override
    public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
        // field 4 of the spout's output is the Kafka record value
        String s = tuple.getString(4);
        System.out.println("kafkabolt-----" + s);
        // split on single spaces
        String[] split = s.split(" ");
        Integer orderSn = Integer.valueOf(split[0]);
        Integer cateId = Integer.valueOf(split[1]);
        Integer goodsAmount = Integer.valueOf(split[2]);
        Integer parentId = Integer.valueOf(split[3]);
        Integer CEOId = Integer.valueOf(split[4]);
        // emit downstream
        basicOutputCollector.emit(new Values(orderSn, cateId, goodsAmount, parentId, CEOId));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("orderSn", "cateId", "goodsAmount", "parentId", "CEOId"));
    }
}
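For reference, the test script in section 3 writes lines of five space-separated integers; an illustrative line (the values here are made up) and the tuple it becomes:

31942 42 17531 73 12
  -> orderSn=31942, cateId=42, goodsAmount=17531, parentId=73, CEOId=12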

2.4 MultiBolt

This bolt applies the business rule: some product categories must not count toward the performance totals.

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class MultiBolt extends BaseBasicBolt {

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("CEOId", "parentId", "goodsAmount"));
    }

    @Override
    public void execute(Tuple tuple, BasicOutputCollector basicOutputCollector) {
        // read the fields
        Integer orderSn = tuple.getIntegerByField("orderSn");
        Integer cateId = tuple.getIntegerByField("cateId");
        Integer goodsAmount = tuple.getIntegerByField("goodsAmount");
        Integer parentId = tuple.getIntegerByField("parentId");
        Integer CEOId = tuple.getIntegerByField("CEOId");
        System.out.println("orderSn:" + orderSn + " cateId:" + cateId + " goodsAmount:" + goodsAmount
                + " parentId:" + parentId + " CEOId:" + CEOId);
        // accessories (cateId 9) and face masks (cateId 65) are excluded from the totals
        if (cateId != 9 && cateId != 65) {
            System.out.println("success");
            basicOutputCollector.emit(new Values(CEOId, parentId, goodsAmount));
        }
    }
}

2.5 ComputingBolt

This bolt connects to the database and writes the accumulated order amounts into MySQL.
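The post never shows the table definition. A schema consistent with the columns the code reads and writes would look roughly like the following; the column types and the primary key on user_id are assumptions, not from the original:

CREATE TABLE test (
    user_id  INT PRIMARY KEY,  -- distributor id (parentId or CEOId)
    amount   INT,              -- accumulated order amount
    lid      INT,              -- level id: 10 = 总代理 (general agent), 9 = CEO
    lid_name VARCHAR(32)       -- level name
);

The bolt itself: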

import com.google.common.collect.Maps;
import org.apache.storm.jdbc.common.Column;
import org.apache.storm.jdbc.common.ConnectionProvider;
import org.apache.storm.jdbc.common.HikariCPConnectionProvider;
import org.apache.storm.jdbc.common.JdbcClient;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class ComputingBolt extends BaseRichBolt {

    private OutputCollector collector;
    // connection provider for the database
    private ConnectionProvider connectionProvider;
    // database access object
    private JdbcClient jdbcClient;
    // amount read from the database for the first-level agent
    private Integer parentAmount;
    // running total for the first-level agent
    private Integer parentAllAmount;
    // amount read from the database for the CEO
    private Integer CEOAmount;
    // running total for the CEO
    private Integer CEOAllAmount;

    @Override
    public void prepare(Map stormConf, TopologyContext topologyContext, OutputCollector collector) {
        this.collector = collector;
        // map holding the connection properties
        Map<String, Object> map = Maps.newHashMap();
        // driver
        map.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        // url
        map.put("dataSource.url", "jdbc:mysql:///yazan");
        // user
        map.put("dataSource.user", "root");
        // password
        map.put("dataSource.password", "123456");
        // create and initialize the connection provider
        connectionProvider = new HikariCPConnectionProvider(map);
        connectionProvider.prepare();
        // database access object; the second argument is the query timeout in seconds
        jdbcClient = new JdbcClient(connectionProvider, 30);
    }

    @Override
    public void execute(Tuple tuple) {
        Integer goodsAmount = tuple.getIntegerByField("goodsAmount");
        Integer parentId = tuple.getIntegerByField("parentId");
        Integer CEOId = tuple.getIntegerByField("CEOId");
        System.out.println("goodsAmount-->" + goodsAmount + " parentId-->" + parentId + " CEOId-->" + CEOId);

        // list holding the query parameters for the WHERE clause
        List<Column> parent = new ArrayList<>();
        parent.add(new Column("user_id", parentId, Types.INTEGER));
        // look the first-level agent up by parentId
        List<List<Column>> selectParentId =
                jdbcClient.select("SELECT user_id, amount FROM test WHERE user_id = ?", parent);
        if (selectParentId.size() == 0) {
            // no row for this parentId yet: insert one
            System.out.println("no data");
            jdbcClient.executeSql("INSERT INTO test (user_id,amount,lid,lid_name) VALUES ("
                    + parentId + "," + goodsAmount + ",10,'总代理')");
        } else {
            // row exists: read the current amount, add the order amount, write it back
            for (List<Column> columns : selectParentId) {
                for (Column column : columns) {
                    String columnName = column.getColumnName();
                    if ("amount".equalsIgnoreCase(columnName)) {
                        parentAmount = (Integer) column.getVal();
                        System.out.println("current amount " + parentAmount);
                    }
                }
            }
            parentAllAmount = parentAmount + goodsAmount;
            System.out.println("total amount " + parentAllAmount);
            jdbcClient.executeSql("UPDATE test SET amount = " + parentAllAmount
                    + " WHERE user_id = '" + parentId + "'");
        }

        // same pattern for the CEO
        List<Column> CEO = new ArrayList<>();
        CEO.add(new Column("user_id", CEOId, Types.INTEGER));
        List<List<Column>> selectCEOId =
                jdbcClient.select("SELECT user_id, amount FROM test WHERE user_id = ?", CEO);
        if (selectCEOId.size() == 0) {
            System.out.println("no data");
            jdbcClient.executeSql("INSERT INTO test (user_id,amount,lid,lid_name) VALUES ("
                    + CEOId + "," + goodsAmount + ",9,'CEO')");
        } else {
            for (List<Column> columns : selectCEOId) {
                for (Column column : columns) {
                    String columnName = column.getColumnName();
                    if ("amount".equalsIgnoreCase(columnName)) {
                        CEOAmount = (Integer) column.getVal();
                        System.out.println("current amount " + CEOAmount);
                    }
                }
            }
            CEOAllAmount = CEOAmount + goodsAmount;
            System.out.println("total amount " + CEOAllAmount);
            jdbcClient.executeSql("UPDATE test SET amount = " + CEOAllAmount
                    + " WHERE user_id = '" + CEOId + "'");
        }

        // BaseRichBolt does not ack automatically; without this the tuples time out,
        // get replayed, and the amounts are counted twice (see section 5)
        collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
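Two caveats the post does not address: the SQL is assembled by string concatenation, and the select-then-insert-or-update pair is not atomic, so parallel executors could race. A minimal sketch of an alternative, assuming user_id is the table's primary key (an assumption; the post never shows the schema): MySQL's INSERT ... ON DUPLICATE KEY UPDATE performs the whole read-modify-write in one statement, and storm-jdbc's JdbcClient.executeInsertQuery binds the Column values as prepared-statement parameters:

// minimal sketch, assuming a PRIMARY KEY on test.user_id
List<Column> row = new ArrayList<>();
row.add(new Column("user_id", parentId, Types.INTEGER));
row.add(new Column("amount", goodsAmount, Types.INTEGER));
jdbcClient.executeInsertQuery(
        "INSERT INTO test (user_id, amount, lid, lid_name) VALUES (?, ?, 10, '总代理')"
                + " ON DUPLICATE KEY UPDATE amount = amount + VALUES(amount)",
        Collections.singletonList(row));   // java.util.Collections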

3. Shell script

Write a shell script to generate test data:

#!/bin/bash
# note: no trailing space inside the quotes, or the script writes to the wrong file
file='/usr/local/flume1.8/test/MultilevelComputing.log'
for ((i = 0; i < 1000000; i++))
do
    orderSn=$RANDOM
    cateId=$(($RANDOM % 100 + 1))
    goodsAmount=$RANDOM
    parentId=$(($RANDOM % 50 + 51))
    CEOId=$(($RANDOM % 50 + 1))
    echo $orderSn $cateId $goodsAmount $parentId $CEOId >> $file
    sleep 0.001
done
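Run it while the Flume agent is tailing the log (the script name here is an assumption):

bash order_gen.sh

Each iteration appends one order line to the file, so tuples should start flowing through the topology, and the console should start printing kafkabolt----- lines, within seconds.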

4. Test results

[image: screenshot of the test results]

5. Notes and pitfalls

Duplicate consumption and recomputation with Kafka + Storm

At first I assumed this was a Kafka problem, but after some digging the real cause was that ComputingBolt extends BaseRichBolt and never anchored/acked its tuples. Adding

collector.ack(tuple);

at the end of execute() fixed it: without the ack, Storm eventually times the tuples out and replays them, so the amounts get counted more than once.

Note that a bolt extending BaseBasicBolt does not need a manual ack, because the framework already does it for us, as the sketch below shows.
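A paraphrased gist of Storm's BasicBoltExecutor, the wrapper that runs every BaseBasicBolt (a sketch of its logic, not a verbatim copy of the source):

// inside the executor that wraps a BaseBasicBolt:
public void execute(Tuple input) {
    collector.setContext(input);               // emits are anchored to the input tuple
    try {
        bolt.execute(input, collector);        // your BaseBasicBolt.execute(...)
        collector.getOutputter().ack(input);   // acked automatically on success
    } catch (FailedException e) {
        collector.getOutputter().fail(input);  // failed automatically on FailedException
    }
}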

A storm-jdbc error

While writing this I hit an error like the following:

[image: storm-jdbc error screenshot]

It happened because the SQL was originally written as a bare select * from xxx, while a storm-jdbc select resolves the query through the Column objects in the list we build; since I had only added user_id to that list, the other columns naturally could not be found. Selecting the needed columns explicitly, as in the final code above, avoids this.
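For reference, the working pattern used in ComputingBolt: name the columns you need in the SQL, and use the List<Column> only to bind the ? placeholders in the WHERE clause:

// the Column list supplies the value for the '?' placeholder
List<Column> params = new ArrayList<>();
params.add(new Column("user_id", parentId, Types.INTEGER));
List<List<Column>> rows =
        jdbcClient.select("SELECT user_id, amount FROM test WHERE user_id = ?", params);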
