#user  nginx;
worker_processes  1;

error_log  /var/log/nginx/error.log warn;
pid        /var/run/nginx.pid;

events {
    worker_connections  1024;
}

http {
    include       /etc/nginx/mime.types;
    default_type  application/octet-stream;

    log_format  main  '$remote_addr,[$time_local] "$http_user_agent" "$request"';
                      # '$status $body_bytes_sent "$http_referer" '
                      # '"$http_user_agent" "$http_x_forwarded_for"';

    access_log  /var/log/nginx/access.log  main;

    sendfile        on;
    #tcp_nopush     on;

    keepalive_timeout  65;

    #gzip  on;

    server {
        listen       80;
        server_name  localhost;

        #charset koi8-r;
        access_log  /var/log/nginx/access.log  main;

        location / {
            root   /web/baidu;
            index  index.html  index.htm;
        }
    }

    include /etc/nginx/conf.d/*.conf;
}
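With the log_format above, each access-log entry looks roughly like the following line (all values are illustrative); the quoted request field is what the Storm bolt further below scans for "GET /" and "HTTP/1.1":

192.168.1.10,[12/Mar/2021:10:01:22 +0800] "Mozilla/5.0" "GET /pic/cat.jpg HTTP/1.1"

The Flume agent below tails this access log and forwards every line to Kafka: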
agent.sources = r1
agent.sinks = k1
agent.channels = c1
## common
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1
## sources config
agent.sources.r1.type = exec
## location of the log file to monitor
agent.sources.r1.command = tail -F /var/log/nginx/access.log
agent.sources.r1.interceptors = t1
agent.sources.r1.interceptors.t1.type = timestamp
## channels config
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 1000
agent.channels.c1.byteCapacityBufferPercentage = 20
agent.channels.c1.byteCapacity = 1000000
agent.channels.c1.keep-alive = 60
## sinks config
agent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.k1.channel = c1
# Kafka destination (topic and broker list)
agent.sinks.k1.topic = my-first-topic4
agent.sinks.k1.brokerList = localhost:9092
agent.sinks.k1.requiredAcks = 1
agent.sinks.k1.batchSize = 100
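Assuming the properties above are saved as flume-kafka.conf (the file name is arbitrary, but --name must match the agent. prefix used in the properties), the agent can be started with the stock flume-ng launcher:

flume-ng agent --conf ./conf --conf-file flume-kafka.conf --name agent -Dflume.root.logger=INFO,console

On the Storm side, SplitBolt extracts the image name from each forwarded log line: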
package storm_kafka;

import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class SplitBolt implements IRichBolt {
    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        String sentence = input.getString(0); // the raw access-log line
        int in = sentence.indexOf("GET /");
        int out = sentence.indexOf("HTTP/1.1");
        if (in >= 0 && out >= 0) { // skip lines that are not GET requests
            // "GET /" is 5 characters, so in + 5 points just past the leading slash
            String split = sentence.substring(in + 5, out);
            if (!" ".equals(split) && !"favicon.ico ".equals(split)) {
                int in1 = split.indexOf("/");
                int out1 = split.indexOf(".jpg");
                if (in1 >= 0 && out1 > in1) {
                    // e.g. "pic/cat.jpg " -> "cat"
                    String split1 = split.substring(in1 + 1, out1);
                    collector.emit(new Values(split1));
                }
            }
        }
        collector.ack(input);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word")); // declare the tuple's output field
    }

    @Override
    public void cleanup() {}

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
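For the sample log line shown earlier, the request is "GET /pic/cat.jpg HTTP/1.1": in + 5 skips past "GET /", so split is "pic/cat.jpg " (note the trailing space), the next "/" and ".jpg" bracket the file name, and split1 = "cat" is emitted as the "word" field. CountBolt then tallies how many times each word has been seen: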
package storm_kafka;

import java.util.HashMap;
import java.util.Map;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

public class CountBolt implements IRichBolt {
    private Map<String, Integer> counters;
    private OutputCollector collector;

    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.counters = new HashMap<String, Integer>();
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        String name = input.getString(0); // the word emitted by SplitBolt
        // aggregate the count per key
        Integer count = counters.get(name);
        count = (count == null) ? 1 : count + 1;
        counters.put(name, count);
        collector.emit(new Values(name, count.toString()));
        collector.ack(input);
    }

    // called when the bolt is about to shut down
    @Override
    public void cleanup() {
        for (Map.Entry<String, Integer> entry : counters.entrySet()) {
            System.err.println(entry.getKey() + " : " + entry.getValue()); // print each key-value pair
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
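Note that counters lives in the memory of each CountBolt task, and Storm only guarantees that cleanup() runs in local mode, so the printout is a debugging aid; the counts that matter are the ones the topology below writes to HBase: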
package storm_kafka;

import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.hbase.bolt.HBaseBolt;
import org.apache.storm.hbase.bolt.mapper.SimpleHBaseMapper;
import org.apache.storm.kafka.BrokerHosts;
import org.apache.storm.kafka.KafkaSpout;
import org.apache.storm.kafka.SpoutConfig;
import org.apache.storm.kafka.StringScheme;
import org.apache.storm.kafka.ZkHosts;
import org.apache.storm.spout.SchemeAsMultiScheme;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

public class KafkaStormSampleTopo {
    public static void main(String[] args) throws Exception {
        Config config = new Config(); // Storm configuration object
        config.setDebug(true); // verbose logging
        config.put(Config.TOPOLOGY_MAX_SPOUT_PENDING, 1);
        String zkConnString = "localhost:2181"; // ZooKeeper host and port
        // topic the spout reads from; note that the Flume sink above writes to
        // "my-first-topic4" -- the two names must match for the pipeline to connect
        String topic = "my-first-topic";
        BrokerHosts hosts = new ZkHosts(zkConnString);
        SpoutConfig kafkaSpoutConfig = new SpoutConfig(hosts, topic, "/" + topic,
                UUID.randomUUID().toString()); // spout consumer configuration
        kafkaSpoutConfig.bufferSizeBytes = 1024 * 1024 * 4;
        kafkaSpoutConfig.fetchSizeBytes = 1024 * 1024 * 4;
        kafkaSpoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka-spout", new KafkaSpout(kafkaSpoutConfig));
        // shuffle-group the raw log lines to the splitter
        builder.setBolt("word-spitter", new SplitBolt()).shuffleGrouping("kafka-spout");
        // with a single counter task shuffle grouping is fine; with more tasks,
        // fieldsGrouping on "word" would be needed to keep the counts consistent
        builder.setBolt("word-counter", new CountBolt()).shuffleGrouping("word-spitter");

        // map each ("word", "count") tuple to HBase: row key = word,
        // column family "result", column "count"
        SimpleHBaseMapper mapper = new SimpleHBaseMapper()
                .withRowKeyField("word")
                .withColumnFields(new Fields("count"))
                .withColumnFamily("result");
        HBaseBolt hBaseBolt = new HBaseBolt("wordcount", mapper).withConfigKey("hbase.conf");
        Map<String, Object> hbConf = new HashMap<String, Object>();
        hbConf.put("hbase.rootdir", "hdfs://localhost:9000/hbase");
        hbConf.put("hbase.zookeeper.quorum", "localhost:2181");
        config.put("hbase.conf", hbConf);
        builder.setBolt("hbase", hBaseBolt).shuffleGrouping("word-counter");

        LocalCluster cluster = new LocalCluster(); // in-process test cluster
        cluster.submitTopology("KafkaStormSampleTopo", config, builder.createTopology());
    }
}
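Before the topology can run, ZooKeeper, Kafka, HDFS and HBase must all be up locally, and the Kafka topic and HBase table it uses must exist. A minimal sketch, assuming the stock Kafka and HBase command-line tools are on the PATH (and remembering that the Flume sink's topic must match the one the spout reads):

kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic my-first-topic
echo "create 'wordcount', 'result'" | hbase shell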