jar包--nginx--flume--kafka--hdfs--hive
1,打jar包
在idea中,右击项目中的package
2,在hdp-4上Alt+P打开新窗口
把jar包拖进去,
java -jar frame-1.0-SNAPSHOT.jar
在浏览器中访问hdp-4:8180检查运行结果(端口与下面nginx配置中upstream的server一致)
3,启动nginx
cd /usr/local/nginx/sbin
./nginx
在网页上检查运行结果hdp-4:80
nginx的配置文件(路径为/usr/local/nginx/conf/nginx.conf)
#user nobody;
worker_processes 1;
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;
#pid logs/nginx.pid;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
#access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
# Backend pool that requests for the hdp-4 virtual host are proxied to.
# NOTE(review): presumably hdp-4:8180 is where frame-1.0-SNAPSHOT.jar listens - confirm.
upstream frame-tomcat {
server hdp-4:8180;
}
# Virtual host for hdp-4: accepts HTTP on port 80 and forwards everything
# to the frame-tomcat upstream, logging each request with the "main" format.
server {
listen 80;
server_name hdp-4;
# Server side: 80 is the port exposed to external clients
#charset koi8-r;
# Path the access log is written to (relative to the nginx prefix)
access_log logs/host.access.log main;
# Every URI is handed to the upstream instead of being served from local files
location / {
# root html;
# index index.html index.htm;
proxy_pass http://frame-tomcat;
}
# 5xx responses are answered with the static /50x.html page from the html directory
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root html;
}
}
server {
listen 80;
server_name localhost;
#charset koi8-r;
#access_log logs/host.access.log main;
location / {
root html;
index index.html index.htm;
}
#error_page 404 /404.html;
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root html;
}
# proxy the PHP scripts to Apache listening on 127.0.0.1:80
#
#location ~ \.php$ {
# proxy_pass http://127.0.0.1;
#}
# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
#
#location ~ \.php$ {
# root html;
# fastcgi_pass 127.0.0.1:9000;
# fastcgi_index index.php;
# fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
# include fastcgi_params;
#}
# deny access to .htaccess files, if Apache's document root
# concurs with nginx's one
#
#location ~ /\.ht {
# deny all;
#}
}
# another virtual host using mix of IP-, name-, and port-based configuration
#
#server {
# listen 8000;
# listen somename:8080;
# server_name somename alias another.alias;
# location / {
# root html;
# index index.html index.htm;
# }
#}
# HTTPS server
#
#server {
# listen 443;
# server_name localhost;
# ssl on;
# ssl_certificate cert.pem;
# ssl_certificate_key cert.key;
# ssl_session_timeout 5m;
# ssl_protocols SSLv2 SSLv3 TLSv1;
# ssl_ciphers HIGH:!aNULL:!MD5;
# ssl_prefer_server_ciphers on;
# location / {
# root html;
# index index.html index.htm;
# }
#}
}
阶段二:nginx采集数据到hdfs
1,启动集群(start-all.sh已被标记为弃用,运行时会提示"Instead use start-dfs.sh and start-yarn.sh",也可以依次执行这两个脚本)
start-all.sh
2,启动zookeeper和kafka
脚本文件启动zookeeper
脚本代码
#!/bin/bash
# zkmanager.sh - run the same zkServer.sh sub-command on every ZooKeeper node over ssh.
#
# Usage: ./zkmanager.sh <sub-command>     e.g. start | stop | status | restart
#
# $1 is passed through unchanged to zkServer.sh on each host.

# Refuse to run without a sub-command; otherwise zkServer.sh would be invoked
# with no argument on every host.
if [ -z "$1" ]; then
    echo "Usage: $0 <start|stop|status|restart>" >&2
    exit 1
fi

for host in hdp-1 hdp-2 hdp-4
do
    echo "${host}:${1}ing..."
    # /etc/profile is sourced explicitly before zkServer.sh runs in the ssh session.
    ssh "$host" "source /etc/profile;/root/apps/zookeeper-3.4.6/bin/zkServer.sh $1"
done
启动代码
./zkmanager.sh start
脚本文件启动kafka
脚本代码
#!/bin/bash
# kafkaManager.sh - start or stop the Kafka broker on every node over ssh.
#
# Usage: ./kafkaManager.sh start|stop
#
# Only kafka-server-start.sh and kafka-server-stop.sh exist, so any other
# argument is rejected up front instead of producing "No such file" on each host.

KAFKA_HOME=/root/apps/kafka_2.12-2.2.0

case "$1" in
    start)
        # -daemon and the broker config are only meaningful when starting.
        remote_cmd="${KAFKA_HOME}/bin/kafka-server-start.sh -daemon ${KAFKA_HOME}/config/server.properties"
        ;;
    stop)
        remote_cmd="${KAFKA_HOME}/bin/kafka-server-stop.sh"
        ;;
    *)
        echo "Usage: $0 start|stop" >&2
        exit 1
        ;;
esac

for host in hdp-1 hdp-2 hdp-4
do
    echo "${host}:${1}ing..."
    # /etc/profile is sourced explicitly before the Kafka script runs in the ssh session.
    ssh "$host" "source /etc/profile;${remote_cmd}"
done
启动代码
./kafkaManager.sh start
3,修改nginx配置文件(路径/usr/local/nginx/conf/nginx.conf)
备份nginx.conf文件
cp nginx.conf nginx.conf.bak
vi nginx.conf
#user nobody;
worker_processes 1;
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;
#pid logs/nginx.pid;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
#$remote_addr:ip,采集ip
log_format main '$remote_addr';
#access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
# Backend pool that requests for the hdp-4 virtual host are proxied to.
upstream frame-tomcat {
server hdp-4:8180;
}
# Virtual host for hdp-4: accepts HTTP on port 80, forwards everything to the
# frame-tomcat upstream and writes one access-log line per request.
server {
listen 80;
server_name hdp-4;
#charset koi8-r;
# Written with the "main" log_format declared earlier in this http block,
# which here contains only $remote_addr, so each log line is a bare client IP.
access_log logs/host.access.log main;
# Every URI is handed to the upstream instead of being served from local files
location / {
# root html;
# index index.html index.htm;
proxy_pass http://frame-tomcat;
}
# 5xx responses are answered with the static /50x.html page from the html directory
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root html;
}
}
server {
listen 80;
server_name localhost;
#charset koi8-r;
#access_log logs/host.access.log main;
location / {
root html;
index index.html index.htm;
}
#error_page 404 /404.html;
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root html;
}
# proxy the PHP scripts to Apache listening on 127.0.0.1:80
#
#location ~ \.php$ {
# proxy_pass http://127.0.0.1;
#}
# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
#
#location ~ \.php$ {
# root html;
# fastcgi_pass 127.0.0.1:9000;
# fastcgi_index index.php;
# fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
# include fastcgi_params;
#}
# deny access to .htaccess files, if Apache's document root
# concurs with nginx's one
#
#location ~ /\.ht {
# deny all;
#}
}
# another virtual host using mix of IP-, name-, and port-based configuration
#
#server {
# listen 8000;
# listen somename:8080;
# server_name somename alias another.alias;
# location / {
# root html;
# index index.html index.htm;
# }
#}
# HTTPS server
#
#server {
# listen 443;
# server_name localhost;
# ssl on;
# ssl_certificate cert.pem;
# ssl_certificate_key cert.key;
# ssl_session_timeout 5m;
# ssl_protocols SSLv2 SSLv3 TLSv1;
# ssl_ciphers HIGH:!aNULL:!MD5;
# ssl_prefer_server_ciphers on;
# location / {
# root html;
# index index.html index.htm;
# }
#}
}
重新运行nginx(路径/usr/local/nginx/sbin)
./nginx -s reload
在网页上hdp-4:80查看nginx是否启动成功
4,修改flume配置文件(路径为/root/apps/apache-flume-1.6.0-bin/tail-flumekafka.conf)
# Flume agent "a1": tails the nginx access log and publishes every new line to Kafka.
# Components: one exec source, one memory channel, one Kafka sink.
a1.sources = source1
a1.sinks = k1
a1.channels = c1

# Source: run `tail -F` on the access log; each line of output becomes one event.
a1.sources.source1.type = exec
a1.sources.source1.command = tail -F /usr/local/nginx/logs/host.access.log

# Sink: write events to the Kafka topic "test" on the three brokers.
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = test
a1.sinks.k1.brokerList = hdp-1:9092,hdp-2:9092,hdp-4:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20

# Channel: buffer events in memory between source and sink.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and the sink to the channel (each binding declared once).
a1.sources.source1.channels = c1
a1.sinks.k1.channel = c1
a1.sources.source1.command = tail -F /usr/local/nginx/logs/host.access.log
地址对应nginx配置文件中access_log指定的日志路径(logs/host.access.log)
5,启动flume(路径/root/apps/apache-flume-1.6.0-bin/bin)
./flume-ng agent -c ../conf/ -f ../tail-flumekafka.conf -n a1 -Dflume.root.logger=INFO,console
6,创建kafka的topic(名称test,与flume配置中a1.sinks.k1.topic一致)
cd apps/kafka_2.12-2.2.0/bin
./kafka-topics.sh --create --zookeeper hdp-1:2181,hdp-2:2181,hdp-4:2181 --replication-factor 2 --partitions 2 --topic test
7,启动kafka消费者(在idea)
D:\kafkaNewdemoe\src\main\resources\log4j.properties(日志文件)
### Settings ###
#log4j.rootLogger=debug,stdout,genlog
# The first token of rootLogger must be a level. The original value
# "logRollingFile,stdout" put a logger name there, which log4j 1.x cannot parse
# as a level and replaces with DEBUG. The level is now spelled out explicitly;
# raise it to INFO to silence library debug output on the console.
log4j.rootLogger=DEBUG,stdout

# Console appender: level, timestamp, caller location, then the message.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n

### Named logger "logRollingFile": obtained in code via Logger.getLogger("logRollingFile").
### Writes the bare message (%m%n) to a rolling file of at most 64KB with up to 200 backups.
log4j.logger.logRollingFile= DEBUG,test1
log4j.appender.test1 = org.apache.log4j.RollingFileAppender
log4j.appender.test1.layout = org.apache.log4j.PatternLayout
log4j.appender.test1.layout.ConversionPattern =%m%n
log4j.appender.test1.Threshold = DEBUG
log4j.appender.test1.ImmediateFlush = TRUE
log4j.appender.test1.Append = TRUE
log4j.appender.test1.File = d:/testlog/access.log
log4j.appender.test1.MaxFileSize = 64KB
log4j.appender.test1.MaxBackupIndex = 200
### log4j.appender.test1.Encoding = UTF-8
消费者
package com.example.kafkademo2.kafak3;
import org.apache.hadoop.conf.Configuration;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.log4j.Logger;
//java.io
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.Properties;
/**
 * Demo Kafka consumer: reads records from the topic the Flume Kafka sink
 * publishes to and hands each record value to the "logRollingFile" log4j
 * logger, which is configured in log4j.properties.
 */
public class ConsumerDemo3 {
    /**
     * Topic to consume. Must match the topic of the Flume Kafka sink
     * (a1.sinks.k1.topic) and the topic created with kafka-topics.sh.
     */
    private static final String TOPIC = "test";

    public static void main(String[] args) {
        // Start the blocking receive loop.
        receiveMsg();
    }

    /**
     * Subscribes to {@link #TOPIC} and polls forever, printing every record to
     * stdout and logging its value. The consumer is closed when the loop is
     * left through an exception.
     */
    private static void receiveMsg() {
        Logger logger = Logger.getLogger("logRollingFile");

        Properties properties = new Properties();
        properties.put("bootstrap.servers", "hdp-1:9092");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("group.id", "ddd");
        properties.put("enable.auto.commit", true);

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
        try {
            consumer.subscribe(Collections.singleton(TOPIC));
            // The original also opened e:/flumekafka.log and built an HDFS
            // Configuration here; neither was ever used, the stream was never
            // closed, and a missing e: drive aborted the consumer before the
            // first poll. Both have been removed.
            while (true) {
                // Wait up to 100 ms for the next batch of records.
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    // %n ends the line so consecutive records do not run together.
                    System.out.printf("key=%s,value=%s,offet=%s,topic=%s%n",
                            record.key(), record.value(), record.offset(), record.topic());
                    // Only the value is logged, so the rolling file holds the raw log line.
                    logger.debug(record.value());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            consumer.close();
        }
    }
}
上传到hdfs
package com.example.kafkademo2.kafak3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Uploads the local rolling log file written by the Kafka consumer to HDFS,
 * into the directory that the Hive external table points at.
 */
public class HdfsPut {
    /**
     * Copies d:/testlog/access.log to /flumekafka/a.log on hdfs://hdp-1:9000
     * as user "root". Failures are printed to stderr; the FileSystem handle is
     * closed whether or not the copy succeeds.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The HDFS client reads "dfs.replication"; the original key "replication"
        // was not a recognised setting and was silently ignored.
        conf.set("dfs.replication", "1");
        conf.set("dfs.blocksize", "64m");

        String user = "root";
        FileSystem fs = null;
        try {
            URI uri = new URI("hdfs://hdp-1:9000");
            fs = FileSystem.get(uri, conf, user);
            // Upload the file.
            fs.copyFromLocalFile(new Path("d:/testlog/access.log"), new Path("/flumekafka/a.log"));
        } catch (URISyntaxException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the handle is released even when the copy throws.
            if (fs != null) {
                try {
                    fs.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
8,hive建表(hive>)
-- External table over the HDFS directory /flumekafka/ with a single string column, ip.
CREATE EXTERNAL TABLE flumetable (
    ip STRING
)
ROW FORMAT DELIMITED
LOCATION '/flumekafka/';
9,上传数据到hive表,并查询总PV(hive>)
select count(*) from default.flumetable;
查询结果