一.Sqoop
1.Preface
https://en.wikipedia.org/wiki/Sqoop
2.Installation
①transfer tar under path: /usr/local/
②tar xzvf xxxx
③vi /etc/profile
source /etc/profile
3.Practice
①Command Example
sqoop import --connect jdbc:mysql://127.0.0.1:3306/hive --username hive --password hive --table TBLS
sqoop --options-file /root/import.txt --table TEST
②Testing Connection
(1)show databases in mysql
sqoop list-databases --connect "jdbc:mysql://hadoop:3306/?useUnicode=true&characterEncoding=UTF-8" --username root --password root
(2)show tables
sqoop list-tables --connect jdbc:mysql://hadoop:3306/hive --username root --password root
(3)import & export:
First:
create table test (id int,name varchar(10)) charset=utf8;
Second:
insert into test values (1,'北京');
insert into test values (2,'南京');
insert into test values (3,'东京');
insert into test values (4,'西京');
insert into test values (5,'新街口');
insert into test values (6,'五道口');
insert into test values (7,'菜市口');
insert into test values (8,'梅市口');
insert into test values (9,'珠市口');
insert into test values (10,'磁器口');
commit;
create table test2 (id int,name varchar(10)) charset=utf8;
(1)mysql import into hdfs
sqoop import --connect jdbc:mysql://hadoop:3306/test --username root --password root --table test --target-dir /usr/hive/warehouse/test -m 1
sqoop import --connect jdbc:mysql://hadoop:3306/test --username root --password root --table test -m 1
*-m N sets the number of map tasks (degree of parallelism) used for the import
*if we do not add --target-dir, its default path is /user/username/tablename on hdfs
(2)HDFS import into mysql
sqoop export --connect jdbc:mysql://hadoop:3306/test --table test2 --username root --password root --export-dir hdfs://hadoop:9000/usr/hive/warehouse/test
*if Chinese characters come out garbled (mojibake), force UTF-8 on the JDBC connection:
sqoop export --connect "jdbc:mysql://hadoop:3306/test?useUnicode=true&characterEncoding=UTF-8" --table test2 --username root --password root --export-dir hdfs://hadoop:9000/usr/hive/warehouse/test
(3) MYSQL-HIVE
sqoop import --connect jdbc:mysql://hadoop:3306/test --username root --password root --table test --hive-import --hive-database sqoop --create-hive-table --hive-table test -m 1
sqoop import --connect jdbc:mysql://hadoop:3306/test --username root --password root --table a --hive-import --hive-database hi --create-hive-table --hive-table a -m 1
RDBMS-HDFS<>HIVE
(imports every table in the database at once; avoid it unless you really need all tables)
sqoop import-all-tables --connect jdbc:mysql://hadoop:3306/test --username root --password root --as-textfile --warehouse-dir /output/
sqoop import-all-tables --connect jdbc:mysql://hadoop:3306/test --username root --password root --hive-import --as-textfile --create-hive-table --hive-table testsum -m 1
(4)(HIVE/HDFS->MYSQL) common command
sqoop-export \
--connect jdbc:mysql://hadoop:3306/test \
--username root \
--password root \
--table b \
--export-dir /usr/hive/warehouse/a \
--columns id,name \
--update-mode allowinsert \
--update-key id \
--fields-terminated-by '\001' \
--lines-terminated-by '\n' \
--input-null-string '\\N' \
--input-null-non-string '\\N'
二.Flume
1.Preface
https://en.wikipedia.org/wiki/Apache_Flume
2.Installation
①transfer tar under path: /usr/local/
②tar xzvf xxxx
③vi /etc/profile
source /etc/profile
3.Practice
①agent test:
vi netcat-logger.conf
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
flume-ng agent --conf conf --conf-file netcat-logger.conf --name a1 -Dflume.root.logger=INFO,console
②collect data in hdfs
vi tail-hdfs.conf
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
#exec 指的是命令
# Describe/configure the source
a1.sources.r1.type = exec
#F根据文件名追踪, f根据文件的nodeid追踪
a1.sources.r1.command = tail -F /var/test.log
a1.sources.r1.channels = c1
# Describe the sink
#下沉目标
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
#指定目录, flume帮做目的替换
a1.sinks.k1.hdfs.path = hdfs://hadoop:9000/flume/events/%y-%m-%d/%H%M/
#文件的命名, 前缀
a1.sinks.k1.hdfs.filePrefix = events-
#10 分钟就改目录
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
#文件滚动之前的等待时间(秒) 一般是30秒
a1.sinks.k1.hdfs.rollInterval = 3
#文件滚动的大小限制(bytes)
a1.sinks.k1.hdfs.rollSize = 1024
#写入多少个event数据后滚动文件(事件个数)
a1.sinks.k1.hdfs.rollCount = 100
#一次将多少个events FLUSH到HDFS
a1.sinks.k1.hdfs.batchSize = 100
#用本地时间格式化目录
a1.sinks.k1.hdfs.useLocalTimeStamp = true
#下沉后, 生成的文件类型,默认是Sequencefile,可用DataStream,则为普通文本
a1.sinks.k1.hdfs.fileType = DataStream
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
*The more detailed introduction of parameter of configuration:
http://lxw1234.com/archives/2015/10/527.htm
flume-ng agent --conf conf --conf-file tail-hdfs.conf --name a1 -Dflume.root.logger=INFO,console
vi num.txt
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
I LOVE FLUME !!!
$$$$$$$$$$$$$$$$
$$$$$$$$$$$$$$$$
我爱FLUME
vi test.sh
#!/bin/bash
# Feed the Flume exec source: append the sample data file to /var/test.log
# every 2 seconds so `tail -F /var/test.log` always has new data to collect.
# NOTE(review): the original ran `cat 111111.txt`, but the sample file
# created above is num.txt — fixed to match.
touch /var/test.log
while true; do
  echo start
  cat num.txt >> /var/test.log
  echo
  echo
  echo "我是分隔符"
  sleep 2
done
三.Azkaban
1.Preface
2.Installation
①mkdir -p /usr/local/azkaban
②transfer tar under path: /usr/local/azkaban
③tar xzvf xxxx
④mv azkaban-2.5.0/ sql
mv azkaban-web-2.5.0 server
mv azkaban-executor-2.5.0/ executor
⑤vi /etc/profile
source /etc/profile
⑥mysql -uroot -proot
create database azkaban;
use azkaban;
source /usr/local/azkaban/sql/create-all-sql-2.5.0.sql
⑦keytool -keystore keystore -alias jetty -genkey -keyalg RSA
cp keystore /usr/local/azkaban/server/
⑧cd server/conf
vi azkaban.properties
#Azkaban Personalization Settings
azkaban.name=Test
azkaban.label=My Local Azkaban
azkaban.color=#FF3601
azkaban.default.servlet.path=/index
web.resource.dir=/usr/local/azkaban/server/web/
default.timezone.id=Asia/Shanghai
#Azkaban UserManager class
user.manager.class=azkaban.user.XmlUserManager
user.manager.xml.file=/usr/local/azkaban/server/conf/azkaban-users.xml
#Loader for projects
executor.global.properties=conf/global.properties
azkaban.project.dir=projects
database.type=mysql
mysql.port=3306
mysql.host=192.168.16.100
mysql.database=azkaban
mysql.user=root
mysql.password=root
mysql.numconnections=100
# Velocity dev mode
velocity.dev.mode=false
# Azkaban Jetty server properties.
jetty.maxThreads=25
jetty.ssl.port=8443
jetty.port=8081
jetty.keystore=/usr/local/azkaban/server/keystore
jetty.password=96xxxxx
jetty.keypassword=96xxxx
jetty.truststore=/usr/local/azkaban/server/keystore
jetty.trustpassword=96xxxx
# Azkaban Executor settings
executor.port=12321
# mail settings
mail.sender=
mail.host=
job.failure.email=
job.success.email=
lockdown.create.projects=false
cache.directory=cache
vi azkaban-users.xml
<user username="admin" password="admin" roles="admin,metrics" />
⑨executor
cd /usr/local/azkaban/executor/conf
vi azkaban.properties
#Azkaban
default.timezone.id=Asia/Shanghai
# Azkaban JobTypes Plugins
azkaban.jobtype.plugin.dir=/usr/local/azkaban/executor/plugins/jobtypes
#Loader for projects
executor.global.properties=/usr/local/azkaban/executor/conf/global.properties
azkaban.project.dir=projects
database.type=mysql
mysql.port=3306
mysql.host=192.168.16.100
mysql.database=azkaban
mysql.user=root
mysql.password=root
mysql.numconnections=100
# Azkaban Executor settings
executor.maxThreads=50
executor.port=12321
executor.flow.threads=30
azkaban-executor-start.sh