Flume:适用于日志文件实时采集
Sqoop:适用于能与大数据集群直接通信的关系型数据库间的大批量数据传输
DataX(异构数据源离线同步工具):在异构数据库/文件系统之间高速交换数据
Kettle:图形化界面
[root@master packages]# cd ..
[root@master soft]# ls
0?? jdk1.8.0_171 shell01
A?? packages show
data phoenix-4.15.0 sqoop-1.4.7.bin__hadoop-2.6.0
hadoop-2.7.6 redis test.txt
hbase-1.4.6 redis-6.2.6 zookeeper-3.4.6
hive-1.2.1 scripts
jars shell
[root@master soft]# mv sqoop-1.4.7.bin__hadoop-2.6.0/ sqoop-1.4.7
[root@master soft]# cd sqoop-1.4.7/
[root@master sqoop-1.4.7]# ls
bin ivy README.txt
build.xml ivy.xml sqoop-1.4.7.jar
CHANGELOG.txt lib sqoop-patch-review.py
COMPILING.txt LICENSE.txt sqoop-test-1.4.7.jar
conf NOTICE.txt src
docs pom-old.xml testdata
[root@master sqoop-1.4.7]# pwd
/usr/local/soft/sqoop-1.4.7
[root@master sqoop-1.4.7]# vim /etc/profile
[root@master sqoop-1.4.7]# source /etc/profile
[root@master sqoop-1.4.7]# ls
bin ivy README.txt
build.xml ivy.xml sqoop-1.4.7.jar
CHANGELOG.txt lib sqoop-patch-review.py
COMPILING.txt LICENSE.txt sqoop-test-1.4.7.jar
conf NOTICE.txt src
docs pom-old.xml testdata
[root@master sqoop-1.4.7]# cd conf/
[root@master conf]# ls
oraoop-site-template.xml sqoop-site-template.xml
sqoop-env-template.cmd sqoop-site.xml
sqoop-env-template.sh
[root@master conf]# cp sqoop-env-template.sh sqoop-env.sh
[root@master conf]# vim sqoop-env.sh
export HADOOP_COMMON_HOME=/usr/local/soft/hadoop-2.7.6
export HADOOP_MAPRED_HOME=/usr/local/soft/hadoop-2.7.6/share/hadoop/mapreduce
export HBASE_HOME=/usr/local/soft/hbase-1.4.6
export HIVE_HOME=/usr/local/soft/hive-1.2.1
export ZOOCFGDIR=/usr/local/soft/zookeeper-3.4.6/conf
export ZOOKEEPER_HOME=/usr/local/soft/zookeeper-3.4.6
[root@master sqoop-1.4.7]# cd bin/
[root@master bin]# ls
configure-sqoop sqoop-import-all-tables
configure-sqoop.cmd sqoop-import-mainframe
sqoop sqoop-job
sqoop.cmd sqoop-list-databases
sqoop-codegen sqoop-list-tables
sqoop-create-hive-table sqoop-merge
sqoop-eval sqoop-metastore
sqoop-export sqoop-version
sqoop-help start-metastore.sh
sqoop-import stop-metastore.sh
[root@master bin]# pwd
/usr/local/soft/sqoop-1.4.7/bin
[root@master bin]# vim configure-sqoop
## Moved to be a runtime check in sqoop.
#if [ ! -d "${HCAT_HOME}" ]; then
# echo "Warning: $HCAT_HOME does not exist! HCatalog jobs will fail."
# echo 'Please set $HCAT_HOME to the root of your HCatalog installation.'
#fi#if [ ! -d "${ACCUMULO_HOME}" ]; then
# echo "Warning: $ACCUMULO_HOME does not exist! Accumulo imports will fail."
# echo 'Please set $ACCUMULO_HOME to the root of your Accumulo installation.'
#fi
[root@master bin]# sqoop version
22/04/18 16:38:25 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
Sqoop 1.4.7
git commit id 2328971411f57f0cb683dfb79d19d4d19d185dd8
Compiled by maugli on Thu Dec 21 15:59:58 STD 2017
[root@master bin]# ls
configure-sqoop sqoop-import-all-tables
configure-sqoop.cmd sqoop-import-mainframe
sqoop sqoop-job
sqoop.cmd sqoop-list-databases
sqoop-codegen sqoop-list-tables
sqoop-create-hive-table sqoop-merge
sqoop-eval sqoop-metastore
sqoop-export sqoop-version
sqoop-help start-metastore.sh
sqoop-import stop-metastore.sh
[root@master bin]# cd ..
[root@master sqoop-1.4.7]# pwd
/usr/local/soft/sqoop-1.4.7
[root@master sqoop-1.4.7]# ls
bin ivy README.txt
build.xml ivy.xml sqoop-1.4.7.jar
CHANGELOG.txt lib sqoop-patch-review.py
COMPILING.txt LICENSE.txt sqoop-test-1.4.7.jar
conf NOTICE.txt src
docs pom-old.xml testdata
[root@master sqoop-1.4.7]# cd lib/
[root@master lib]# ls
ant-contrib-1.0b3.jar
ant-eclipse-1.0-jvm1.2.jar
avro-1.8.1.jar
avro-mapred-1.8.1-hadoop2.jar
commons-codec-1.4.jar
commons-compress-1.8.1.jar
commons-io-1.4.jar
commons-jexl-2.1.1.jar
commons-lang3-3.4.jar
commons-logging-1.1.1.jar
hsqldb-1.8.0.10.jar
jackson-annotations-2.3.1.jar
jackson-core-2.3.1.jar
jackson-core-asl-1.9.13.jar
jackson-databind-2.3.1.jar
jackson-mapper-asl-1.9.13.jar
kite-data-core-1.1.0.jar
kite-data-hive-1.1.0.jar
kite-data-mapreduce-1.1.0.jar
kite-hadoop-compatibility-1.1.0.jar
opencsv-2.3.jar
paranamer-2.7.jar
parquet-avro-1.6.0.jar
parquet-column-1.6.0.jar
parquet-common-1.6.0.jar
parquet-encoding-1.6.0.jar
parquet-format-2.2.0-rc1.jar
parquet-generator-1.6.0.jar
parquet-hadoop-1.6.0.jar
parquet-jackson-1.6.0.jar
slf4j-api-1.6.1.jar
snappy-java-1.1.1.6.jar
xz-1.5.jar
[root@master lib]# cp /usr/local/soft/hive-1.2.1/lib/mysql-connector-java-5.1.49.jar ./
[root@master lib]# sqoop list-databases -connect jdbc:mysql://master:3306?useSSL=false -username root -password 123456
22/04/18 16:42:22 INFO sqoop.Sqoop: Running Sqoop version: 1.4.7
22/04/18 16:42:22 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
22/04/18 16:42:22 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/soft/hadoop-2.7.6/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/soft/hbase-1.4.6/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
information_schema
SpringBootDemo16
db1
db2
db3
hive
mysql
performance_schema
sys
[root@master data]# ls
data_skew.txt score.sql testArray
deal_tb.txt scoreStruct testArray2.txt
dept.txt score.txt testLieToLine.txt
DIANXIN.csv students_dt.txt theZenOfPython.txt
DIANXIN.sql student.sql theZen.txt
emp.txt students.txt udtfData.txt
new_db.sql students_year_month.txt wordcount
new_score.txt subject.txt words.txt
scoreMap ${system:java.io.tmpdir}
[root@master data]# pwd
/usr/local/soft/data
mysql> show databases;
+--------------------+
| Database |
+--------------------+
| information_schema |
| SpringBootDemo16 |
| db1 |
| db2 |
| db3 |
| hive |
| mysql |
| performance_schema |
| sys |
+--------------------+
9 rows in set (0.00 sec)
mysql> create database student;
Query OK, 1 row affected (0.01 sec)
mysql> use student;
Database changed
mysql> source /usr/local/soft/data/student.sql;
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected, 1 warning (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.02 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 1000 rows affected (0.01 sec)
Records: 1000 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected, 1 warning (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.00 sec)
mysql> source /usr/local/soft/data/score.sql;
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected, 1 warning (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.01 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 6000 rows affected (0.03 sec)
Records: 6000 Duplicates: 0 Warnings: 0
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected, 1 warning (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
Query OK, 0 rows affected (0.00 sec)
mysql> show tables;
+-------------------+
| Tables_in_student |
+-------------------+
| score |
| student |
+-------------------+
2 rows in set (0.00 sec)
mysql> select count(*) from student;
+----------+
| count(*) |
+----------+
| 1000 |
+----------+
1 row in set (0.01 sec)
mysql> select * from student limit 10;
+------------+-----------+------+--------+--------------+
| id | name | age | gender | clazz |
+------------+-----------+------+--------+--------------+
| 1500100001 | 施笑槐 | 22 | 女 | 文科六班 |
| 1500100002 | 吕金鹏 | 24 | 男 | 文科六班 |
| 1500100003 | 单乐蕊 | 22 | 女 | 理科六班 |
| 1500100004 | 葛德曜 | 24 | 男 | 理科三班 |
| 1500100005 | 宣谷芹 | 22 | 女 | 理科五班 |
| 1500100006 | 边昂雄 | 21 | 男 | 理科二班 |
| 1500100007 | 尚孤风 | 23 | 女 | 文科六班 |
| 1500100008 | 符半双 | 22 | 女 | 理科六班 |
| 1500100009 | 沈德昌 | 21 | 男 | 理科一班 |
| 1500100010 | 羿彦昌 | 23 | 男 | 理科六班 |
+------------+-----------+------+--------+--------------+
10 rows in set (0.00 sec)
mysql> select * from score limit 10;
+------------+------------+-------+
| id | subject_id | score |
+------------+------------+-------+
| 1500100001 | 1000001 | 98 |
| 1500100001 | 1000002 | 5 |
| 1500100001 | 1000003 | 137 |
| 1500100001 | 1000004 | 29 |
| 1500100001 | 1000005 | 85 |
| 1500100001 | 1000006 | 52 |
| 1500100002 | 1000001 | 139 |
| 1500100002 | 1000002 | 102 |
| 1500100002 | 1000003 | 44 |
| 1500100002 | 1000004 | 18 |
+------------+------------+-------+
10 rows in set (0.01 sec)