实现 MySQL 与 Elasticsearch 的数据同步
JDBC importer for Elasticsearch
The Java Database Connectivity (JDBC) importer allows you to fetch data from JDBC sources for indexing into Elasticsearch.
The JDBC importer was designed for tabular data. If you have tables with many joins, the JDBC importer is limited in its ability to reconstruct deeply nested objects as JSON and to handle object semantics such as object identity. Although it would be possible to extend the JDBC importer with a mapping feature in which all object properties could be specified, the current solution focuses on rather simple tabular data streams.
一、安装 elasticsearch-jdbc-2.3.2.0-dist
[root@hadoop0 bigdata]# ls
apache-flume-1.6.0-bin apache-tomcat-7.0.69.zip hbase-1.1.5 jstorm-0.9.6.2 solr-5.5.2 sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz taokeeper-monitor.tar.gz
apache-flume-1.6.0-bin.tar.gz
elasticsearch-jdbc-2.3.2.0-dist.zip hbase-1.1.5-bin.tar.gz jstorm-0.9.6.2.zip solr-5.5.2.zip stomr096 tomcat7 apache-hive-2.0.1-bin.tar.gz hadoop272 hive2.0 kafka sqoop-1.4.6 stormtest-0.0.1-SNAPSHOT.jar zookeeper
[root@hadoop0 bigdata]# unzip elasticsearch-jdbc-2.3.2.0-dist.zip
[root@hadoop0 bigdata]# ls
apache-flume-1.6.0-bin apache-tomcat-7.0.69.zip hadoop272 hive2.0 kafka sqoop-1.4.6 stormtest-0.0.1-SNAPSHOT.jar zookeeper
apache-flume-1.6.0-bin.tar.gz elasticsearch-jdbc-2.3.2.0 hbase-1.1.5 jstorm-0.9.6.2 solr-5.5.2 sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz
taokeeper-monitor.tar.gz
apache-hive-2.0.1-bin.tar.gz elasticsearch-jdbc-2.3.2.0-dist.zip
hbase-1.1.5-bin.tar.gz jstorm-0.9.6.2.zip solr-5.5.2.zip stomr096 tomcat7
[root@hadoop0 bigdata]# cd elasticsearch-jdbc-2.3.2.0
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# ls
bin lib
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# cd bin/
二、查看示例
[root@hadoop0 bin]# ls
geo.dump mysql-delete-document.sh mysql-geo-shapes.sh mysql-metawiki.sh mysql-schedule.sh mysql-state-example.sh oracle-connection-properties.sh postgresql-simple-example.sh
log4j2.xml mysql-geo-points.sh mysql-ignore-null-values.sh mysql-schedule-acknowledge.sh mysql-simple-example.bat mysql-wikimedia-example.sh postgresql-geo.sh
[root@hadoop0 bin]# cat mysql-schedule.sh
#!/bin/sh
# Example: feeds a JSON importer definition on stdin to
# org.xbib.tools.JDBCImporter, launched through org.xbib.tools.Runner.
# The definition contains a "schedule" entry (presumably a Quartz-style cron
# expression - confirm against the importer documentation), a MySQL JDBC URL
# with empty user/password, and an SQL statement that aliases id to _id and
# supplies the literals "myjdbc" / "mytype" as the _index / _type columns.
#
# NOTE(review): ${BASH_SOURCE[0]} is a bash array expansion, but the shebang
# asks for /bin/sh. This only works where sh is actually bash - confirm on
# the target system.

# Absolute directory of this script; bin/ and lib/ are located relative to it.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
bin=${DIR}/../bin
lib=${DIR}/../lib
# The JSON is single-quoted, so the shell performs no expansion inside it.
# "${lib}/*" is quoted, so the shell does not glob it and java receives the
# wildcard as written.
echo '
{
"type" : "jdbc",
"jdbc" : {
"metrics" : {
"lastexecutionstart" : "2015-05-10T10:58:00.038Z",
"lastexecutionend" : "2015-05-10T10:58:00.044Z",
"counter" : 1234
},
"schedule" : "0 0-59 0-23 ? * *",
"url" : "jdbc:mysql://localhost:3306/test",
"user" : "",
"password" : "",
"sql" : "select *, id as _id, \"myjdbc\" as _index, \"mytype\" as _type from test",
"index" : "myjdbc",
"type" : "mytype",
"index_settings" : {
"index" : {
"number_of_shards" : 1
}
}
}
}
' | java \
-cp "${lib}/*" \
-Dlog4j.configurationFile=${bin}/log4j2.xml \
org.xbib.tools.Runner \
org.xbib.tools.JDBCImporter
[root@hadoop0 bin]# cat mysql-delete-document.sh
#!/bin/sh
# Example: feeds a JSON importer definition on stdin to
# org.xbib.tools.JDBCImporter, launched through org.xbib.tools.Runner.
# The SQL statement selects column deletethisdoc as _id and a value named
# _optype from table orders (presumably to request deletion of the matching
# documents - confirm against the importer documentation).
#
# NOTE(review): `delete` appears unquoted in the SQL text below; if a string
# literal was intended, its quotes may have been lost - verify against the
# shipped file.
# NOTE(review): ${BASH_SOURCE[0]} is a bash array expansion, but the shebang
# asks for /bin/sh. This only works where sh is actually bash - confirm on
# the target system.

# Absolute directory of this script; bin/ and lib/ are located relative to it.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
bin=${DIR}/../bin
lib=${DIR}/../lib
# The JSON is single-quoted, so the shell performs no expansion inside it.
echo '{
"type" : "jdbc",
"jdbc" : {
"url" : "jdbc:mysql://localhost:3306/test",
"user" : "",
"password" : "",
"sql" : "select deletethisdoc as _id, delete as _optype from orders"
}
}
' | java \
-cp "${lib}/*" \
-Dlog4j.configurationFile=${bin}/log4j2.xml \
org.xbib.tools.Runner \
org.xbib.tools.JDBCImporter
[root@hadoop0 bin]# cd ../
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# pwd
/opt/bigdata/elasticsearch-jdbc-2.3.2.0
三、编写导入例子
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# vi import.sh
#!/bin/sh
# import.sh - import the MySQL table "ffcs" (database "test" on localhost)
# into the Elasticsearch index "test", type "ffcs", on cluster "ffcs-test".
#
# The JSON definition is handed to org.xbib.tools.JDBCImporter on stdin.
# NOTE(review): the database credentials are stored here in plain text.

# Installation directories of elasticsearch-jdbc-2.3.2.0.
bin=/opt/bigdata/elasticsearch-jdbc-2.3.2.0/bin
lib=/opt/bigdata/elasticsearch-jdbc-2.3.2.0/lib

# "${lib}/*" stays quoted so the shell does not glob it; the quoted
# here-document delimiter passes the JSON through without any expansion.
java \
  -cp "${lib}/*" \
  "-Dlog4j.configurationFile=${bin}/log4j2.xml" \
  org.xbib.tools.Runner \
  org.xbib.tools.JDBCImporter <<'JSON'
{
"type" : "jdbc",
"jdbc": {
"elasticsearch.autodiscover":true,
"elasticsearch.cluster":"ffcs-test",
"url":"jdbc:mysql://localhost:3306/test",
"user":"root",
"password":"123456",
"sql":"select * from ffcs",
"index" : "test",
"type" : "ffcs"
}
}
JSON
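四、测试并解决 JDK 版本问题
说明：下面的报错 “Unsupported major.minor version 52.0” 中，52.0 是 Java 8 的类文件版本号，说明 elasticsearch-jdbc 需要 Java 8，而当前默认的 java 版本低于 8，因此需要把 /usr/bin/java 改为指向 JDK 1.8。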
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# sh import.sh
Exception in thread "main" java.lang.UnsupportedClassVersionError: org/xbib/tools/Runner : Unsupported major.minor version 52.0
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:800)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:449)
at java.net.URLClassLoader.access$100(URLClassLoader.java:71)
at java.net.URLClassLoader$1.run(URLClassLoader.java:361)
at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
at sun.launcher.LauncherHelper.checkAndLoadMain(LauncherHelper.java:482)
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]#
[root@hadoop0 bigdata]# cd jdk
jdk1.8.0_131/ jdk-8u131-linux-i586.tar.gz
[root@hadoop0 bigdata]# cd jdk1.8.0_131/
[root@hadoop0 jdk1.8.0_131]# ls
bin COPYRIGHT db include javafx-src.zip jre lib LICENSE man README.html release src.zip THIRDPARTYLICENSEREADME-JAVAFX.txt THIRDPARTYLICENSEREADME.txt
[root@hadoop0 jdk1.8.0_131]# cd bin/
[root@hadoop0 bin]# ls
appletviewer idlj java javafxpackager javapackager jcmd jdb jinfo jmc jrunscript jstat keytool pack200 rmid serialver unpack200 xjc
ControlPanel jar javac javah java-rmi.cgi jconsole jdeps jjs jmc.ini jsadebugd jstatd native2ascii policytool rmiregistry servertool wsgen
extcheck jarsigner javadoc javap javaws jcontrol jhat jmap jps jstack jvisualvm orbd rmic schemagen tnameserv wsimport
[root@hadoop0 bin]# pwd
/opt/bigdata/jdk1.8.0_131/bin
[root@hadoop0 bin]# wheris java
-bash: wheris: command not found
[root@hadoop0 bin]# whereis java
java: /usr/bin/java /etc/java /usr/lib/java /usr/share/java /opt/jdk1.7.0_79/bin/java /usr/share/man/man1/java.1.gz
[root@hadoop0 bin]# ls -l /usr/bin/java
lrwxrwxrwx. 1 root root 22 Mar 3 2016 /usr/bin/java -> /etc/alternatives/java
[root@hadoop0 bin]# rm -rf /usr/bin/java
[root@hadoop0 bin]# ln -l /opt/bigdata/jdk1.8.0_131/bin
ln: invalid option -- 'l'
Try `ln --help' for more information.
[root@hadoop0 bin]#
[root@hadoop0 bin]# ln -l /opt/bigdata/jdk1.8.0_131/bin/java /usr/bin/java
ln: invalid option -- 'l'
Try `ln --help' for more information.
[root@hadoop0 bin]# ln -s /opt/bigdata/jdk1.8.0_131/bin/java /usr/bin/java
[root@hadoop0 bin]# java -version
java version "1.8.0_131"
Java(TM) SE Runtime Environment (build 1.8.0_131-b11)
Java HotSpot(TM) Client VM (build 25.131-b11, mixed mode)
[root@hadoop0 bin]# pwd
/opt/bigdata/jdk1.8.0_131/bin
[root@hadoop0 bin]# cd ../..//
[root@hadoop0 bigdata]# ls
apache-flume-1.6.0-bin apache-tomcat-7.0.69.zip hadoop272 hive2.0 jstorm-0.9.6.2 solr-5.5.2 sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz taokeeper-monitor.tar.gz
apache-flume-1.6.0-bin.tar.gz elasticsearch-jdbc-2.3.2.0 hbase-1.1.5 jdk1.8.0_131 jstorm-0.9.6.2.zip solr-5.5.2.zip stomr096 tomcat7
apache-hive-2.0.1-bin.tar.gz elasticsearch-jdbc-2.3.2.0-dist.zip hbase-1.1.5-bin.tar.gz jdk-8u131-linux-i586.tar.gz kafka sqoop-1.4.6 stormtest-0.0.1-SNAPSHOT.jar zookeeper
[root@hadoop0 bigdata]# cd elasticsearch-jdbc-2.3.2.0/
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# ls
bin import.sh lib
五、测试OK
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# vi import.sh
#!/bin/sh
# import.sh - import the MySQL table "people" (database "test" on
# 192.168.1.102:3306) into the Elasticsearch index "index_users3", type
# "ffcs", on cluster "my-application" reached at 192.168.1.111:9300.
#
# The JSON definition is handed to org.xbib.tools.JDBCImporter on stdin.
# NOTE(review): the database credentials are stored here in plain text.

# Installation directories of elasticsearch-jdbc-2.3.2.0.
bin=/opt/bigdata/elasticsearch-jdbc-2.3.2.0/bin
lib=/opt/bigdata/elasticsearch-jdbc-2.3.2.0/lib

# "${lib}/*" stays quoted so the shell does not glob it; the quoted
# here-document delimiter passes the JSON through without any expansion.
java \
  -cp "${lib}/*" \
  "-Dlog4j.configurationFile=${bin}/log4j2.xml" \
  org.xbib.tools.Runner \
  org.xbib.tools.JDBCImporter <<'JSON'
{
"type" : "jdbc",
"jdbc": {
"elasticsearch.autodiscover":true,
"elasticsearch.cluster":"my-application",
"url":"jdbc:mysql://192.168.1.102:3306/test",
"user":"root",
"password":"root",
"sql":"select * from people",
"elasticsearch" : {
"host" : "192.168.1.111",
"port" : 9300
},
"index" : "index_users3",
"type" : "ffcs"
}
}
JSON
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]# sh import.sh
[root@hadoop0 elasticsearch-jdbc-2.3.2.0]#