1. 首先用 Sqoop 将 MySQL 数据定时导入到 HDFS 中，然后用 Spark Streaming 实时读取 HDFS 的数据，并把数据写入 Elasticsearch 中。代码如下：
------bigdata.project.spark----------
package bigdata.project.spark
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.elasticsearch.spark.sql._
object sparkstreamingcopynew {
def main(args: Array[String]): Unit = {
val sparkconf = new SparkConf().setMaster("local[2]").setAppName("sparkstreamingcopynew")
sparkconf.set("es.nodes", "localhost")
sparkconf.set("es.port", "9200")
sparkconf.set("es.index.auto.create", "true")
sparkconf.set("spark.driver.allowMultipleContexts","true")
sparkconf.set("empty", "true")