Data Sync: RDBMS to RDBMS
package com.sutpc.bigdata.sync

import java.util.Properties

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession

object DataSync {

  def main(args: Array[String]): Unit = {
    // Derive the application name from the object's class name, dropping the trailing '$'
    val app = s"${this.getClass.getSimpleName}".filter(!_.equals('$'))
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark: SparkSession = SparkSession.builder
      .appName(app)
      .master("local[*]")
      .config("spark.shuffle.consolidateFiles", "true")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .config("spark.streaming.kafka.maxRatePerPartition", "500")
      .config("spark.streaming.stopGracefullyOnShutdown", "true")
      .config("spark.network.timeout", "600")
      .config("spark.streaming.kafka.consumer.poll.ms", "60000")
      .config("spark.core.connection.ack.wait.timeout", "900")
      .config("spark.rpc.message.maxSize", "50")
      .config("spark.akka.timeout", "900")
      .getOrCreate()

    // Source PostgreSQL connection
    val transpass_url = s"jdbc:postgresql://10.10.201.28:5432/postgres"
    val transpass_prop = new Properties()
    transpass_prop.put("user", "postgres")
    transpass_prop.put("password", "123456")
    transpass_prop.put("driver", "org.postgresql.Driver")

    // Target PostgreSQL connection
    val key_url = s"jdbc:postgresql://10.10.201.50:54320/vehicle"
    val key_prop = new Properties()
    key_prop.put("user", "postgres")
    key_prop.put("password", "123456")
    key_prop.put("driver", "org.postgresql.Driver")

    import spark.implicits._

    // Read the whole table over JDBC from the source and write it to the target
    spark.read.jdbc(transpass_url, "public.huangpu_road", transpass_prop)
      .write.jdbc(key_url, "public.huangpu_road", key_prop)
  }
}
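The single read.jdbc/write.jdbc call above pulls the whole table through one connection, which is fine for small tables but slow for large ones. Below is a minimal sketch of a partitioned variant; the partition column "gid", its bounds, and the partition count are illustrative assumptions, not values from the original code.

// Hypothetical partitioned copy: splits the JDBC read into parallel tasks by a numeric column.
// "gid", the bounds, and numPartitions are assumptions for illustration only.
import org.apache.spark.sql.SaveMode

val partitioned = spark.read.jdbc(
  transpass_url,
  "public.huangpu_road",
  "gid",      // partition column (assumed numeric key)
  1L,         // lower bound of gid
  1000000L,   // upper bound of gid
  8,          // number of parallel partitions / connections
  transpass_prop)

partitioned.write
  .mode(SaveMode.Append) // default is ErrorIfExists; pick Append or Overwrite explicitly
  .jdbc(key_url, "public.huangpu_road", key_prop)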
Data Sync: Hive to Hive
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.joda.time.DateTime // assumed: Joda-Time, for DateTime.now()

object SparkHive2Hive {

  def main(args: Array[String]): Unit = {
    val startTime = DateTime.now()
    val app = s"${this.getClass.getSimpleName}".filter(!_.equals('$'))
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .appName(app)
      .config("spark.sql.parquet.writeLegacyFormat", true)
      .getOrCreate()

    // Allow dynamic partition inserts without specifying a static partition key
    spark.sql("set spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict")
    spark.read.parquet("/import/transpaas_data/t_phone_home_distribute")
      .write
      .mode("append")
      .format("hive")
      .partitionBy("source", "city", "year", "month", "day")
      .saveAsTable("transpaas_tag.t_phone_home_distribute")

    spark.sql("set spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict")
    spark.read.parquet("/import/transpaas_data/t_phone_home_work_rela")
      .write
      .mode("append")
      .format("hive")
      .partitionBy("source", "year", "month", "day")
      .saveAsTable("transpaas_tag.t_phone_home_work_rela")

    spark.sql("set spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict")
    spark.read.parquet("/import/transpaas_data/t_phone_work_distribute")
      .write
      .mode("append")
      .format("hive")
      // the original text breaks off here; partition columns and target table name
      // below are inferred from the pattern of the preceding copies
      .partitionBy("source", "city", "year", "month", "day")
      .saveAsTable("transpaas_tag.t_phone_work_distribute")
  }
}
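The same read-parquet / write-to-Hive step repeats once per table, so it can be factored into a small helper. A minimal sketch follows, assuming every source path lives under /import/transpaas_data and every target table under the transpaas_tag database; the helper name syncParquetToHive is illustrative, not part of the original code.

// Hypothetical helper wrapping the repeated parquet-to-Hive copy.
// The path prefix, database name, and function name are assumptions for illustration.
def syncParquetToHive(spark: SparkSession, tableName: String, partitionCols: Seq[String]): Unit = {
  spark.sql("set spark.hadoop.hive.exec.dynamic.partition.mode=nonstrict")
  spark.read.parquet(s"/import/transpaas_data/$tableName")
    .write
    .mode("append")
    .format("hive")
    .partitionBy(partitionCols: _*)
    .saveAsTable(s"transpaas_tag.$tableName")
}

// Usage mirroring the calls above
syncParquetToHive(spark, "t_phone_home_distribute", Seq("source", "city", "year", "month", "day"))
syncParquetToHive(spark, "t_phone_home_work_rela", Seq("source", "year", "month", "day"))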