import java.sql.DriverManager

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
// Reads two MySQL tables over JDBC, joins them with Spark SQL and writes the
// result to HDFS as '\u0001'-delimited text. args(0) is the HDFS output path.
object MysqlToHdfs {

  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("spark://ip:7077").setAppName("test")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Load only the needed columns of each source table as DataFrames.
    val df1 = sqlContext.read.format("jdbc").options(Map(
      "url" -> "jdbc:mysql://192.0.0.1:3306/db1", "driver" -> "com.mysql.jdbc.Driver",
      "dbtable" -> "(select field1 from table1) as t1", "user" -> "xxx", "password" -> "xxx")).load()
    val df2 = sqlContext.read.format("jdbc").options(Map(
      "url" -> "jdbc:mysql://192.0.0.1:3306/db2", "driver" -> "com.mysql.jdbc.Driver",
      "dbtable" -> "(select b from t2) as t2", "user" -> "xxx", "password" -> "xxx")).load()
    // Register the DataFrames as temporary tables so they can be joined with Spark SQL.
    df1.registerTempTable("aa")
    df2.registerTempTable("bb")
    // Placeholder query: fill in the real column list and join condition.
    val resDF = sqlContext.sql("SELECT * FROM aa JOIN bb")
    // Delete the output path first so saveAsTextFile does not fail if it already exists.
    val hadoopConf = sc.hadoopConfiguration
    val hdfs = org.apache.hadoop.fs.FileSystem.get(hadoopConf)
    val path = new Path(args(0))
    if (hdfs.exists(path)) {
      hdfs.delete(path, true)
    }
    // Serialise each row as a '\u0001'-delimited line and write it to HDFS.
    resDF.rdd.map(r => r.toSeq.mkString("\u0001")).repartition(3).saveAsTextFile(args(0))
  }

  // Writes a joined result back to MySQL row by row through a JDBC PreparedStatement.
  // The method name, the DataFrame parameter and the jdbc url are placeholders for the real job.
  def writeBackToMysql(targetDataResult: DataFrame, jdbc: String): Unit = {
    // The MySQL connector must be on the classpath; JDBC 4 drivers register themselves with DriverManager.
    classOf[com.mysql.jdbc.Driver]
    val conn = DriverManager.getConnection(jdbc)
    // Clear the target table before reloading it (adjust or drop this step as needed).
    conn.prepareStatement("delete from t1").execute()
    // One '?' placeholder per column of the result (11 in this example).
    val prep = conn.prepareStatement("INSERT INTO t1 VALUES (?,?,?,?,?,?,?,?,?,?,?)")
    // Collect the result to the driver as '\u0001'-delimited lines.
    val arr = targetDataResult.rdd.map(row => row.toSeq.mkString("\u0001")).collect()
    // Bind every field as a string and insert the lines one by one.
    for (line <- arr) {
      val fields = line.split("\u0001")
      for (idx <- fields.indices) {
        prep.setString(idx + 1, fields(idx))
      }
      prep.executeUpdate()
    }
    conn.close()
  }
}
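
// Alternative sketch (not part of the original code): instead of collecting the rows to the
// driver and inserting them one by one through a PreparedStatement, Spark's DataFrameWriter.jdbc
// (available since Spark 1.4) can append the joined DataFrame directly to the target MySQL table.
// The object name, target table, url and credentials below are the same placeholders used above.
object MysqlWriteBackSketch {

  import java.util.Properties

  import org.apache.spark.sql.{DataFrame, SaveMode}

  def writeWithDataFrameWriter(resDF: DataFrame): Unit = {
    val props = new Properties()
    props.setProperty("user", "xxx")
    props.setProperty("password", "xxx")
    // The MySQL connector jar still has to be shipped with the job (e.g. via --jars).
    resDF.write.mode(SaveMode.Append).jdbc("jdbc:mysql://192.0.0.1:3306/db1", "t1", props)
  }
}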