我们在 IntelliJ IDEA 中新建 Scala 工程,具体代码如下:
import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.spark.{SparkConf, SparkContext}

import scala.util.control.NonFatal
/**
* Created by xubowen on 2017/4/9.
*/
/**
 * Word count over a text file on HDFS, with the per-word counts persisted
 * into a MySQL table via plain JDBC.
 *
 * Created by xubowen on 2017/4/9.
 */
object RDDtoMysql {

  /**
   * Inserts all (word, count) pairs of one RDD partition into MySQL.
   *
   * Intended to be passed to `RDD.foreachPartition`, so exactly one JDBC
   * connection is opened per partition (opening one per record would be
   * far too expensive).
   *
   * @param iterator the records of a single partition: (username, number)
   */
  def Insert(iterator: Iterator[(String, Int)]): Unit = {
    val database = "sparks"
    val user = "root"
    val password = "xbw"
    val connStr = s"jdbc:mysql://localhost:3306/$database?user=$user&password=$password"
    val sql = "INSERT INTO spark (username,number) VALUES (?, ?)"

    var conn: Connection = null
    var ps: PreparedStatement = null
    try {
      conn = DriverManager.getConnection(connStr)
      // Prepare the statement ONCE and reuse it for every record.
      // (Preparing it inside the loop would leak a PreparedStatement per
      // row, since only the last one would ever be closed.)
      ps = conn.prepareStatement(sql)
      iterator.foreach { case (word, count) =>
        ps.setString(1, word)
        ps.setInt(2, count)
        ps.addBatch()
      }
      // Send all rows of this partition in a single batch round-trip.
      ps.executeBatch()
    } catch {
      // NonFatal keeps OOM/InterruptedException etc. propagating;
      // include the message so failures are diagnosable.
      case NonFatal(e) => println(s"Mysql Exception: ${e.getMessage}")
    } finally {
      if (ps != null) {
        ps.close()
      }
      if (conn != null) {
        conn.close()
      }
    }
  }

  /**
   * Entry point: reads the text file given as args(0), counts words and
   * writes the counts to MySQL partition by partition.
   */
  def main(args: Array[String]): Unit = {
    require(args.nonEmpty, "usage: RDDtoMysql <input-path>")
    val conf = new SparkConf().setAppName("RDDToMysql").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      // e.g. "file:///Users/xubowen/Desktop/spark.txt" or an hdfs:// path
      val lines = sc.textFile(args(0), 1)
      val words = lines.flatMap { line => line.split(" ") } // still an RDD[String]
      val pairs = words.map { word => (word, 1) }
      // Sum the 1s per key (combines map-side and reduce-side).
      val wordCounts = pairs.reduceByKey(_ + _)
      wordCounts.foreachPartition(Insert)
    } finally {
      sc.stop() // always release the SparkContext
    }
  }
}
打包为 jar 文件:File -> Project Structure -> Artifacts -> + -> JAR -> From modules with dependencies,确认 Output Layout 中包含 module output,点 OK;然后 Build -> Build Artifacts -> Build。在输出目录中找到 jar 并上传到服务器。
新建hdfs文件夹
hadoop fs -mkdir /input
放文件到hdfs
hadoop fs -put /root/file/spark.txt /input
spark-submit --master spark://master:7077 --name RDDToMysql --class RDDtoMysql --executor-memory 1G --total-executor-cores 2 --jars /home/examples/mysql.jar /home/examples/RDDtoMysql.jar hdfs://master:9000/input/spark.txt
(注意:Spark master 与 HDFS NameNode 是两个不同的服务,端口不能相同;上面分别使用默认的 7077 和 9000,请按你集群的实际配置填写。)
我们可以在数据库看到结果了。
数据庞大,我只拉取了部分。