scala> val rdd =sc.parallelize(List((1,"aa"),(2,"bb"),(4,"cc")))
rdd: org.apache.spark.rdd.RDD[(Int, String)]= ParallelCollectionRDD[7] at parallelize at <console>:24
scala> rdd.saveAsSequenceFile("./seq")
scala> val rdd =sc.parallelize(List((1,"aa"),(2,"bb"),(4,"cc")))
rdd: org.apache.spark.rdd.RDD[(Int, String)]= ParallelCollectionRDD[11] at parallelize at <console>:24
scala> rdd.saveAsObjectFile("./obj")
/**
 * Reads the text files matching `/output/part*` through Hadoop's new-API
 * `TextInputFormat`, prints how many records were read, and prints every record.
 */
object ReadHadoopFile {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("HadoopFileApp")
    val sc = new SparkContext(sparkConf)
    try {
      val input = sc.newAPIHadoopFile[LongWritable, Text, TextInputFormat](
        "/output/part*",
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text])

      // Hadoop record readers re-use the same Writable instances, and Writables are
      // not java.io.Serializable, so copy each record into plain Scala values before
      // any action that ships records back to the driver (such as `first`).
      // The printed form of each pair is unchanged by this copy.
      val records = input.map { case (offset, line) => (offset.get, line.toString) }

      println("有多少条数据:" + records.count)
      records.foreach(print(_))
      // NOTE(review): the result of `first` is discarded, and `first` throws on an
      // empty RDD; kept because the original snippet calls it.
      records.first
    } finally {
      // Always release the context, even when one of the actions above fails.
      sc.stop()
    }
  }
}
保存到Hadoop
/**
 * Writes a small in-memory pair RDD to `/output/` using Hadoop's
 * `TextOutputFormat`, with `Text` keys and `LongWritable` values.
 */
object WriteHadoopFile {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("HadoopFileApp")
    val sc = new SparkContext(sparkConf)
    try {
      val initialRDD = sc.parallelize(Array(("hadoop", 30), ("hive", 71), ("cat", 11)))

      // Convert to the Writable types that are declared to the output format below,
      // so the declared key/value classes match what is actually written.
      val writableRDD = initialRDD.map { case (word, count) =>
        (new Text(word), new LongWritable(count.toLong))
      }

      writableRDD.saveAsHadoopFile(
        "/output/",
        classOf[Text],
        classOf[LongWritable],
        classOf[TextOutputFormat[Text, LongWritable]])
    } finally {
      // Always release the context, even when the save fails.
      sc.stop()
    }
  }
}
MySQL的输入输出
从MySQL读取数据
/**
 * Reads rows of the `staff` table with ids in the range 1..100 from MySQL via
 * `JdbcRDD`, caches them, then prints the row count followed by every row.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val sparkConf = new SparkConf().setMaster("local[2]").setAppName("HBaseApp")
  val sc = new SparkContext(sparkConf)
  try {
    val rdd = new JdbcRDD(
      sc,
      // Connection factory, invoked to open the connection used to run the query.
      () => {
        Class.forName("com.mysql.jdbc.Driver").newInstance()
        DriverManager.getConnection("jdbc:mysql://linux01:3306/company", "root", "123456")
      },
      // The two `?` placeholders receive the lower and upper id bound of a partition.
      "select * from staff where id >= ? and id <= ?;",
      1,   // lower bound
      100, // upper bound
      1,   // number of partitions
      // Map each result row to a tuple of its first three columns, read as strings.
      r => (r.getString(1), r.getString(2), r.getString(3))
    ).cache()

    println(rdd.count())
    rdd.foreach(println(_))
  } finally {
    // Always release the context, even when the query fails.
    sc.stop()
  }
}
查看MySQL中数据
/**
 * Builds a small RDD of (name, sex) pairs and hands each partition to
 * `insertData` for writing.
 *
 * @param args command-line arguments (unused)
 */
def main(args: Array[String]): Unit = {
  val sparkConf = new SparkConf().setMaster("local[2]").setAppName("HBaseApp")
  val sc = new SparkContext(sparkConf)
  try {
    val data = sc.parallelize(List(("Irelia", "Female"), ("Ezreal", "Male"), ("Alistar", "Female")))
    // One call of insertData per partition rather than one per record.
    data.foreachPartition(insertData)
  } finally {
    // Release the context once the write has finished or failed.
    sc.stop()
  }
}
/**
 * Inserts every (name, sex) pair from `iterator` into the `staff` table.
 *
 * A single connection and a single prepared statement are opened for the whole
 * iterator and are closed when the iterator is exhausted or an insert fails.
 *
 * @param iterator the (name, sex) pairs to insert
 */
def insertData(iterator: Iterator[(String, String)]): Unit = {
  val conn = DriverManager.getConnection("jdbc:mysql://linux01:3306/company", "root", "123456")
  try {
    // Prepare once and re-bind the parameters for each row.
    val ps = conn.prepareStatement("insert into staff(name, sex) values (?,?)")
    try {
      iterator.foreach { case (name, sex) =>
        ps.setString(1, name)
        ps.setString(2, sex)
        ps.executeUpdate()
      }
    } finally {
      ps.close()
    }
  } finally {
    conn.close()
  }
}