1.从数据库读数据创建DF
/** SQLContext approach 1: create a DataFrame by reading the `sy_users` table from MySQL over JDBC. */
def createDataFrame(sqlCtx: SQLContext): Unit = {
  // JDBC connection properties; the driver class targets the legacy mysql-connector.
  val prop = new Properties()
  prop.put("user", "root")
  prop.put("password", "abc314")
  prop.put("driver", "com.mysql.jdbc.Driver")
  // read.jdbc(url, table, connectionProperties) loads the whole table as a DataFrame.
  val dataDF = sqlCtx.read.jdbc("jdbc:mysql://localhost:3306/test", "sy_users", prop)
  dataDF.show()
} // fix: closing brace was missing in the original snippet
2. 通过SQLContext的createDataFrame方法构建DF
/** SQLContext approach 2: build a DataFrame from a text file with an explicitly declared schema. */
def createDtaFrame(sparkCtx: SparkContext, sqlCtx: SQLContext): Unit = {
  // Each input line is expected to look like "name,age,studentNo".
  val rowRDD = sparkCtx
    .textFile("D://TxtData/studentInfo.txt")
    .map { line =>
      val fields = line.split(",")
      Row(fields(0), fields(1).toInt, fields(2))
    }
  // Schema matching the three parsed columns above.
  val schema = StructType(Seq(
    StructField("name", StringType, true),
    StructField("age", IntegerType, true),
    StructField("studentNo", StringType, true)
  ))
  val dataDF = sqlCtx.createDataFrame(rowRDD, schema)
  // Register the DataFrame as an in-memory temp table so it is queryable via SQL.
  dataDF.registerTempTable("Student")
  val result = sqlCtx.sql("select * from Student")
  result.show()
  // dataDF.select("name").show()
  // dataDF.filter(dataDF("age") <14).show()
  // dataDF.where("age <> ''").show()
}
3.通过隐式转换构建DF
/** SQLComtext 创建 DataFrame 3 **/
/** One student record; `toDF()` derives DataFrame column names from these field names,
  * so they are named to match the intended name/age/studentNo schema (the original
  * IDE-generated names str/i/str1 produced meaningless column names). Construction
  * in this file is positional, so the rename is caller-compatible. */
case class Person(name: String, age: Int, studentNo: String)
/** SQLContext approach 3: build a DataFrame from a case-class RDD via implicit conversion. */
def createDF(sparkCtx: SparkContext, sqlCtx: SQLContext): Unit = {
  // Brings toDF() into scope for RDDs of case classes.
  import sqlCtx.implicits._
  val people = sparkCtx.textFile("D://TxtData/studentInfo.txt").map { line =>
    val cols = line.split(",")
    Person(cols(0), cols(1).toInt, cols(2))
  }
  people.toDF().show()
}
推荐第三种方式,简洁明了!
4.DF入Hive库实例
// Drop placeholder rows, expose the result as a temp table, then overwrite one Hive partition.
// NOTE(review): the partition values are concatenated into the SQL text — make sure
// year/month/day come from trusted code, not external input.
result.where("userID <> '-' and newsID <> ''").registerTempTable("temp_newsTable")
hiveCtx.sql(
  s"INSERT OVERWRITE TABLE mmbigdata.ods_mm_news partition (year='$year',month='$month',day='$day',type='view') select * from temp_newsTable"
)