spark创建dataFrame方式有很多种
一:通过类反射机制
举两个例子
1.通过List<java bean>创建dataFrame
/**
 * Applies a schema to a List of Java Beans.
 *
 * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
 * SELECT * queries will return the columns in an undefined order.
 *
 * @param data      the local Java Bean instances to turn into rows
 * @param beanClass the bean class whose properties define the schema
 * @return a DataFrame backed by a LocalRelation (the data stays local; it is
 *         not distributed across the cluster)
 * @since 1.6.0
 */
def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = {
// Derive the attribute (column) sequence from the bean class via reflection.
// NOTE(review): getSchema is a helper defined elsewhere in this file — its
// exact ordering behavior is what the WARNING above refers to.
val attrSeq = getSchema(beanClass)
// Convert each bean into an internal row matching attrSeq's field order.
val rows = SQLContext.beansToRows(data.asScala.iterator, beanClass, attrSeq)
Dataset.ofRows(self, LocalRelation(attrSeq, rows.toSeq))
}
2. 通过JavaRDD<java bean>创建dataFrame
/**
 * Applies a schema to an RDD of Java Beans.
 *
 * WARNING: Since there is no guaranteed ordering for fields in a Java Bean,
 * SELECT * queries will return the columns in an undefined order.
 *
 * @param rdd       the Java RDD of bean instances to convert
 * @param beanClass the bean class whose properties define the schema
 * @return a DataFrame distributed over the RDD's partitions
 * @since 2.0.0
 */
def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
// Thin Java-API wrapper: unwrap the underlying Scala RDD and delegate to
// the RDD overload, which performs the bean-to-row conversion.
createDataFrame(rdd.rdd, beanClass)
}
方式1样例代码:
// Build a one-element list of beans and register it as a temp view;
// Spark derives the schema from the GciGri bean's getters.
ArrayList<GciGri> beans = new ArrayList<GciGri>();
GciGri bean = new GciGri();
bean.setGci(gci);
bean.setGri(gri);
beans.add(bean);
spark.createDataFrame(beans, GciGri.class).createOrReplaceTempView("testtesttest");
package cn.com.dtmobile.test;
import java.io.Serializable;
public class GciGri implements Serializable {
private static final long serialVersionUID = 1L;
private int Gci;
private int Gri;
public int getGci() {
return Gci;
}
public void setGci(int gci) {
Gci = gci;
}
public int getGri() {
return Gri;
}
public void setGri(int gri) {
Gri = gri;
}
}
方式2跟方式1基本相同,区别只在于传入的是 JavaRDD&lt;java bean&gt; 而不是 List,就不再贴 demo 了
二:编程指定schema
// Create an RDD JavaRDD<String> peopleRDD = spark.sparkContext() .textFile("examples/src/main/resources/people.txt", 1) .toJavaRDD(); // The schema is encoded in a string String schemaString = "name age"; // Generate the schema based on the string of schema List<StructField> fields = new ArrayList<>(); for (String fieldName : schemaString.split(" ")) { StructField field = DataTypes.createStructField(fieldName, DataTypes.StringType, true); fields.add(field); } StructType schema = DataTypes.createStructType(fields); // Convert records of the RDD (people) to Rows JavaRDD<Row> rowRDD = peopleRDD.map((Function<String, Row>) record -> { String[] attributes = record.split(","); return RowFactory.create(attributes[0], attributes[1].trim()); }); // Apply the schema to the RDD Dataset<Row> peopleDataFrame = spark.createDataFrame(rowRDD, schema); // Creates a temporary view using the DataFrame peopleDataFrame.createOrReplaceTempView("people"); // SQL can be run over a temporary view created using DataFrames Dataset<Row> results = spark.sql("SELECT name FROM people");
跟上面编程指定 schema 的方式基本一样,区别是通过传入 List&lt;Row&gt;(而不是 JavaRDD&lt;Row&gt;)来生成 DataFrame
// Comma-separated column names; every column is declared as a nullable Double.
String schemaString = "AoD,EoD,angle,gain";
List<StructField> fields = new ArrayList<>();
for (String fieldName : schemaString.split(",")) {
    fields.add(DataTypes.createStructField(fieldName, DataTypes.DoubleType, true));
}
StructType schema = DataTypes.createStructType(fields);

List<Row> horizontal = new ArrayList<Row>();
// BUG FIX: the schema declares DoubleType for all four columns, so every
// value in the Row must be a Double. The original passed boxed Integers
// (60, 24, 180), which makes Spark fail at runtime when it validates the
// Row against the schema. Use double literals instead.
Row row = RowFactory.create(60.0, 24.0, 180.0, 1.3520);
horizontal.add(row);
spark.createDataFrame(horizontal, schema).write().mode(SaveMode.Overwrite).saveAsTable("beam_pattern");