Spark 常用操作整理（Java API）
1. 获取 Spark 环境：通过 getSparkContext() 获得 JavaSparkContext
2. 创建 DataFrame
// Obtain the Spark context; a null context means the Spark environment
// has not been started for this step.
JavaSparkContext sc = this.getSparkContext();
if (sc == null) {
    throw new TransStepException("spark env no run");
}
SQLContext sqlContext = new SQLContext(sc);
// Build the schema (header) from a list of field definitions.
// NOTE(review): structFieldList is assumed to be in scope here — confirm in caller.
StructType structType = DataTypes.createStructType(structFieldList);
// rowRDD is a JavaRDD<Row>; each Row's columns must line up with structType.
// (Original note had the placeholder "rdd内容" here, which is not valid Java.)
DataFrame newDF = sqlContext.createDataFrame(rowRDD, structType);
3. 通过 DataFrame 创建 RDD
// Pull the Spark DataFrame out of this step's DataRows wrapper, then
// expose it as a JavaRDD<Row> for RDD-level processing.
DataRows rows = getRows();
// NOTE(review): assumes the concrete type is SparkDataRows — confirm upstream.
DataFrame df = ((SparkDataRows) rows).getDataFrame();
JavaRDD<Row> rowRdd = df.javaRDD();
4. 通过 DataFrame 获取表头（schema）
StructType st = df.schema();
5. 从 StructType 中取出各列的 StructField 数组
StructField[] structFields = st.fields();
6. 手动创建表头（构造 StructType）
// Build a schema (header) by hand: collect StructField definitions into a
// list, then wrap them into a StructType.
// (Renamed the list to structFieldList — the original declared "structFields"
// but then passed the undefined name "structFieldList" to createStructType.)
List<StructField> structFieldList = new ArrayList<StructField>();
// One entry per column: createStructField(name, dataType, nullable).
// Example below uses StringType (the notes' stated default) and allows nulls;
// replace "columnName" with the real column name.
structFieldList.add(DataTypes.createStructField("columnName", DataTypes.StringType, true));
StructType structType = DataTypes.createStructType(structFieldList);