import java.util.ArrayList;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;public classTxtToParquetDemo {public static voidmain(String[] args) {
SparkConf conf= new SparkConf().setAppName("TxtToParquet").setMaster("local");
SparkSession spark=SparkSession.builder().config(conf).getOrCreate();
reflectTransform(spark);//Java反射
dynamicTransform(spark);//动态转换
}/**
* 通过Java反射转换
* @param spark*/
private static voidreflectTransform(SparkSession spark)
{
JavaRDD source = spark.read().textFile("stuInfo.txt").javaRDD();
JavaRDD rowRDD = source.map(line ->{
String parts[]= line.split(",");
Student stu= newStudent();
stu.setSid(parts[0]);
stu.setSname(parts[1]);
stu.setSage(Integer.valueOf(parts[2]));returnstu;
});
Dataset df = spark.createDataFrame(rowRDD, Student.class);
df.select("sid", "sname", "sage").
coalesce(1).write().mode(SaveMode.Append).parquet("parquet.res");
}/**
* 动态转换
* @param spark*/
private static voiddynamicTransform(SparkSession spark)
{
JavaRDD source = spark.read().textFile("stuInfo.txt").javaRDD();
JavaRDD rowRDD = source.map( line ->{
String[] parts= line.split(",");
String sid= parts[0];
String sname= parts[1];int sage = Integer.parseInt(parts[2]);returnRowFactory.create(
sid,
sname,
sage
);
});
ArrayList fields = new ArrayList();
StructField field= null;
field= DataTypes.createStructField("sid", DataTypes.StringType, true);
fields.add(field);
field= DataTypes.createStructField("sname", DataTypes.StringType, true);
fields.add(field);
field= DataTypes.createStructField("sage", DataTypes.IntegerType, true);
fields.add(field);
StructType schema=DataTypes.createStructType(fields);
Dataset df =spark.createDataFrame(rowRDD, schema);
df.coalesce(1).write().mode(SaveMode.Append).parquet("parquet.res1");
}
}