Spark 2.0.2 版本
1. 读取 Spark 表
创建 SparkSession 并执行 SQL,将查询结果的每一行(Row)转换为 Object[],再把每条记录包装成带时间戳的 WindowedValue(全局窗口,时间戳取 BoundedWindow.TIMESTAMP_MIN_VALUE)。
public RDD<WindowedValue<Object[]>> getSourceRDD(PipelineOptions pipelineOptions, SparkContext sparkContext)
{
SparkSession sparkSession = SparkSession.builder().appName("XXX").master(sparkContext.master())
.enableHiveSupport().getOrCreate();
sparkSession.sql("use " + this.read.database); //数据库名,默认是default
Dataset rowset = sparkSession.sql(this.read.sql); //SQL 语句
RDD rows = rowset.rdd().map(new JavaMapRow2ObjectArray()
{
public Object[] call(Row row) {
Object[] colArr = new Object[row.size()];
for (int i = 0; i < row.size(); ++i) {
colArr[i] = row.get(i);
}
return colArr; }
}
, ScalaUtil.getClassTag([Ljava.lang.Object.class));
RDD windowrows = rows.map(new JavaMap2WindowValue()
{
public WindowedValue<Object[]> call(Object[] record) {
return WindowedValue.timestampedValueInGlobalWindow(record, BoundedWindow.TIMESTAMP_MIN_VALUE); }
}
, ClassTag..MODULE$.apply(WindowedValue.class));
return windowrows;
}
转换函数类:作为 Scala Function1 的 Java 适配器,把每行记录包装为带时间戳的 WindowedValue
public abstract class JavaMap2WindowValue<T> extends AbstractFunction1<T, WindowedValue<T>>
implements Serializable
{
public WindowedValue<T> apply(T record)
{
return call(record);
}
public abstract WindowedValue<T> call(T paramT);
}
工具类:封装 Java 调用 Scala 时常用的 ClassTag、Seq、Manifest 转换
public class ScalaUtil
{
public static <T> ClassTag<T> getClassTag(Class<T> clazz)
{
return ClassTag..MODULE$.apply(clazz);
}
public static <K, V> ClassTag<Tuple2<K, V>> getTuple2ClassTag()
{
return ((ClassTag)getClassTag(Tuple2.class));
}
public static <T> Seq<T> toScalaSeq(List<T> list) {
return JavaConversions.asScalaBuffer(list);
}
public static void main(String[] args) {
System.out.print(ClassTag..MODULE$.apply(Tuple2.class)); }
public static <T> Manifest<T> getManifest(Class<T> clazz) {
return ManifestFactory.classType(clazz);
}
}