For my own reference only; others probably won't be able to follow it.
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.*;
import org.apache.spark.sql.hive.HiveContext;

import java.util.Arrays;
import java.util.List;
/**
 * Created by hadoop on 15-12-11.
 */
public class ceshi {
    public static void main(String[] args) {
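        // Build the Spark configuration: application name plus the standalone master URL.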
        SparkConf conf = new SparkConf().setAppName("alongloc6").setMaster("spark://192.168.3.31:7077");
        JavaSparkContext sc = new JavaSparkContext(conf);
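        // Load a CSV file from HDFS as an RDD of lines.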
        JavaRDD<String> distFile = sc.textFile("hdfs://192.168.3.31:9000/ceshi/wai.csv");
        //System.out.println("along3:" + distFile.first());
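        // Build a small RDD from an in-memory list.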
        List<Integer> listint = Arrays.asList(1, 2, 3, 4, 5);
        JavaRDD<Integer> distData = sc.parallelize(listint);
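        // Earlier RDD experiments, kept commented out: map/reduce over line lengths, filters, and counts.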
        //int zong = distFile.map(s -> s.length()).reduce((a, b) -> a + b);
        //int zong = distData.reduce((a, b) -> a + b);
        //listint.forEach(n -> { if (n > 3) System.out.println(n.toString()); });
        //System.out.println(distFile.filter(s -> s.length() > 20).count());
        /* System.out.println(distFile.filter(
                new Function<String, Boolean>() {
                    public Boolean call(String s) {
                        return s.contains("石膏线");
                    }
                }
        ).count()); */
        //distFile.foreach(n -> System.out.println("ah"));
        //long z = distData.filter(a -> a > 3).count();
        //long z2 = distData.reduce((a, b) -> a + b);
        //JavaRDD<Integer> ge = distFile.map(p -> p.length());
        //System.out.println(distFile.count() + " ge : " + ge.first());
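        // SQLContext is the Spark 1.x entry point for DataFrame and Spark SQL operations.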
        SQLContext sqlcont = new SQLContext(sc);
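        // Commented-out DataFrame experiments: read JSON from HDFS, select/filter/sort columns, and round-trip through parquet.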
        //DataFrame df = sqlcont.read().json("hdfs://192.168.3.31:9000/ceshi/jia2.json");
        //df.show();
        System.out.println("aaaaaaaaaaaaaaaaaaaaaaaa");
        //df.select("dw", "xm").show();
        //df.filter(df.col("age").gt(21)).show();
        //df.orderBy("age").show();
        //df.write().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");
        //DataFrame df2 = sqlcont.read().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");
        //df2.where("age>11").show();
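        // Commented-out HiveContext experiments: create and query a Hive table, and read/write ORC files.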
        /* HiveContext hivecon = new HiveContext(sc.sc());
        hivecon.sql("create table mytab(key INT,xm STRING)");
        hivecon.sql("insert into mytab values(11,'jingjing')");
        hivecon.sql("from mytab select key,xm").show(); */
        /* HiveContext hivecon = new HiveContext(sc.sc());
        DataFrame df3 = hivecon.read().json("hdfs://192.168.3.31:9000/ceshi/jia2.json");
        //df3.write().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc");
        //hivecon.read().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc").show();
        DataFrame df4 = hivecon.read().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc");
        df4.save("hdfs://192.168.3.31:9000/ceshi/xmage2.orc", "orc", SaveMode.Append);
        hivecon.read().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc").show(); */
        //System.out.println(df4.filter(df4.col("age").gt(12)).first().toString());
        /* A nice data-ingestion interface: users can send data in as JSON strings.
        List<String> jsonData = Arrays.asList(
                "[{\"dw\":\"公安厅\",\"age\":40,\"xm\":\"贵\"},{\"dw\":\"学校\",\"age\":20,\"xm\":\"小雪\"}]"
        );
        JavaRDD<String> rdd2 = sc.parallelize(jsonData);
        DataFrame df6 = sqlcont.read().json(rdd2);
        df6.write().mode(SaveMode.Append).parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");
        sqlcont.read().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet").show();
        */
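        // Read the parquet file back, filter it with the DataFrame API, then register a temp table and query it with SQL.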
        DataFrame df7 = sqlcont.read().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");
        df7.filter(df7.col("age").gt(18)).show();
        df7.registerTempTable("mytab");
        sqlcont.sql("select xm,age,dw from mytab where age>19 and age<40").show();
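        // Earlier experiment: find the longest line length via map + reduce.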
        //long md = distFile.map(s -> s.length()).reduce((a, b) -> { if (a > b) return a; else return b; });
        //System.out.println(md);
    }
}