/** *WordCount */ public class WordCountJavaTest { public static void main(String[] args) throws Exception{ final ExecutionEnvironment env=ExecutionEnvironment.getExecutionEnvironment(); env.setNumberOfExecutionRetries(3);//重试次数,启动容错 /** List list=new ArrayList(); String s="hello wold hello";//添加数据源 String[]words=s.split("\\W+");//格式化 for(String s1:words){ list.add(s1); } DataSet<Tuple2<String,Integer>> ds=env.fromCollection(list)//添加数据源 */ //DataSet<Tuple2<String,Integer>> ds=env.readTextFile("text") DataSet<Tuple2<String,Integer>> ds=env.fromElements("hello wold hello")//添加数据源 //************************************************************************************************* // Map MapPartition FlatMap filter .map((s)->s).returns(String.class) /** .map(new MapFunction<String,String>() { @Override public String map(String s) throws Exception { //可以在这里打开maysql,并写入 return s; } })*/ //区别map,写mysql时每个分区打开一个连接 .rebalance()//自动分区,防止数据倾斜,默认8个分区 stream按顺序分区 //.partitionByHash(0)//对第一个元素分区,可以指定字段 //.partitionByRange(0)//对第一个元素分区,可以指定字段 .mapPartition(new MapPartitionFunction<String, String>() { @Override public void mapPartition(Iterable<String> iterable, Collector<String> collector) throws Exception { Iterator<String> s=iterable.iterator(); while(s.hasNext()){ String out=s.next(); collector.collect(out); } } }) .distinct() .flatMap(new Linespite()) ;//格式化 FlatMapOperator SingleOutputStreamOperator //****************************************************************************************************** ds.print();//输出 DataSet out =ds.groupBy(0).sum(1);//计算 只支持Tuple2源数据类型 out.print();//输出 } private static class Linespite implements FlatMapFunction<String,Tuple2<String,Integer>>{ @Override public void flatMap(String o, Collector<Tuple2<String,Integer>> out) throws Exception { String[] s=o.toLowerCase().split("\\W+"); for(String t:s){ if(t.length()>0){ out.collect(new Tuple2(t,1)); } } } } }
/**
*join
*/
public class JoinTest { public static void main(String[] avrg)throws Exception{ final ExecutionEnvironment env=ExecutionEnvironment.getExecutionEnvironment(); List<Tuple2<String,String>> l1=new ArrayList<Tuple2<String,String>>(); l1.add(new Tuple2<String,String>("1","乔峰")); l1.add(new Tuple2<String,String>("2","段誉")); l1.add(new Tuple2<String,String>("3","慕容")); List<Tuple2<String,String>> l2=new ArrayList<Tuple2<String,String>>(); l2.add(new Tuple2<String,String>("1","丐帮")); l2.add(new Tuple2<String,String>("2","大理")); l2.add(new Tuple2<String,String>("3","燕")); DataSet nameds=env.fromCollection(l1); DataSet sourceds=env.fromCollection(l2); DataSet outds=nameds.join(sourceds).where(0).equalTo(0) .with(new JoinFunction<Tuple2<String,String>,Tuple2<String,String>,Tuple3<String,String,String>>() { @Override public Tuple3<String,String,String> join(Tuple2<String,String> o, Tuple2<String,String> o2) throws Exception { return new Tuple3(o.f0,o.f1,o2.f1); } }); outds.print(); } }