文章目录
实现一个具有Stream的链式、惰性特点的容器
- 完整项目代码已上传GitHub,地址:https://github.com/AlionSSS/lazy-chain-collection
1. Stream 示例
- 示例
// Reference pipeline using java.util.stream: square every number, keep the squares that are
// at least 4, and print each survivor. The containers built below imitate this chained style.
Stream.of(1, 2, 3, 4)
        .map(n -> n * n)
        .filter(square -> square >= 4)
        .forEach(square -> System.out.println("r = " + square));
2. ChainCollection 链式-非惰性
2.1 定义函数式接口(你也可以直接使用 Java 自带的 java.util.function 包中的 Consumer/Function/Predicate/Supplier)
- MyConsumer
/**
 * Operation that accepts a single element and returns no result.
 *
 * @param <In> type of the element being consumed
 */
@FunctionalInterface
public interface MyConsumer<In> {

    /**
     * Performs this operation on the given element.
     *
     * @param element the element to process
     */
    void accept(In element);
}
- MyFunction
/**
 * Function that turns one input element into one result.
 *
 * @param <In>  type of the input element
 * @param <Out> type of the produced result
 */
@FunctionalInterface
public interface MyFunction<In, Out> {

    /**
     * Applies this function to the given element.
     *
     * @param element the input element
     * @return the function result
     */
    Out apply(In element);
}
- MyPredicate
/**
 * Boolean-valued test of a single element.
 *
 * @param <In> type of the element being tested
 */
@FunctionalInterface
public interface MyPredicate<In> {

    /**
     * Evaluates this predicate on the given element.
     *
     * @param element the element to test
     * @return {@code true} if the element matches, {@code false} otherwise
     */
    boolean test(In element);
}
- MySupplier
/**
 * Supplier of results that takes no input.
 *
 * @param <Out> type of the supplied result
 */
@FunctionalInterface
public interface MySupplier<Out> {

    /**
     * Produces a result.
     *
     * @return the supplied value
     */
    Out get();
}
2.2 容器抽象接口
import com.skey.chainprogrammingdemo.func.MyConsumer;
import com.skey.chainprogrammingdemo.func.MyFunction;
import com.skey.chainprogrammingdemo.func.MyPredicate;
import java.util.Comparator;
import java.util.List;
/**
 * Abstraction of a chainable container: every transformation returns another {@code Chain},
 * so calls can be strung together, and {@link #foreach} / {@link #collect} end the chain.
 *
 * @param <In> type of the elements currently held by the chain
 */
public interface Chain<In> {
/**
 * Map operation: transforms each element into exactly one result (one to one).
 *
 * @param function the mapping function
 * @param <Out> type of the mapped elements
 * @return a chain of the mapped elements
 */
<Out> Chain<Out> map(MyFunction<In, Out> function);
/**
 * FlatMap operation: transforms each element into an {@code Iterable} of results and
 * flattens them into the returned chain (one to many).
 *
 * @param function the flat-mapping function
 * @param <Out> type of the flattened elements
 * @return a chain of the flattened elements
 */
<Out> Chain<Out> flatMap(MyFunction<In, Iterable<Out>> function);
/**
 * Filter operation: keeps an element only when the predicate accepts it (one to one/zero).
 *
 * @param predicate the test applied to each element
 * @return a chain of the accepted elements
 */
Chain<In> filter(MyPredicate<In> predicate);
/**
 * Sort operation: orders all elements with the given comparator (many to sorted many).
 *
 * @param comparator the ordering to apply
 * @return a chain of the sorted elements
 */
Chain<In> sort(Comparator<In> comparator);
/**
 * Distinct operation: removes duplicate elements.
 *
 * @return a chain without duplicates
 */
Chain<In> distinct();
/**
 * Terminal operation that hands every element to the consumer; returns nothing.
 *
 * @param consumer the action applied to each element
 */
void foreach(MyConsumer<In> consumer);
/**
 * Terminal operation that returns the processed elements.
 *
 * @return the resulting elements as a list
 */
List<In> collect();
}
2.3 实现容器
import com.skey.chainprogrammingdemo.func.MyConsumer;
import com.skey.chainprogrammingdemo.func.MyFunction;
import com.skey.chainprogrammingdemo.func.MyPredicate;
import java.util.*;
/**
 * Eager (non-lazy) {@link Chain}: every operation runs immediately and returns a new
 * collection holding its result; the receiver itself is never modified.
 *
 * @param <In> type of the elements held by this collection
 */
public class ChainCollection<In> implements Chain<In> {

    /** Elements of this link of the chain; never modified after construction. */
    private final List<In> list;

    private ChainCollection(List<In> list) {
        this.list = list;
    }

    /**
     * Creates a chain over the given elements.
     *
     * @param elements the initial elements, in encounter order
     * @param <T> element type
     * @return a chain containing a copy of {@code elements}
     */
    @SafeVarargs
    public static <T> Chain<T> valueOf(T... elements) {
        return new ChainCollection<>(new ArrayList<>(Arrays.asList(elements)));
    }

    @Override
    public <Out> Chain<Out> map(MyFunction<In, Out> function) {
        List<Out> mapped = new ArrayList<>(list.size());
        for (In element : list) {
            mapped.add(function.apply(element));
        }
        return new ChainCollection<>(mapped);
    }

    @Override
    public <Out> Chain<Out> flatMap(MyFunction<In, Iterable<Out>> function) {
        List<Out> flattened = new ArrayList<>();
        for (In element : list) {
            for (Out item : function.apply(element)) {
                flattened.add(item);
            }
        }
        return new ChainCollection<>(flattened);
    }

    @Override
    public Chain<In> filter(MyPredicate<In> predicate) {
        List<In> accepted = new ArrayList<>();
        for (In element : list) {
            if (predicate.test(element)) {
                accepted.add(element);
            }
        }
        return new ChainCollection<>(accepted);
    }

    @Override
    public void foreach(MyConsumer<In> consumer) {
        for (In element : list) {
            consumer.accept(element);
        }
    }

    @Override
    public Chain<In> sort(Comparator<In> comparator) {
        // Sort a copy with the JDK's List.sort so this collection keeps its own order.
        List<In> sorted = new ArrayList<>(list);
        sorted.sort(comparator);
        return new ChainCollection<>(sorted);
    }

    @Override
    public Chain<In> distinct() {
        // LinkedHashSet keeps first-occurrence order, so a preceding sort() is not undone
        // (a plain HashSet iterates in hash order).
        return new ChainCollection<>(new ArrayList<>(new LinkedHashSet<>(list)));
    }

    /**
     * Returns the elements of this chain.
     *
     * @return a new list; changes made to it do not affect this chain
     */
    @Override
    public List<In> collect() {
        // Defensive copy: never expose the internal list.
        return new ArrayList<>(list);
    }
}
2.4 Test
// Parse the strings, square them, keep squares above 10, sort descending, drop duplicates,
// then print each remaining value.
ChainCollection.valueOf("5", "3", "16", "5", "2", "8")
        .map(Integer::valueOf)
        .map(i -> i * i)
        .filter(i2 -> i2 > 10)
        // Integer.compare instead of "o2 - o1": subtraction overflows for distant values.
        .sort((o1, o2) -> Integer.compare(o2, o1))
        .distinct()
        .foreach(System.out::println);
3. LazyChainCollection 链式-惰性
3.1 惰性节点
- Node
/**
 * Link in a doubly linked chain of operations. The base class carries only the two
 * neighbour references; subclasses add the operation itself.
 *
 * @param <Out> element type associated with this node
 */
public class Node<Out> {

    /** Previous node in the chain, or {@code null} when there is none. */
    public Node<?> pre;

    /** Next node in the chain, or {@code null} when there is none. */
    public Node<?> next;

    /** Returns the simple name of the node's runtime class. */
    @Override
    public String toString() {
        Class<?> runtimeType = this.getClass();
        return runtimeType.getSimpleName();
    }
}
- FunctionNode
import com.skey.chainprogrammingdemo.func.MyFunction;

/**
 * Chain node that holds a mapping function.
 *
 * @param <In>  element type the function accepts
 * @param <Out> element type the function produces
 */
public class FunctionNode<In, Out> extends Node<Out> {

    /** The mapping function carried by this node. */
    public MyFunction<In, Out> function;

    /**
     * @param function the mapping function to store
     */
    public FunctionNode(MyFunction<In, Out> function) {
        this.function = function;
    }
}
- FlatFunctionNode
import com.skey.chainprogrammingdemo.func.MyFunction;

/**
 * Chain node that holds a flat-mapping function, i.e. one returning an {@code Iterable}
 * of results per input element.
 *
 * @param <In>  element type the function accepts
 * @param <Out> element type of the produced iterables
 */
public class FlatFunctionNode<In, Out> extends Node<Out> {

    /** The flat-mapping function carried by this node. */
    public MyFunction<In, Iterable<Out>> flatFunction;

    /**
     * @param flatFunction the flat-mapping function to store
     */
    public FlatFunctionNode(MyFunction<In, Iterable<Out>> flatFunction) {
        this.flatFunction = flatFunction;
    }
}
- PredicateNode
import com.skey.chainprogrammingdemo.func.MyPredicate;

/**
 * Chain node that holds a filter predicate.
 *
 * @param <In> element type being tested (unchanged by filtering)
 */
public class PredicateNode<In> extends Node<In> {

    /** The predicate carried by this node. */
    public MyPredicate<In> predicate;

    /**
     * @param predicate the predicate to store
     */
    public PredicateNode(MyPredicate<In> predicate) {
        this.predicate = predicate;
    }
}
- SortNode
import java.util.Comparator;

/**
 * Chain node that holds the comparator of a sort operation.
 *
 * @param <In> element type being sorted
 */
public class SortNode<In> extends Node<In> {

    /** The ordering carried by this node. */
    public Comparator<In> comparator;

    /**
     * @param comparator the ordering to store
     */
    public SortNode(Comparator<In> comparator) {
        this.comparator = comparator;
    }
}
- ConsumerNode
import com.skey.chainprogrammingdemo.func.MyConsumer;

/**
 * Chain node that holds a consumer receiving the processed elements.
 *
 * @param <In> element type handed to the consumer
 */
public class ConsumerNode<In> extends Node<In> {

    /** The consumer carried by this node. */
    public MyConsumer<In> consumer;

    /**
     * @param consumer the consumer to store
     */
    public ConsumerNode(MyConsumer<In> consumer) {
        this.consumer = consumer;
    }
}
- DistinctNode
/**
 * Marker node for a distinct operation; it carries no function of its own.
 *
 * @param <In> element type being de-duplicated
 */
public class DistinctNode<In> extends Node<In> {

    /** Creates a distinct marker node. */
    public DistinctNode() {
        super();
    }
}
3.2 Stage
- Stage
import com.skey.chainprogrammingdemo.node.Node;

/**
 * A run of consecutive nodes that is processed as one unit.
 *
 * @param <T> element type of the stage's first node
 */
public class Stage<T> {

    /** First node of the stage; later nodes are reached through {@code next}. */
    public Node<T> node;

    /**
     * @param node the first node of the stage
     */
    public Stage(Node<T> node) {
        this.node = node;
    }

    /**
     * Renders the stage as {@code SimpleClassName[node1,node2,...]}, following {@code next}
     * from the first node. A stage without nodes renders as {@code SimpleClassName[]}
     * instead of failing on an empty builder.
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder(getClass().getSimpleName()).append('[');
        boolean first = true;
        // Iterative walk: no recursion depth limit and no trailing comma to strip.
        for (Node<?> cursor = node; cursor != null; cursor = cursor.next) {
            if (!first) {
                builder.append(',');
            }
            builder.append(cursor);
            first = false;
        }
        return builder.append(']').toString();
    }
}
- ShuffleMapStage
import com.skey.chainprogrammingdemo.node.Node;

/**
 * Stage type used for a run of nodes whose output feeds a following stage.
 *
 * @param <T> element type of the stage's first node
 */
public class ShuffleMapStage<T> extends Stage<T> {

    /**
     * @param node the first node of the stage
     */
    public ShuffleMapStage(Node<T> node) {
        super(node);
    }
}
- ResultStage
import com.skey.chainprogrammingdemo.node.Node;

/**
 * Stage type used for the final run of nodes, the one that delivers the result.
 *
 * @param <T> element type of the stage's first node
 */
public class ResultStage<T> extends Stage<T> {

    /**
     * @param node the first node of the stage
     */
    public ResultStage(Node<T> node) {
        super(node);
    }
}
3.3 实现容器
-
运行流程
- 先使用LazyChainCollection连接各个Node(Node代表一次函数操作)
[Node/Collection链接示意图]
+-----------------------------------------+
| collection1 | collection2 | collection3 |
|      ↓      |      ↓      |      ↓      |
|    node1   -> <-  node2  -> <-  node3   |
|      ↓      |      ↓      |      ↓      |
|    func1    |    func2    |    func3    |
+-----------------------------------------+
- StageScheduler将Node链表按Shuffle切分为多个Stage
[Stage划分示意图]
+-------------------------------------------------------------------------+
|    ShuffleMapStage1    |    ShuffleMapStage2    |      ResultStage3     |
|-------------------------------------------------------------------------|
| node1 -> node2 -> null |     node3 -> null      | node4 -> node5 -> ... |
+-------------------------------------------------------------------------+
- StageScheduler按顺序运行各个Stage
[Stage运行与数据流转]
+--------------------------+
| Stage1 | Stage2 | Stage3 |
|--------------------------|
| data1  => data2 => data3 |
+--------------------------+
-
LazyChainCollection 代码
/** * Description: 核心容器 惰性 * * @author ALion */ public class LazyChainCollection<In> implements Chain<In> { Collection<Object> data; Node<?> currentNode; private LazyChainCollection(Collection<Object> data, Node<?> currentNode) { this.data = data; this.currentNode = currentNode; } @SafeVarargs public static <T> Chain<T> valueOf(T... elements) { return new LazyChainCollection<>( new ArrayList<>(Arrays.asList(elements)), new Node<>() ); } @Override public <Out> Chain<Out> map(MyFunction<In, Out> function) { return link(new FunctionNode<>(function)); } @Override public <Out> Chain<Out> flatMap(MyFunction<In, Iterable<Out>> function) { return link(new FlatFunctionNode<>(function)); } @Override public Chain<In> filter(MyPredicate<In> predicate) { return link(new PredicateNode<>(predicate)); } @Override public Chain<In> sort(Comparator<In> comparator) { return link(new SortNode<>(comparator)); } @Override public Chain<In> distinct() { return link(new DistinctNode<>()); } @Override public void foreach(MyConsumer<In> consumer) { LazyChainCollection<In> collection = (LazyChainCollection<In>) link(new ConsumerNode<>(consumer)); new StageScheduler<>(collection).schedule(); } @Override public List<In> collect() { List<In> result = new ArrayList<>(); LazyChainCollection<In> collection = (LazyChainCollection<In>) link(new ConsumerNode<In>(result::add)); new StageScheduler<>(collection).schedule(); return result; } /** * 创建新的Collection,并与当前的node连接 * <pre> * [Node/Collection链接示意图] * +-----------------------------------------+ * | collection1 | collection2 | collection3 | * | ↓ | ↓ | ↓ | * | node1 -> <- node2 -> <- node3 | * | ↓ | ↓ | ↓ | * | func1 | func2 | func3 | * +-----------------------------------------+ * </pre> * * @param node 新的Node * @param <X> Node最终类型 * @return Chain */ private <X> Chain<X> link(Node<X> node) { LazyChainCollection<X> collection = new LazyChainCollection<>( data, node ); this.currentNode.next = collection.currentNode; collection.currentNode.pre = 
this.currentNode; return collection; } }
-
StageScheduler 代码
/** * Description: Stage调度器 * * @author ALion */ public class StageScheduler<T> { private LazyChainCollection<T> collection; public StageScheduler(LazyChainCollection<T> collection) { this.collection = collection; } /** * 开始调度 * <pre> * [Stage运行与数据流转] * +--------------------------+ * | Stage1 | Stage2 | Stage3 | * |--------------------------| * | data1 => data2 => data3 | * +--------------------------+ * </pre> */ public void schedule() { System.out.println("---> 开始执行任务!解析Stage..."); List<Stage<?>> stages = parseStage(collection.currentNode); System.out.println("---> Stage 解析完毕!stages = " + stages); System.out.println("---> 提交Stage ===>"); // 数据存储点 Collection<Object> data = collection.data; for (Stage<?> stage : stages) { System.out.println("---> Start Stage[" + stage + "]"); if (stage instanceof ShuffleMapStage) { // 处理 ShuffleMapStage handleStageToStage(data, (ShuffleMapStage<?>) stage); } else if (stage instanceof ResultStage) { // 处理 ResultStage handleStage(data, stage); } else { throw new IllegalArgumentException( "No such type Stage[" + stage + "]! " + "Stage must be in [ShuffleMapStage,ResultStage]!" ); } } } /** * 为Node链划分Stage * <pre> * [Stage划分示意图] * +-------------------------------------------------------------------------+ * | ShuffleMapStage1 | ShuffleMapStage2 | ResultStage3 | * |-------------------------------------------------------------------------| * | node1 -> node2 -> null | node3 -> null | node4 -> node5 -> ... 
| * +-------------------------------------------------------------------------+ * </pre> * * @return Stage列表 */ private List<Stage<?>> parseStage(Node<?> node) { List<Stage<?>> stages = new ArrayList<>(); // 找到第一个Node,构建第一个Stage Node<?> current = findFirstNode(node).next; // 不要起始node,因为LazyChainCollection创建时,该node是空的 stages.add(new ShuffleMapStage<>(current)); // 添加第一个Node到stages中 while (current != null) { Node<?> next = current.next; if (current instanceof SortNode || current instanceof DistinctNode) { // 切割Stage current.next = null; // 当前Node向后指向null next.pre = null; // 后一个Node向前指向null // 新起一个Stage stages.add(new ShuffleMapStage<>(next)); } else if (current instanceof ConsumerNode) { // 如果找到ConsumerNode,更新最后的Stage为ResultStage Node<?> stageFirstNode = findFirstNode(current); stages.set(stages.size() - 1, new ResultStage<>(stageFirstNode)); } // 移动指针到下一个节点 current = next; } return stages; } /** * 向前查找第一个Node * * @param node 任意节点 * @return 第一个Node */ private Node<?> findFirstNode(Node<?> node) { if (node.pre == null) { return node; } else { return findFirstNode(node.pre); } } /** * 向后查找最后一个Node * * @param node 任意节点 * @return 最后一个Node */ private Node<?> findLastNode(Node<?> node) { if (node.next == null) { return node; } else { return findLastNode(node.next); } } /** * 处理ShuffleMapStage到下一个Stage的操作 * * @param data 数据容器 * @param stage 当前Stage */ private <In> void handleStageToStage(Collection<In> data, ShuffleMapStage<?> stage) { Node<?> lastNode = findLastNode(stage.node); if (lastNode instanceof SortNode) { SortNode<In> sortNode = (SortNode<In>) lastNode; // TreeSet代表了接下来的 排序Shuffle // 最好找一个只排序不去重的容器(更快),这里为了便利使用TreeSet TreeSet<In> treeSet = new TreeSet<>(new Comparator<In>() { @Override public int compare(In o1, In o2) { // 防止去重 int num = sortNode.comparator.compare(o1, o2); return num == 0 ? 
1 : num; } }); // 切换Stage最后一个Node为ConsumerNode[添加结果到treeSet] lastNode.pre.next = new ConsumerNode<>(treeSet::add); // 开始处理 handleStage(data, stage); System.out.println("------> Current Stage: shuffle[sort] => data"); data.clear(); data.addAll(treeSet); } else if (lastNode instanceof DistinctNode) { // HashSet代表了接下来的 去重Shuffle HashSet<In> hashSet = new HashSet<>(); // 切换Stage最后一个Node为ConsumerNode[添加结果到hashSet] lastNode.pre.next = new ConsumerNode<>(hashSet::add); // 开始处理 handleStage(data, stage); System.out.println("------> Current Stage: shuffle[distinct] => data"); data.clear(); data.addAll(hashSet); } else { // 除了Sort/distinct外,可能你还有其他shuffle需求,在else处修改 throw new IllegalArgumentException( "No such type Node[" + lastNode + "]! " + "Shuffle Node must be in [DistinctNode,SortNode]!" ); } } /** * 处理 Stage */ private <A> void handleStage(Collection<A> list, Stage<?> stage) { for (A element : list) { handleStageNode(element, stage.node); } System.out.println("------> Current Stage: map over"); } /** * 递归处理Node后续所有节点 */ private <A, B> void handleStageNode(A element, Node<B> node) { if (node != null) { if (node instanceof FunctionNode) { FunctionNode<A, B> functionNode = (FunctionNode<A, B>) node; B apply = functionNode.function.apply(element); handleStageNode(apply, node.next); } else if (node instanceof FlatFunctionNode) { FlatFunctionNode<A, B> flatFunctionNode = (FlatFunctionNode<A, B>) node; Iterable<B> iter = flatFunctionNode.flatFunction.apply(element); for (B b : iter) { handleStageNode(b, node.next); } } else if (node instanceof PredicateNode) { PredicateNode<A> predicateNode = (PredicateNode<A>) node; boolean test = predicateNode.predicate.test(element); if (test) { handleStageNode(element, node.next); } } else if (node instanceof ConsumerNode) { ConsumerNode<A> consumerNode = (ConsumerNode<A>) node; consumerNode.consumer.accept(element); } else { throw new IllegalArgumentException( "No such type Node[" + node + "]! 
" + "Map Node must be in [FunctionNode,FlatFunctionNode,PredicateNode,ConsumerNode]!" ); } } } }
3.4 Test
- Person
/**
 * Simple data holder used by the examples: a name, an age and an address.
 */
public class Person {

    String name;
    int age;
    String address;

    Person(String name, int age, String address) {
        this.name = name;
        this.age = age;
        this.address = address;
    }

    /** Renders the person as {@code Person{name='..', age=.., address='..'}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Person{");
        text.append("name='").append(name).append('\'');
        text.append(", age=").append(age);
        text.append(", address='").append(address).append('\'');
        text.append('}');
        return text.toString();
    }
}
- Test 1 不会被触发(链上只有中间操作,没有 foreach/collect 这样的终止操作,节点只会被连接而不会执行)
// Only intermediate operations are chained here; without foreach()/collect() no scheduler
// is started, so none of the functions below is ever called.
LazyChainCollection.valueOf("1,2,4", "2,6", "3,2,8,10", "4,8")
        .flatMap(str -> Arrays.asList(str.split(",")))
        .map(c -> Integer.parseInt(c))
        .map(i -> i * i)
        .filter(i2 -> i2 > 10)
        // Integer.compare instead of "o2 - o1": subtraction overflows for distant values.
        .sort((o1, o2) -> Integer.compare(o2, o1));
- Test 2 会被触发(以终止操作 collect 结尾,由 StageScheduler 划分 Stage 并执行)
// Ends with collect(), so the chain is executed: lower-case each line, drop names starting
// with "xiao", parse the rest into Person objects older than 18, sort by age descending,
// upper-case the names, de-duplicate and print.
LazyChainCollection.valueOf(
        "xiaowang,26,chongqing",
        "XIAOMING,18,beijing",
        "zhongsi,26,beijing",
        "xiaoli,a2x7,chengdu",
        "ZHANGSAN,16,guangzhou",
        "hanhan,19,chongqing",
        "LISI,23,changsha",
        "lisi,29,shanghai"
).map(line -> line.toLowerCase())
        .filter(line -> !line.startsWith("xiao"))
        .flatMap(line -> {
            List<Person> personList = new ArrayList<>();
            try {
                String[] fields = line.split(",");
                String name = fields[0];
                int age = Integer.parseInt(fields[1]);
                String address = fields[2];
                // Extra filter condition: adults older than 18 only.
                if (age > 18) {
                    personList.add(new Person(name, age, address));
                }
            } catch (NumberFormatException ignored) {
                // Deliberately skipped: a line whose age is not a number yields no Person.
            }
            return personList;
        })
        // Integer.compare instead of "p2.age - p1.age": subtraction can overflow.
        .sort((p1, p2) -> Integer.compare(p2.age, p1.age))
        .map(p -> p.name.toUpperCase())
        .distinct()
        .collect()
        .forEach(System.out::println);