3.4.4.3.3. OPC_SwitchOpcode与OPC_SwitchType等
在OPC_MoveChild处,NodeStack记录下子节点,而在OPC_MoveParent处,NodeStack释放栈顶元素。另外,在OPC_RecordNode及OPC_RecordChildN处RecordedNodes记录下这个节点与其父节点,如果父节点存在的话(OPC_CheckComplexPat在调用CheckComplexPattern()时,需要父节点对象)。这样就实现了在模式树里的遍历。
SelectionDAGISel::SelectCodeCommon(续)
2736 case OPC_RecordNode: {
2737 // Remember this node, it may end up being an operand in the pattern.
2738 SDNode *Parent = nullptr;
2739 if (NodeStack.size() > 1)
2740 Parent = NodeStack[NodeStack.size()-2].getNode();
2741 RecordedNodes.push_back(std::make_pair(N, Parent));
2742 continue;
2743 }
2744
2745 case OPC_RecordChild0: case OPC_RecordChild1:
2746 case OPC_RecordChild2: case OPC_RecordChild3:
2747 case OPC_RecordChild4: case OPC_RecordChild5:
2748 case OPC_RecordChild6: case OPC_RecordChild7: {
2749 unsigned ChildNo = Opcode-OPC_RecordChild0;
2750 if (ChildNo >= N.getNumOperands())
2751 break; // Match fails if out of range child #.
2752
2753 RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
2754 N.getNode()));
2755 continue;
2756 }
2757 case OPC_RecordMemRef:
2758 MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
2759 continue;
2760
2761 case OPC_CaptureGlueInput:
2762 // If the current node has an input glue, capture it in InputGlue.
2763 if (N->getNumOperands() != 0 &&
2764 N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
2765 InputGlue = N->getOperand(N->getNumOperands()-1);
2766 continue;
2767
2768 case OPC_MoveChild: {
2769 unsigned ChildNo = MatcherTable[MatcherIndex++];
2770 if (ChildNo >= N.getNumOperands())
2771 break; // Match fails if out of range child #.
2772 N = N.getOperand(ChildNo);
2773 NodeStack.push_back(N);
2774 continue;
2775 }
case OPC_MoveChild0: case OPC_MoveChild1: <-- v7.0增加
case OPC_MoveChild2: case OPC_MoveChild3:
case OPC_MoveChild4: case OPC_MoveChild5:
case OPC_MoveChild6: case OPC_MoveChild7: {
unsigned ChildNo = Opcode-OPC_MoveChild0;
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
N = N.getOperand(ChildNo);
NodeStack.push_back(N);
continue;
}
2776
2777 case OPC_MoveParent:
2778 // Pop the current node off the NodeStack.
2779 NodeStack.pop_back();
2780 assert(!NodeStack.empty() && "Node stack imbalance!");
2781 N = NodeStack.back();
2782 continue;
2783
2784 case OPC_CheckSame:
2785 if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
2786 continue;
2787
2788 case OPC_CheckChild0Same: case OPC_CheckChild1Same:
2789 case OPC_CheckChild2Same: case OPC_CheckChild3Same:
2790 if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
2791 Opcode-OPC_CheckChild0Same))
2792 break;
2793 continue;
2794
2795 case OPC_CheckPatternPredicate:
2796 if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
2797 continue;
2798 case OPC_CheckPredicate:
2799 if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
2800 N.getNode()))
2801 break;
2802 continue;
2803 case OPC_CheckComplexPat: {
2804 unsigned CPNum = MatcherTable[MatcherIndex++];
2805 unsigned RecNo = MatcherTable[MatcherIndex++];
2806 assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
2807
2808 // If target can modify DAG during matching, keep the matching state
2809 // consistent.
2810 std::unique_ptr<MatchStateUpdater> MSU;
2811 if (ComplexPatternFuncMutatesDAG())
2812 MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes,
2813 MatchScopes));
2814
2815 if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
2816 RecordedNodes[RecNo].first, CPNum,
2817 RecordedNodes))
2818 break;
2819 continue;
2820 }
2821 case OPC_CheckOpcode:
2822 if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
2823 continue;
2824
2825 case OPC_CheckType:
2826 if (!::CheckType(MatcherTable, MatcherIndex, N, TLI,
2827 CurDAG->getDataLayout()))
2828 break;
2829 continue;
2830
2831 case OPC_SwitchOpcode: {
2832 unsigned CurNodeOpcode = N.getOpcode();
2833 unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
2834 unsigned CaseSize;
2835 while (1) {
2836 // Get the size of this case.
2837 CaseSize = MatcherTable[MatcherIndex++];
2838 if (CaseSize & 128)
2839 CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
2840 if (CaseSize == 0) break;
2841
2842 uint16_t Opc = MatcherTable[MatcherIndex++];
2843 Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
2844
2845 // If the opcode matches, then we will execute this case.
2846 if (CurNodeOpcode == Opc)
2847 break;
2848
2849 // Otherwise, skip over this case.
2850 MatcherIndex += CaseSize;
2851 }
2852
2853 // If no cases matched, bail out.
2854 if (CaseSize == 0) break;
2855
2856 // Otherwise, execute the case we found.
2857 DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart
2858 << " to " << MatcherIndex << "\n");
2859 continue;
2860 }
2861
2862 case OPC_SwitchType: {
2863 MVT CurNodeVT = N.getSimpleValueType();
2864 unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
2865 unsigned CaseSize;
2866 while (1) {
2867 // Get the size of this case.
2868 CaseSize = MatcherTable[MatcherIndex++];
2869 if (CaseSize & 128)
2870 CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
2871 if (CaseSize == 0) break;
2872
2873 MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
2874 if (CaseVT == MVT::iPTR)
2875 CaseVT = TLI->getPointerTy(CurDAG->getDataLayout());
2876
2877 // If the VT matches, then we will execute this case.
2878 if (CurNodeVT == CaseVT)
2879 break;
2880
2881 // Otherwise, skip over this case.
2882 MatcherIndex += CaseSize;
2883 }
2884
2885 // If no cases matched, bail out.
2886 if (CaseSize == 0) break;
2887
2888 // Otherwise, execute the case we found.
2889 DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
2890 << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
2891 continue;
2892 }
2893 case OPC_CheckChild0Type: case OPC_CheckChild1Type:
2894 case OPC_CheckChild2Type: case OPC_CheckChild3Type:
2895 case OPC_CheckChild4Type: case OPC_CheckChild5Type:
2896 case OPC_CheckChild6Type: case OPC_CheckChild7Type:
2897 if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
2898 CurDAG->getDataLayout(),
2899 Opcode - OPC_CheckChild0Type))
2900 break;
2901 continue;
2902 case OPC_CheckCondCode:
2903 if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
2904 continue;
2905 case OPC_CheckValueType:
2906 if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
2907 CurDAG->getDataLayout()))
2908 break;
2909 continue;
2910 case OPC_CheckInteger:
2911 if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
2912 continue;
2913 case OPC_CheckChild0Integer: case OPC_CheckChild1Integer:
2914 case OPC_CheckChild2Integer: case OPC_CheckChild3Integer:
2915 case OPC_CheckChild4Integer:
2916 if (!::CheckChildInteger(MatcherTable, MatcherIndex, N,
2917 Opcode-OPC_CheckChild0Integer)) break;
2918 continue;
2919 case OPC_CheckAndImm:
2920 if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
2921 continue;
2922 case OPC_CheckOrImm:
2923 if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
2924 continue;
2925
2926 case OPC_CheckFoldableChainNode: {
2927 assert(NodeStack.size() != 1 && "No parent node");
2928 // Verify that all intermediate nodes between the root and this one have
2929 // a single use.
2930 bool HasMultipleUses = false;
2931 for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
2932 if (!NodeStack[i].hasOneUse()) {
2933 HasMultipleUses = true;
2934 break;
2935 }
2936 if (HasMultipleUses) break;
2937
2938 // Check to see that the target thinks this is profitable to fold and that
2939 // we can fold it without inducing cycles in the graph.
2940 if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
2941 NodeToMatch) ||
2942 !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
2943 NodeToMatch, OptLevel,
2944 true/*We validate our own chains*/))
2945 break;
2946
2947 continue;
2948 }
对嵌套在OPC_SwitchOpcode里的OPC_SwitchOpcode与OPC_SwitchType的处理,与对非嵌套(最外层)的OPC_SwitchOpcode的处理是相似的。在2926行,OPC_CheckFoldableChainNode表示当前DAG具备折叠的可能,即有多个节点产生Chain或glue结果,需要检查是否满足折叠条件(即折叠产生Chain或glue结果的节点不会形成环)。
1840 bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
1841 SDNode *Root) const {
1842 if (OptLevel == CodeGenOpt::None) return false;
1843 return N.hasOneUse();
1844 }
变量OptLevel标记优化强度,CodeGenOpt::None对应“-O0”。在这种情况下不进行折叠。N最初是在2600行创建的使用NodeToMatch第一个结果的SDValue对象,但到达OPC_CheckFoldableChainNode时它已经随OPC_MoveChild下移,是NodeStack栈顶的当前节点(2927行的断言保证它不是根节点);如果它有多个使用者,也不能进行折叠。注意参数Root就是SelectionDAGISel::SelectCodeCommon()的参数NodeToMatch,它是待匹配指令DAG的根节点。
1848 bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
1849 CodeGenOpt::Level OptLevel,
1850 bool IgnoreChains) {
1851 if (OptLevel == CodeGenOpt::None) return false;
1852
1853 // If Root use can somehow reach N through a path that that doesn't contain
1854 // U then folding N would create a cycle. e.g. In the following
1855 // diagram, Root can reach N through X. If N is folded into into Root, then
1856 // X is both a predecessor and a successor of U.
1857 //
1858 // [N*] //
1859 // ^ ^ //
1860 // / \ //
1861 // [U*] [X]? //
1862 // ^ ^ //
1863 // \ / //
1864 // \ / //
1865 // [Root*] //
1866 //
1867 // * indicates nodes to be folded together.
1868 //
1869 // If Root produces glue, then it gets (even more) interesting. Since it
1870 // will be "glued" together with its glue use in the scheduler, we need to
1871 // check if it might reach N.
1872 //
1873 // [N*] //
1874 // ^ ^ //
1875 // / \ //
1876 // [U*] [X]? //
1877 // ^ ^ //
1878 // \ \ //
1879 // \ | //
1880 // [Root*] | //
1881 // ^ | //
1882 // f | //
1883 // | / //
1884 // [Y] / //
1885 // ^ / //
1886 // f / //
1887 // | / //
1888 // [GU] //
1889 //
1890 // If GU (glue use) indirectly reaches N (the load), and Root folds N
1891 // (call it Fold), then X is a predecessor of GU and a successor of
1892 // Fold. But since Fold and GU are glued together, this will create
1893 // a cycle in the scheduling graph.
1894
1895 // If the node has glue, walk down the graph to the "lowest" node in the
1896 // glueged set.
1897 EVT VT = Root->getValueType(Root->getNumValues()-1);
1898 while (VT == MVT::Glue) {
1899 SDNode *GU = findGlueUse(Root);
1900 if (!GU)
1901 break;
1902 Root = GU;
1903 VT = Root->getValueType(Root->getNumValues()-1);
1904
1905 // If our query node has a glue result with a use, we've walked up it. If
1906 // the user (which has already been selected) has a chain or indirectly uses
1907 // the chain, our WalkChainUsers predicate will not consider it. Because of
1908 // this, we cannot ignore chains in this predicate.
1909 IgnoreChains = false;
1910 }
1911
1912
1913 SmallPtrSet<SDNode*, 16> Visited; <-- v7.0删除
1914 return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); <-- v7.0删除
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); <-- v7.0增加
1915 }
SelectionDAGISel::IsLegalToFold()方法里一大段的注释说明了要检查的两种情况。其中的Root就是SelectCodeCommon()中的NodeToMatch,而参数N实际上就是NodeStack[NodeStack.size()-1],因此N和NodeStack[NodeStack.size()-2]也就是注释中所提到的N与U节点(N,U不一定是Root的直接子节点)。1899行的方法findGlueUse()查找使用Root(即NodeToMatch)最后一个结果(即glue结果,通常对应被修改的标志寄存器)的SDNode对象,找到第一个使用者即返回,找不到则返回nullptr。因为findGlueUse()在1898行的while循环里,如果使用者也会修改标志寄存器(产生glue节点),我们会继续寻找这个标志寄存器的使用者,直到走到glue链的最末端。
1786 static SDNode *findGlueUse(SDNode *N) {
1787 unsigned FlagResNo = N->getNumValues()-1;
1788 for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1789 SDUse &Use = I.getUse();
1790 if (Use.getResNo() == FlagResNo)
1791 return Use.getUser();
1792 }
1793 return nullptr;
1794 }
方法findNonImmUse()则检查1898行循环结束后的Root(如果原来的Root产生glue并且该glue有使用者,这时它已经是沿glue链找到的最末端的使用者;否则仍是NodeToMatch。它作为参数Use传入)是否间接使用了当前节点(即N,参数Def)。因为U是N的直接父节点,因此它代表一个直接使用(如果使用的话),作为参数ImmedUse传入;由ImmedUse或Root对Def的直接使用不在查找之列(1825行)。
1799 static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
1800 SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
1801 bool IgnoreChains) {
1802 // The NodeID's are given uniques ID's where a node ID is guaranteed to be
1803 // greater than all of its (recursive) operands. If we scan to a point where
1804 // 'use' is smaller than the node we're scanning for, then we know we will
1805 // never find it.
1806 //
1807 // The Use may be -1 (unassigned) if it is a newly allocated node. This can
1808 // happen because we scan down to newly selected nodes in the case of glue
1809 // uses.
1810 if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
1811 return false;
1812
1813 // Don't revisit nodes if we already scanned it and didn't fail, we know we
1814 // won't fail if we scan it again.
1815 if (!Visited.insert(Use).second)
1816 return false;
1817
1818 for (const SDValue &Op : Use->op_values()) {
1819 // Ignore chain uses, they are validated by HandleMergeInputChains.
1820 if (Op.getValueType() == MVT::Other && IgnoreChains)
1821 continue;
1822
1823 SDNode *N = Op.getNode();
1824 if (N == Def) {
1825 if (Use == ImmedUse || Use == Root)
1826 continue; // We are not looking for immediate use.
1827 assert(N != Root);
1828 return true;
1829 }
1830
1831 // Traverse up the operand chain.
1832 if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
1833 return true;
1834 }
1835 return false;
1836 }
在DAG中的SDNode是以拓扑序(深度优先遍历)编号的,因此子节点的序号必定小于父节点。这就是1810行判断的依据。
只要IsProfitableToFold()与IsLegalToFold()中有一个返回false(2940~2945行),匹配就失败了。也就是说如果MatcherTable里出现了OPC_CheckFoldableChainNode,匹配的指令必须满足折叠条件才能继续。
V7.0这样修改了findNonImmUse()方法。2257行的SDNode::isOnlyUserOf()方法检查ImmedUse是否是Def唯一的使用者,如果是,就不用担心折叠会产生环了。否则,继续查找。
2252 static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
2253 bool IgnoreChains) {
2254 SmallPtrSet<const SDNode *, 16> Visited;
2255 SmallVector<const SDNode *, 16> WorkList;
2256 // Only check if we have non-immediate uses of Def.
2257 if (ImmedUse->isOnlyUserOf(Def))
2258 return false;
2259
2260 // We don't care about paths to Def that go through ImmedUse so mark it
2261 // visited and mark non-def operands as used.
2262 Visited.insert(ImmedUse);
2263 for (const SDValue &Op : ImmedUse->op_values()) {
2264 SDNode *N = Op.getNode();
2265 // Ignore chain deps (they are validated by
2266 // HandleMergeInputChains) and immediate uses
2267 if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
2268 continue;
2269 if (!Visited.insert(N).second)
2270 continue;
2271 WorkList.push_back(N);
2272 }
2273
2274 // Initialize worklist to operands of Root.
2275 if (Root != ImmedUse) {
2276 for (const SDValue &Op : Root->op_values()) {
2277 SDNode *N = Op.getNode();
2278 // Ignore chains (they are validated by HandleMergeInputChains)
2279 if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def)
2280 continue;
2281 if (!Visited.insert(N).second)
2282 continue;
2283 WorkList.push_back(N);
2284 }
2285 }
2286
2287 return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true);
2288 }
2262~2285行的过程很简单,将Root与ImmedUse的所有操作数都保存在WorkList容器里(Def本身除外;IgnoreChains为true时,链操作数也除外)。然后通过下面的方法进行遍历。对Def,如果它是WorkList里任一节点的前驱(即从WorkList中的某个节点出发,沿操作数方向可以到达Def),这个方法将返回true。
834 static bool hasPredecessorHelper(const SDNode *N,
835 SmallPtrSetImpl<const SDNode *> &Visited,
836 SmallVectorImpl<const SDNode *> &Worklist,
837 unsigned int MaxSteps = 0,
838 bool TopologicalPrune = false) {
839 SmallVector<const SDNode *, 8> DeferredNodes;
840 if (Visited.count(N))
841 return true;
842
843 // Node Id's are assigned in three places: As a topological
844 // ordering (> 0), during legalization (results in values set to
845 // 0), new nodes (set to -1). If N has a topolgical id then we
846 // know that all nodes with ids smaller than it cannot be
847 // successors and we need not check them. Filter out all node
848 // that can't be matches. We add them to the worklist before exit
849 // in case of multiple calls. Note that during selection the topological id
850 // may be violated if a node's predecessor is selected before it. We mark
851 // this at selection negating the id of unselected successors and
852 // restricting topological pruning to positive ids.
853
854 int NId = N->getNodeId();
855 // If we Invalidated the Id, reconstruct original NId.
856 if (NId < -1)
857 NId = -(NId + 1);
858
859 bool Found = false;
860 while (!Worklist.empty()) {
861 const SDNode *M = Worklist.pop_back_val();
862 int MId = M->getNodeId();
863 if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
864 (MId > 0) && (MId < NId)) {
865 DeferredNodes.push_back(M);
866 continue;
867 }
868 for (const SDValue &OpV : M->op_values()) {
869 SDNode *Op = OpV.getNode();
870 if (Visited.insert(Op).second)
871 Worklist.push_back(Op);
872 if (Op == N)
873 Found = true;
874 }
875 if (Found)
876 break;
877 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
878 break;
879 }
880 // Push deferred nodes back on worklist.
881 Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
882 // If we bailed early, conservatively return found.
883 if (MaxSteps != 0 && Visited.size() >= MaxSteps)
884 return true;
885 return Found;
886 }
这个方法使用SDNode的NodeId作为依据进行操作。NodeId有4个来源。第一个作为拓扑序,这时NodeId > 0。因此NodeId比N小的节点都不可能是它的后继,我们可以在搜索的过程里裁剪掉这些节点(如果TopologicalPrune为true)。第二个是来自合法化过程中,这时NodeId置零。第三个就是新节点,这时NodeId是-1。最后,在选择过程中,如果一个节点的前驱在它之前被选中,该节点的NodeId将被设置为负数,防止裁剪过程伤及它。
参数MaxSteps如果不为0,控制查找的步数。如果在这个指定步数里找不到,就保守地认为找到了。