3.4.3.5. 辅助函数的生成
输出了MatcherTable后,EmitMatcherTable()在823行输出了一个汇总信息。对于3.6.1版本的X86,它是这样的(v7.0也是类似的):
// Opcode Histogram:
// #OPC_Scope = 4240
// #OPC_RecordNode = 2791
// #OPC_RecordChild = 10029
// #OPC_RecordMemRef = 2740
// #OPC_CaptureGlueInput = 14
// #OPC_MoveChild = 8301
// #OPC_MoveParent = 15070
// #OPC_CheckSame = 0
// #OPC_CheckChildSame = 809
// #OPC_CheckPatternPredicate = 11373
// #OPC_CheckPredicate = 9754
// #OPC_CheckOpcode = 7674
// #OPC_SwitchOpcode = 612
// #OPC_CheckType = 9002
// #OPC_SwitchType = 2075
// #OPC_CheckChildType = 1456
// #OPC_CheckInteger = 4
// #OPC_CheckChildInteger = 1145
// #OPC_CheckCondCode = 0
// #OPC_CheckValueType = 36
// #OPC_CheckComplexPat = 7168
// #OPC_CheckAndImm = 11
// #OPC_CheckOrImm = 0
// #OPC_CheckFoldableChainNode = 2665
// #OPC_EmitInteger = 260
// #OPC_EmitStringInteger = 869
// #OPC_EmitRegister = 0
// #OPC_EmitConvertToTarget = 1922
// #OPC_EmitMergeInputChains = 7252
// #OPC_EmitCopyToReg = 450
// #OPC_EmitNode = 1043
// #OPC_EmitNodeXForm = 71
// #OPC_MarkGlueResults = 0
// #OPC_CompleteMatch = 163
// #OPC_MorphNodeTo = 13006
在825~827行,完成了SelectCode()这个函数的定义。接下来,开始生成一系列的辅助方法。这些方法是指令选择器在解析MatcherTable进行指令选择时所必需的。它们的内容都来自TD文件中给出的信息,描述了特定DAG结构匹配到特定机器指令所需要满足的条件或需要经历的操作。
615 void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
616 // Emit pattern predicates.
617 if (!PatternPredicates.empty()) {
618 OS << "bool CheckPatternPredicate(unsigned PredNo) const override {\n"; <-- v7.0删除
BeginEmitFunction(OS, "bool", <-- v7.0增加
"CheckPatternPredicate(unsigned PredNo) const", true/*AddOverride*/);
OS << "{\n";
619 OS << " switch (PredNo) {\n";
620 OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
621 for (unsigned i = 0, e = PatternPredicates.size(); i != e; ++i)
622 OS << " case " << i << ": return " << PatternPredicates[i] << ";\n";
623 OS << " }\n";
624 OS << "}\n\n";
EndEmitFunction(OS); <-- v7.0增加
625 }
626
627 // Emit Node predicates.
628 // FIXME: Annoyingly, these are stored by name, which we never even emit. Yay? <-- v7.0删除
629 StringMap<TreePattern*> PFsByName;
630
631 for (CodeGenDAGPatterns::pf_iterator I = CGP.pf_begin(), E = CGP.pf_end();
632 I != E; ++I)
633 PFsByName[I->first->getName()] = I->second.get();
634
635 if (!NodePredicates.empty()) {
636 OS << "bool CheckNodePredicate(SDNode *Node,\n"; <-- v7.0删除
637 OS << " unsigned PredNo) const override {\n";
BeginEmitFunction(OS, "bool", <-- v7.0增加
"CheckNodePredicate(SDNode *Node, unsigned PredNo) const",
true/*AddOverride*/);
OS << "{\n";
638 OS << " switch (PredNo) {\n";
639 OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
640 for (unsigned i = 0, e = NodePredicates.size(); i != e; ++i) {
641 // Emit the predicate code corresponding to this pattern.
642 TreePredicateFn PredFn = NodePredicates[i];
643
644 assert(!PredFn.isAlwaysTrue() && "No code in this predicate");
645 OS << " case " << i << ": { // " << NodePredicates[i].getFnName() <<'\n'; <-- v7.0删除
646
647 OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
OS << " case " << i << ": { \n"; <-- v7.0增加
for (auto *SimilarPred :
NodePredicatesByCodeToRun[PredFn.getCodeToRunOnSDNode()])
OS << " // " << TreePredicateFn(SimilarPred).getFnName() <<'\n';
OS << PredFn.getCodeToRunOnSDNode() << "\n }\n";
648 }
649 OS << " }\n";
650 OS << "}\n\n";
EndEmitFunction(OS); <-- v7.0增加
651 }
652
653 // Emit CompletePattern matchers.
654 // FIXME: This should be const.
655 if (!ComplexPatterns.empty()) {
656 OS << "bool CheckComplexPattern(SDNode *Root, SDNode *Parent,\n"; <-- v7.0删除
657 OS << " SDValue N, unsigned PatternNo,\n";
658 OS << " SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) override {\n";
BeginEmitFunction(OS, "bool", <-- v7.0增加
"CheckComplexPattern(SDNode *Root, SDNode *Parent,\n"
" SDValue N, unsigned PatternNo,\n"
" SmallVectorImpl<std::pair<SDValue, SDNode*>> &Result)",
true/*AddOverride*/);
659 OS << " unsigned NextRes = Result.size();\n";
660 OS << " switch (PatternNo) {\n";
661 OS << " default: llvm_unreachable(\"Invalid pattern # in table?\");\n";
662 for (unsigned i = 0, e = ComplexPatterns.size(); i != e; ++i) {
663 const ComplexPattern &P = *ComplexPatterns[i];
664 unsigned NumOps = P.getNumOperands();
665
666 if (P.hasProperty(SDNPHasChain))
667 ++NumOps; // Get the chained node too.
668
669 OS << " case " << i << ":\n";
670 OS << " Result.resize(NextRes+" << NumOps << ");\n";
671 OS << " return " << P.getSelectFunc();
672
673 OS << "(";
674 // If the complex pattern wants the root of the match, pass it in as the
675 // first argument.
676 if (P.hasProperty(SDNPWantRoot))
677 OS << "Root, ";
678
679 // If the complex pattern wants the parent of the operand being matched,
680 // pass it in as the next argument.
681 if (P.hasProperty(SDNPWantParent))
682 OS << "Parent, ";
683
684 OS << "N";
685 for (unsigned i = 0; i != NumOps; ++i)
686 OS << ", Result[NextRes+" << i << "].first";
687 OS << ");\n";
if (InstrumentCoverage) { <-- v7.0增加
OS << " if (Succeeded)\n";
OS << " dbgs() << \"\\nCOMPLEX_PATTERN: " << P.getSelectFunc()
<< "\\n\" ;\n";
OS << " return Succeeded;\n";
OS << " }\n";
688 }
689 OS << " }\n";
690 OS << "}\n\n";
EndEmitFunction(OS); <-- v7.0增加
691 }
692
693
694 // Emit SDNodeXForm handlers.
695 // FIXME: This should be const.
696 if (!NodeXForms.empty()) {
697 OS << "SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) override {\n"; <-- v7.0删除
BeginEmitFunction(OS, "SDValue", <-- v7.0增加
"RunSDNodeXForm(SDValue V, unsigned XFormNo)", true/*AddOverride*/);
OS << "{\n";
698 OS << " switch (XFormNo) {\n";
699 OS << " default: llvm_unreachable(\"Invalid xform # in table?\");\n";
700
701 // FIXME: The node xform could take SDValue's instead of SDNode*'s.
702 for (unsigned i = 0, e = NodeXForms.size(); i != e; ++i) {
703 const CodeGenDAGPatterns::NodeXForm &Entry =
704 CGP.getSDNodeTransform(NodeXForms[i]);
705
706 Record *SDNode = Entry.first;
707 const std::string &Code = Entry.second;
708
709 OS << " case " << i << ": { ";
710 if (!OmitComments)
711 OS << "// " << NodeXForms[i]->getName();
712 OS << '\n';
713
714 std::string ClassName = CGP.getSDNodeInfo(SDNode).getSDClassName();
715 if (ClassName == "SDNode")
716 OS << " SDNode *N = V.getNode();\n";
717 else
718 OS << " " << ClassName << " *N = cast<" << ClassName
719 << ">(V.getNode());\n";
720 OS << Code << "\n }\n";
721 }
722 OS << " }\n";
723 OS << "}\n\n";
EndEmitFunction(OS); <-- v7.0增加
724 }
725 }
一共有两类地方会定义谓词。一类是Pattern定义以及Instruction定义里的Predicates,它们保存在容器PatternPredicates中;另一类是PatFrag定义里的谓词代码,它们保存在容器NodePredicates中。由于前者是字符串类型容器,因此对前者的处理相当简单,它生成如下函数:
bool CheckPatternPredicate(unsigned PredNo) const override {
switch (PredNo) {
default: llvm_unreachable("Invalid predicate in table?");
case 0: return (Subtarget->hasAVX()) && (!Subtarget->hasVLX());
case 1: return (Subtarget->hasSSE1() && !Subtarget->hasAVX());
case 2: return (Subtarget->hasAVX512()) && (Subtarget->hasVLX());
case 3: return (Subtarget->hasSSE2() && !Subtarget->hasAVX());
case 4: return (Subtarget->hasSSE2());
case 5: return (Subtarget->hasAVX2()) && (!Subtarget->hasVLX());
case 6: return (Subtarget->hasAVX512());
case 7: return (Subtarget->hasAVX());
case 8: return (Subtarget->hasF16C());
case 9: return (Subtarget->hasAVX() && !Subtarget->hasAVX512());
case 10: return (Subtarget->hasSSE41() && !Subtarget->hasAVX());
case 11: return (Subtarget->hasAVX512()) && (Subtarget->is64Bit());
case 12: return (Subtarget->hasVLX());
case 13: return (Subtarget->is64Bit());
case 14: return (Subtarget->hasAVX() && !Subtarget->hasAVX2());
case 15: return (Subtarget->hasAVX2());
case 16: return (TM.getCodeModel() == CodeModel::Small ||TM.getCodeModel() == CodeModel::Kernel) && (TM.getRelocationModel() == Reloc::Static);
case 17: return (Subtarget->hasMOVBE());
case 18: return (Subtarget->hasBWI());
case 19: return (Subtarget->hasDQI());
case 20: return (Subtarget->hasAVX512()) && (!Subtarget->hasDQI());
case 21: return (!Subtarget->hasSSE1());
case 22: return (!Subtarget->hasSSE2());
case 23: return (Subtarget->hasBWI()) && (Subtarget->hasVLX());
case 24: return (Subtarget->hasMMX());
case 25: return (Subtarget->hasLZCNT());
case 26: return (Subtarget->hasBMI());
case 27: return (Subtarget->hasCMov());
case 28: return (!Subtarget->hasCMov());
case 29: return (!Subtarget->hasSSE1()) && (Subtarget->hasCMov());
case 30: return (!Subtarget->hasSSE2()) && (Subtarget->hasCMov());
case 31: return (!Subtarget->slowIncDec());
case 32: return (!Subtarget->hasSSE41());
case 33: return (Subtarget->hasBMI2());
case 34: return (Subtarget->hasTBM());
case 35: return (!Subtarget->is64Bit());
case 36: return (Subtarget->hasVLX()) && (Subtarget->hasBWI());
case 37: return (!Subtarget->hasDQI());
case 38: return (Subtarget->hasSSSE3());
case 39: return (Subtarget->hasSSE42());
case 40: return (Subtarget->hasSSE1());
case 41: return (Subtarget->has3DNow());
case 42: return (Subtarget->hasXOP());
case 43: return (Subtarget->hasSHA());
case 44: return (Subtarget->hasSSE42() && !Subtarget->hasAVX());
case 45: return (Subtarget->hasSSSE3() && !Subtarget->hasAVX());
case 46: return (Subtarget->hasAVX()) && (Subtarget->hasAES());
case 47: return (Subtarget->hasAES());
case 48: return (Subtarget->hasAVX()) && (Subtarget->hasPCLMUL());
case 49: return (Subtarget->hasPCLMUL());
case 50: return (Subtarget->hasSSE4A());
case 51: return (Subtarget->hasSSE3() && !Subtarget->hasAVX());
case 52: return (Subtarget->hasFMA4());
case 53: return (Subtarget->hasPRFCHW());
case 54: return (Subtarget->hasADX());
case 55: return (Subtarget->hasDQI()) && (Subtarget->hasVLX());
case 56: return (!Subtarget->is64Bit()) && (TM.getRelocationModel() != Reloc::PIC_);
case 57: return (!Subtarget->isTarget64BitLP64());
case 58: return (Subtarget->isTarget64BitLP64());
case 59: return (Subtarget->hasSSE3());
case 60: return (Subtarget->hasRTM());
case 61: return (Subtarget->hasAVX()) && (!Subtarget->is64Bit());
case 62: return (Subtarget->hasSSE2() && !Subtarget->hasAVX()) && (!Subtarget->is64Bit());
case 63: return (Subtarget->hasAVX()) && (Subtarget->is64Bit());
case 64: return (Subtarget->hasSSE2() && !Subtarget->hasAVX()) && (Subtarget->is64Bit());
case 65: return (Subtarget->hasMMX()) && (!Subtarget->is64Bit());
case 66: return (Subtarget->hasMMX()) && (Subtarget->is64Bit());
case 67: return (Subtarget->hasFSGSBase()) && (Subtarget->is64Bit());
case 68: return (!Subtarget->is64Bit()) && (!Subtarget->callRegIndirect());
case 69: return (Subtarget->is64Bit()) && (!Subtarget->callRegIndirect());
case 70: return (Subtarget->IsLegalToCallImmediateAddr(TM));
case 71: return (Subtarget->hasPOPCNT());
case 72: return (Subtarget->hasSSE2() && !Subtarget->hasAVX()) && (OptForSize);
case 73: return (Subtarget->hasAVX() && !Subtarget->hasAVX512()) && (OptForSize);
case 74: return (Subtarget->hasAVX512()) && (OptForSize);
case 75: return (Subtarget->hasSSE2() && !Subtarget->hasAVX()) && (!OptForSize);
case 76: return (Subtarget->hasAVX() && !Subtarget->hasAVX512()) && (!OptForSize);
case 77: return (Subtarget->hasAVX512()) && (!OptForSize);
case 78: return (Subtarget->hasVLX()) && (Subtarget->hasAVX512());
case 79: return (Subtarget->hasAVX512()) && (!Subtarget->hasVLX());
case 80: return (Subtarget->hasCmpxchg16b());
case 81: return (TM.getCodeModel() != CodeModel::Small &&TM.getCodeModel() != CodeModel::Kernel);
case 82: return (TM.getCodeModel() == CodeModel::Kernel);
case 83: return (Subtarget->hasRDRAND());
case 84: return (Subtarget->hasRDSEED());
case 85: return (Subtarget->hasRTM() || Subtarget->hasHLE());
case 86: return (!Subtarget->isTargetPS4());
case 87: return (Subtarget->isTargetPS4());
case 88: return (Subtarget->hasFMA());
case 89: return (Subtarget->hasSSE1() && !Subtarget->hasAVX()) && (OptForSize);
case 90: return (Subtarget->hasERI());
case 91: return (Subtarget->hasSSE41());
case 92: return (Subtarget->hasVLX()) && (Subtarget->hasDQI());
}
}
至于NodePredicates,它是TreePredicateFn类型的容器,因此需要下面的方法来输出实质的代码。TreePredicateFn是TreePatternNode的一个成员,而这个TreePatternNode来自一个PatFrag定义,在PatFrag的定义里可以指定PredicateCode及ImmediateCode。ImmediateCode必须以int64_t类型的变量Imm为判断条件;而PredicateCode则要灵活一些,它以变量N为判断对象,N可以是SDNode或者其某个派生类型的指针。
752 std::string TreePredicateFn::getCodeToRunOnSDNode() const {
753 // Handle immediate predicates first.
754 std::string ImmCode = getImmCode();
755 if (!ImmCode.empty()) {
756 std::string Result =
757 " int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue();\n";
758 return Result + ImmCode;
759 }
760
761 // Handle arbitrary node predicates.
762 assert(!getPredCode().empty() && "Don't have any predicate code!");
763 std::string ClassName;
764 if (PatFragRec->getOnlyTree()->isLeaf())
765 ClassName = "SDNode";
766 else {
767 Record *Op = PatFragRec->getOnlyTree()->getOperator();
768 ClassName = PatFragRec->getDAGPatterns().getSDNodeInfo(Op).getSDClassName();
769 }
770 std::string Result;
771 if (ClassName == "SDNode")
772 Result = " SDNode *N = Node;\n";
773 else
774 Result = " " + ClassName + "*N = cast<" + ClassName + ">(Node);\n";
775
776 return Result + getPredCode();
777 }
这个生成的方法CheckNodePredicate()有接近1000行的代码,我们只看分别由ImmediateCode与PredicateCode生成的代码片段。
case 85: { // Predicate_def32
SDNode *N = Node;
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
N->getOpcode() != ISD::AssertSext &&
N->getOpcode() != X86ISD::CMOV;
}
case 86: { // Predicate_fpimm0
ConstantFPSDNode*N = cast<ConstantFPSDNode>(Node);
return N->isExactlyValue(+0.0);
}
还有ComplexPattern的匹配方法要生成。前面在向MatcherTable输出CheckComplexPatMatcher对象时,getComplexPat()方法将ComplexPattern定义保存在ComplexPatterns容器里(EmitMatcher()的402行),同时对这些ComplexPattern进行了从0开始的编号。因此,X86会生成下面这个方法:
bool CheckComplexPattern(SDNode *Root, SDNode *Parent,
SDValue N, unsigned PatternNo,
SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) override {
unsigned NextRes = Result.size();
switch (PatternNo) {
default: llvm_unreachable("Invalid pattern # in table?");
case 0:
Result.resize(NextRes+5);
return SelectAddr(Parent, N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 1:
Result.resize(NextRes+6);
return SelectScalarSSELoad(Root, N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first, Result[NextRes+5].first);
case 2:
Result.resize(NextRes+6);
return SelectScalarSSELoad(Root, N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first, Result[NextRes+5].first);
case 3:
Result.resize(NextRes+5);
return SelectLEAAddr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 4:
Result.resize(NextRes+5);
return SelectLEA64_32Addr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 5:
Result.resize(NextRes+5);
return SelectLEAAddr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 6:
Result.resize(NextRes+1);
return SelectMOV64Imm32(N, Result[NextRes+0].first);
case 7:
Result.resize(NextRes+5);
return SelectTLSADDRAddr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 8:
Result.resize(NextRes+5);
return SelectTLSADDRAddr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 9:
Result.resize(NextRes+5);
return SelectTLSADDRAddr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 10:
Result.resize(NextRes+5);
return SelectTLSADDRAddr(N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
case 11:
Result.resize(NextRes+5);
return SelectVectorAddr(Parent, N, Result[NextRes+0].first, Result[NextRes+1].first, Result[NextRes+2].first, Result[NextRes+3].first, Result[NextRes+4].first);
}
}
最后生成的是SDNodeXForm对象所要执行的转换操作。同样也是在向MatcherTable输出EmitNodeXFormMatcher对象时,getNodeXFormID()方法将SDNodeXForm对象保存到NodeXForms容器里,同时对这些SDNodeXForm进行了从0开始的编号。因此,X86会生成下面这个方法:
SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) override {
switch (XFormNo) {
default: llvm_unreachable("Invalid xform # in table?");
case 0: { // EXTRACT_get_vextract128_imm
SDNode *N = V.getNode();
return getI8Imm(X86::getExtractVEXTRACT128Immediate(N), SDLoc(N));
}
case 1: { // GetLo8XForm
ConstantSDNode *N = cast<ConstantSDNode>(V.getNode());
// Transformation function: get the low 8 bits.
return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
}
case 2: { // GetLo32XForm
ConstantSDNode *N = cast<ConstantSDNode>(V.getNode());
// Transformation function: get the low 32 bits.
return getI32Imm((unsigned)N->getZExtValue(), SDLoc(N));
}
case 3: { // CountTrailingOnes
ConstantSDNode *N = cast<ConstantSDNode>(V.getNode());
// Count the trailing ones in the immediate.
return getI8Imm(countTrailingOnes(N->getZExtValue()), SDLoc(N));
}
case 4: { // ROT32L2R_imm8
ConstantSDNode *N = cast<ConstantSDNode>(V.getNode());
// Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
return getI8Imm(32 - N->getZExtValue(), SDLoc(N));
}
case 5: { // ROT64L2R_imm8
ConstantSDNode *N = cast<ConstantSDNode>(V.getNode());
// Convert a ROTL shamt to a ROTR shamt on 64-bit integer.
return getI8Imm(64 - N->getZExtValue(), SDLoc(N));
}
case 6: { // INSERT_get_vinsert128_imm
SDNode *N = V.getNode();
return getI8Imm(X86::getInsertVINSERT128Immediate(N), SDLoc(N));
}
case 7: { // INSERT_get_vinsert256_imm
SDNode *N = V.getNode();
return getI8Imm(X86::getInsertVINSERT256Immediate(N), SDLoc(N));
}
case 8: { // I8Imm
ConstantSDNode *N = cast<ConstantSDNode>(V.getNode());
// Transformation function: get the low 8 bits.
return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
}
case 9: { // EXTRACT_get_vextract256_imm
SDNode *N = V.getNode();
return getI8Imm(X86::getExtractVEXTRACT256Immediate(N), SDLoc(N));
}
}
}
v7.0生成这样的版本:
#ifdef GET_DAGISEL_DECL
bool CheckPatternPredicate(unsigned PredNo) const override;
#endif
#if defined(GET_DAGISEL_BODY) || DAGISEL_INLINE
bool DAGISEL_CLASS_COLONCOLON CheckPatternPredicate(unsigned PredNo) const
#if DAGISEL_INLINE
  override
#endif
{
  switch (PredNo) {
  …
  }
}
#endif // GET_DAGISEL_BODY
由于一些定义的变化以及处理上的改进,比如TreePattern共享相同的谓词,v7.0上述方法都有不小的变化。不过这里我们不深入其中的差异。