3.4.2.9. 推导、验证指令的性质
在Instruction的定义里可以看到许多bit域,其中mayLoad,mayStore,hasSideEffects是未初始化的。如果目标机器指令的定义也没有提供进一步的初始化,就需要根据指令匹配模板的内容、属性来推导这些域。这些工作由下面的方法来完成。
// CodeGenDAGPatterns::InferInstructionFlags(), first part (the listing is
// resumed further below in this document). Walks every instruction returned by
// Target.getInstructionsByEnumValue() and, for each one that has a primary
// pattern, analyzes that pattern and passes the result to InferFromPattern().
3209 void CodeGenDAGPatterns::InferInstructionFlags() {
3210 const std::vector<const CodeGenInstruction*> &Instructions =
3211 Target.getInstructionsByEnumValue();
3212
3213 // First try to infer flags from the primary instruction pattern, if any.
// Instructions that have no primary pattern but still have undefined flags.
3214 SmallVector<CodeGenInstruction*, 8> Revisit;
// Accumulates the results of InferFromPattern() (true on a reported conflict).
3215 unsigned Errors = 0;
3216 for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
// The list holds pointers to const; constness is cast away because
// InferFromPattern() below takes a mutable CodeGenInstruction and updates it.
3217 CodeGenInstruction &InstInfo =
3218 const_cast<CodeGenInstruction &>(*Instructions[i]);
3219
3220 // Get the primary instruction pattern.
3221 const TreePattern *Pattern = getInstruction(InstInfo.TheDef).getPattern();
3222 if (!Pattern) {
// No primary pattern: remember the instruction in Revisit only when
// hasUndefFlags() says something is still undefined, then move on.
3223 if (InstInfo.hasUndefFlags())
3224 Revisit.push_back(&InstInfo);
3225 continue;
3226 }
// Collect the properties of the pattern, then check/merge them into InstInfo.
// The instruction's own record is passed as the pattern definition.
3227 InstAnalyzer PatInfo(*this);
3228 PatInfo.Analyze(Pattern);
3229 Errors += InferFromPattern(InstInfo, PatInfo, InstInfo.TheDef);
3230 }
首先通过CodeGenTarget::getInstructionsByEnumValue()方法返回一个以特定方式排序的指令序列。
// Returns the InstrsByEnum list. If the list is still empty,
// ComputeInstrsByEnum() is called first to fill it; otherwise the existing
// contents are returned as they are.
166 const std::vector<const CodeGenInstruction*> &
167 getInstructionsByEnumValue() const {
168 if (InstrsByEnum.empty()) ComputeInstrsByEnum();
169 return InstrsByEnum;
170 }
排序由ComputeInstrsByEnum()方法完成。下面294行的FixedInstrs给出的指令都包含在名字空间TargetOpcode里,它们必须以这个给定的顺序排列(v7.0把这些指令描述放在TargetOpcodes.def文件中,通过宏来自动生成FixedInstrs。效果是一样的)。
指令排序的次序是:在TargetOpcode名字空间里以指定序排列的指令(即FixedInstrs指定的指令序),余下的指令按名字排序(v7.0做了进一步的厘清:接下来是按名字排序的伪指令(伪指令没有为代码生成器所需的编码信息),最后才是按名字排序的其他指令)。
// Fills InstrsByEnum. The instructions named in FixedInstrs come first, in
// exactly the order of that array; they are followed by every instruction whose
// namespace is not "TargetOpcode", and that tail is then sorted.
292 void CodeGenTarget::ComputeInstrsByEnum() const {
293 // The ordering here must match the ordering in TargetOpcodes.h.
// nullptr-terminated list of the fixed instruction names.
294 static const char *const FixedInstrs[] = {
295 "PHI", "INLINEASM", "CFI_INSTRUCTION", "EH_LABEL",
296 "GC_LABEL", "KILL", "EXTRACT_SUBREG", "INSERT_SUBREG",
297 "IMPLICIT_DEF", "SUBREG_TO_REG", "COPY_TO_REGCLASS", "DBG_VALUE",
298 "REG_SEQUENCE", "COPY", "BUNDLE", "LIFETIME_START",
299 "LIFETIME_END", "STACKMAP", "PATCHPOINT", "LOAD_STACK_GUARD",
300 "STATEPOINT", "LOCAL_ESCAPE", "FAULTING_LOAD_OP",
301 nullptr};
302 const auto &Insts = getInstructions();
// Walk the array up to the nullptr terminator; every name must resolve to an
// instruction that lives in the "TargetOpcode" namespace.
303 for (const char *const *p = FixedInstrs; *p; ++p) {
304 const CodeGenInstruction *Instr = GetInstByName(*p, Insts, Records);
305 assert(Instr && "Missing target independent instruction");
306 assert(Instr->Namespace == "TargetOpcode" && "Bad namespace");
307 InstrsByEnum.push_back(Instr);
308 }
// Index of the first non-fixed entry; only entries from here on are sorted.
309 unsigned EndOfPredefines = InstrsByEnum.size();
310
// Append all instructions outside the "TargetOpcode" namespace.
311 for (const auto &I : Insts) {
312 const CodeGenInstruction *CGI = I.second.get();
313 if (CGI->Namespace != "TargetOpcode")
314 InstrsByEnum.push_back(CGI);
315 }
316
// Every instruction must have been picked up by one of the two loops above.
317 assert(InstrsByEnum.size() == Insts.size() && "Missing predefined instr");
318
319 // All of the instructions are now in random order based on the map iteration.
320 // Sort them by name.
// The listing shows two comparator variants, marked by the trailing
// annotations: the older one compares record names only. The v7.0 one compares
// (!isPseudo, name) tuples; the first element is false for pseudo instructions,
// so those sort ahead of the others, with the name ordering entries within
// each group.
321 std::sort(InstrsByEnum.begin() + EndOfPredefines, InstrsByEnum.end(),
322 [](const CodeGenInstruction *Rec1, const CodeGenInstruction *Rec2) {
323 return Rec1->TheDef->getName() < Rec2->TheDef->getName(); <-- v7.0删除
const auto &D1 = *Rec1->TheDef; <-- v7.0增加
const auto &D2 = *Rec2->TheDef;
return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
324 });
325 }
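CodeGenDAGPatterns::InferInstructionFlags() 3223行的hasUndefFlags()方法只是检测是否有未初始化的mayLoad,mayStore,hasSideEffects。如果指令连匹配模式都没有,我们需要特殊的处理,先把它们保存在Revisit容器里。其他的指令则进行以下的分析(注意:3228行传给Analyze()的是TreePattern指针,它对应Analyze()的另一个重载;下面列出的是接受PatternToMatch指针的重载,即后面3252行所调用的版本。实际的分析工作都在AnalyzeNode()中完成)。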
// Analyzes a PatternToMatch by running AnalyzeNode() on its source pattern.
2712 void Analyze(const PatternToMatch *Pat) {
2713 AnalyzeNode(Pat->getSrcPattern());
2714 }
// Recursively walks the pattern tree rooted at N and records the properties it
// finds in the members mayStore, mayLoad, hasSideEffects, isVariadic and
// isBitcast. The first four are only ever set to true here; isBitcast is
// assigned from IsNodeBitcast() when a "set" node is visited.
2741 void AnalyzeNode(const TreePatternNode *N) {
2742 if (N->isLeaf()) {
// Among leaves, only a DefInit whose record derives from ComplexPattern
// contributes anything; every other leaf returns without touching the flags.
2743 if (DefInit *DI = dyn_cast<DefInit>(N->getLeafValue())) {
2744 Record *LeafRec = DI->getDef();
2745 // Handle ComplexPattern leaves.
2746 if (LeafRec->isSubClassOf("ComplexPattern")) {
2747 const ComplexPattern &CP = CDP.getComplexPattern(LeafRec);
2748 if (CP.hasProperty(SDNPMayStore)) mayStore = true;
2749 if (CP.hasProperty(SDNPMayLoad)) mayLoad = true;
2750 if (CP.hasProperty(SDNPSideEffect)) hasSideEffects = true;
2751 }
2752 }
2753 return;
2754 }
2755
2756 // Analyze children.
// Children are visited before the properties of N itself are examined.
2757 for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
2758 AnalyzeNode(N->getChild(i));
2759
2760 // Ignore set nodes, which are not SDNodes.
// For a "set" node only isBitcast is updated; the property checks below are
// skipped for it.
2761 if (N->getOperator()->getName() == "set") {
2762 isBitcast = IsNodeBitcast(N);
2763 return;
2764 }
2765
2766 // Notice properties of the node.
2767 if (N->NodeHasProperty(SDNPMayStore, CDP)) mayStore = true;
2768 if (N->NodeHasProperty(SDNPMayLoad, CDP)) mayLoad = true;
2769 if (N->NodeHasProperty(SDNPSideEffect, CDP)) hasSideEffects = true;
2770 if (N->NodeHasProperty(SDNPVariadic, CDP)) isVariadic = true;
2771
2772 if (const CodeGenIntrinsic *IntInfo = N->getIntrinsicInfo(CDP)) {
2773 // If this is an intrinsic, analyze it.
// The three tests are threshold comparisons on ModRef.
// NOTE(review): this presumably relies on the ModRef enumerators being
// declared in increasing order of effect -- confirm in CodeGenIntrinsic.
2774 if (IntInfo->ModRef >= CodeGenIntrinsic::ReadArgMem)
2775 mayLoad = true;// These may load memory.
2776
2777 if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteArgMem)
2778 mayStore = true;// Intrinsics that can write to memory are 'mayStore'.
2779
2780 if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem)
2781 // WriteMem intrinsics can have other strange effects.
2782 hasSideEffects = true;
2783 }
2784 }
收集了匹配模式有关的属性信息后,由下面的方法根据这些信息推导指令的相关设置。对象InstInfo的mayLoad_Unset,mayStore_Unset,hasSideEffects_Unset如果是true,分别表示其指令定义中的mayLoad,mayStore,hasSideEffects是未初始化的。
// Checks the explicitly set flags of InstInfo against the properties collected
// from a pattern, then merges the pattern's properties into InstInfo.
//
// InstInfo: instruction whose flags are checked and updated.
// PatInfo:  analysis result of the pattern.
// PatDef:   record used as the location of error messages; also stored in
//           InstInfo.InferredFrom when InstInfo still has undefined flags.
// Returns true if at least one inconsistency was reported through PrintError().
2788 static bool InferFromPattern(CodeGenInstruction &InstInfo,
2789 const InstAnalyzer &PatInfo,
2790 Record *PatDef) {
2791 bool Error = false;
2792
2793 // Remember where InstInfo got its flags.
2794 if (InstInfo.hasUndefFlags())
2795 InstInfo.InferredFrom = PatDef;
2796
2797 // Check explicitly set flags for consistency.
// A flag counts as explicitly set when its *_Unset companion is false.
2798 if (InstInfo.hasSideEffects != PatInfo.hasSideEffects &&
2799 !InstInfo.hasSideEffects_Unset) {
2800 // Allow explicitly setting hasSideEffects = 1 on instructions, even when
2801 // the pattern has no side effects. That could be useful for div/rem
2802 // instructions that may trap.
// So only an explicit 0 on the instruction combined with a pattern that has
// side effects is an error.
2803 if (!InstInfo.hasSideEffects) {
2804 Error = true;
2805 PrintError(PatDef->getLoc(), "Pattern doesn't match hasSideEffects = " +
2806 Twine(InstInfo.hasSideEffects));
2807 }
2808 }
2809
// Unlike hasSideEffects and mayLoad, an explicit mayStore gets no allowance:
// a mismatch in either direction is reported.
2810 if (InstInfo.mayStore != PatInfo.mayStore && !InstInfo.mayStore_Unset) {
2811 Error = true;
2812 PrintError(PatDef->getLoc(), "Pattern doesn't match mayStore = " +
2813 Twine(InstInfo.mayStore));
2814 }
2815
2816 if (InstInfo.mayLoad != PatInfo.mayLoad && !InstInfo.mayLoad_Unset) {
2817 // Allow explicitly setting mayLoad = 1, even when the pattern has no loads.
2818 // Some targets translate imediates to loads.
2819 if (!InstInfo.mayLoad) {
2820 Error = true;
2821 PrintError(PatDef->getLoc(), "Pattern doesn't match mayLoad = " +
2822 Twine(InstInfo.mayLoad));
2823 }
2824 }
2825
2826 // Transfer inferred flags.
// The flags are OR-ed in, so a flag that is already set on the instruction is
// never cleared here. The merge happens even if an error was reported above.
2827 InstInfo.hasSideEffects |= PatInfo.hasSideEffects;
2828 InstInfo.mayStore |= PatInfo.mayStore;
2829 InstInfo.mayLoad |= PatInfo.mayLoad;
2830
2831 // These flags are silently added without any verification.
2832 InstInfo.isBitcast |= PatInfo.isBitcast;
2833
2834 // Don't infer isVariadic. This flag means something different on SDNodes and
2835 // instructions. For example, a CALL SDNode is variadic because it has the
2836 // call arguments as operands, but a CALL instruction is not variadic - it
2837 // has argument registers as implicit, not explicit uses.
2838
2839 return Error;
2840 }
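接下来处理PatternToMatch对象(即在指令定义之外给出的匹配模式)。在这个形式中,指令部分出现在目标模板里。3240行的getInstructionsInTree()获取目标模板里出现的指令Record对象。只有目标模板里恰好出现一条指令时才能进行推导(否则无法确定这些标志应该归属于哪条指令),因此如果Record对象不是恰好一个(没有或者有多个),就跳过这个模式。另外,推导只使用每条指令遇到的第一个模式:如果该指令的InferredFrom已经设置(3248行),这个模式在这里同样被跳过,留待后面验证。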
CodeGenDAGPatterns::InferInstructionFlags(续)
// Continuation of CodeGenDAGPatterns::InferInstructionFlags(). Revisit and
// Errors are the locals declared in the first part of the function.
3232 // Second, look for single-instruction patterns defined outside the
3233 // instruction.
3234 for (ptm_iterator I = ptm_begin(), E = ptm_end(); I != E; ++I) {
3235 const PatternToMatch &PTM = *I;
3236
3237 // We can only infer from single-instruction patterns, otherwise we won't
3238 // know which instruction should get the flags.
3239 SmallVector<Record*, 8> PatInstrs;
3240 getInstructionsInTree(PTM.getDstPattern(), PatInstrs);
// Skip destination patterns containing zero or more than one instruction.
3241 if (PatInstrs.size() != 1)
3242 continue;
3243
3244 // Get the single instruction.
3245 CodeGenInstruction &InstInfo = Target.getInstruction(PatInstrs.front());
3246
3247 // Only infer properties from the first pattern. We'll verify the others.
3248 if (InstInfo.InferredFrom)
3249 continue;
3250
// Here the pattern's source record, not the instruction record, is passed
// as the pattern definition.
3251 InstAnalyzer PatInfo(*this);
3252 PatInfo.Analyze(&PTM);
3253 Errors += InferFromPattern(InstInfo, PatInfo, PTM.getSrcRecord());
3254 }
3255
// Any conflict reported by InferFromPattern() in either pass is fatal.
3256 if (Errors)
3257 PrintFatalError("pattern conflicts");
3258
3259 // Revisit instructions with undefined flags and no pattern.
3260 if (Target.guessInstructionProperties()) {
3261 for (unsigned i = 0, e = Revisit.size(); i != e; ++i) {
3262 CodeGenInstruction &InstInfo = *Revisit[i];
// InferredFrom was set by the loop over PatternToMatch objects above.
3263 if (InstInfo.InferredFrom)
3264 continue;
3265 // The mayLoad and mayStore flags default to false.
3266 // Conservatively assume hasSideEffects if it wasn't explicit.
3267 if (InstInfo.hasSideEffects_Unset)
3268 InstInfo.hasSideEffects = true;
3269 }
// In guessing mode nothing is reported; the checks below are not reached.
3270 return;
3271 }
3272
3273 // Complain about any flags that are still undefined.
// Reached only when guessing is disabled. Problems are reported with
// PrintError(); this function raises no fatal error after this loop.
3274 for (unsigned i = 0, e = Revisit.size(); i != e; ++i) {
3275 CodeGenInstruction &InstInfo = *Revisit[i];
3276 if (InstInfo.InferredFrom)
3277 continue;
3278 if (InstInfo.hasSideEffects_Unset)
3279 PrintError(InstInfo.TheDef->getLoc(),
3280 "Can't infer hasSideEffects from patterns");
3281 if (InstInfo.mayStore_Unset)
3282 PrintError(InstInfo.TheDef->getLoc(),
3283 "Can't infer mayStore from patterns");
3284 if (InstInfo.mayLoad_Unset)
3285 PrintError(InstInfo.TheDef->getLoc(),
3286 "Can't infer mayLoad from patterns");
3287 }
3288 }
如果Revisit容器不是空,具体的处理取决于目标机器的InstrInfo定义中的guessInstructionProperties域(这是目标机器一级的设置,而不是单条指令的域;3260行通过Target对象调用的方法返回它的值)。如果它是1,表示TableGen可以猜一个安全的值,否则就只能以出错来处理了。这些安全值是mayLoad = mayStore = false,hasSideEffects = true。无论哪种情况,已经在第二步中从某个模式得到推导结果(即InferredFrom已设置)的指令都会被跳过。
回到CodeGenDAGPatterns的构造函数,推导了指令的属性后,还需要验证每个匹配模式的源模板所表现出的属性(有副作用、可能写内存、可能读内存)是否与其目标模板中指令的这些设置兼容。
// Checks, for every PatternToMatch, that the properties found in its source
// pattern (side effects, may store, may load) are reflected by the flags of at
// least one instruction in its destination pattern. Problems are reported per
// pattern with PrintError(); if any pattern had a problem, a fatal error is
// raised at the end.
3292 void CodeGenDAGPatterns::VerifyInstructionFlags() {
// Number of patterns with at least one message (not the number of messages).
3293 unsigned Errors = 0;
3294 for (ptm_iterator I = ptm_begin(), E = ptm_end(); I != E; ++I) {
3295 const PatternToMatch &PTM = *I;
3296 SmallVector<Record*, 8> Instrs;
3297 getInstructionsInTree(PTM.getDstPattern(), Instrs);
// Nothing to verify when the destination pattern contains no instruction.
3298 if (Instrs.empty())
3299 continue;
3300
3301 // Count the number of instructions with each flag set.
3302 unsigned NumSideEffects = 0;
3303 unsigned NumStores = 0;
3304 unsigned NumLoads = 0;
3305 for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
3306 const CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
3307 NumSideEffects += InstInfo.hasSideEffects;
3308 NumStores += InstInfo.mayStore;
3309 NumLoads += InstInfo.mayLoad;
3310 }
3311
3312 // Analyze the source pattern.
3313 InstAnalyzer PatInfo(*this);
3314 PatInfo.Analyze(&PTM);
3315
3316 // Collect error messages.
3317 SmallVector<std::string, 4> Msgs;
3318
3319 // Check for missing flags in the output.
3320 // Permit extra flags for now at least.
// Only the direction "pattern has the property, no output instruction has the
// flag" is checked in the three tests below.
3321 if (PatInfo.hasSideEffects && !NumSideEffects)
3322 Msgs.push_back("pattern has side effects, but hasSideEffects isn't set");
3323
3324 // Don't verify store flags on instructions with side effects. At least for
3325 // intrinsics, side effects implies mayStore.
3326 if (!PatInfo.hasSideEffects && PatInfo.mayStore && !NumStores)
3327 Msgs.push_back("pattern may store, but mayStore isn't set");
3328
3329 // Similarly, mayStore implies mayLoad on intrinsics.
3330 if (!PatInfo.mayStore && PatInfo.mayLoad && !NumLoads)
3331 Msgs.push_back("pattern may load, but mayLoad isn't set");
3332
3333 // Print error messages.
3334 if (Msgs.empty())
3335 continue;
3336 ++Errors;
3337
// Each message is reported at the pattern's source record; the wording of the
// suffix depends on whether the destination has one or several instructions.
3338 for (unsigned i = 0, e = Msgs.size(); i != e; ++i)
3339 PrintError(PTM.getSrcRecord()->getLoc(), Twine(Msgs[i]) + " on the " +
3340 (Instrs.size() == 1 ?
3341 "instruction" : "output instructions"));
3342 // Provide the location of the relevant instruction definitions.
3343 for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
// Skip the "defined here" note when the instruction record is itself the
// pattern's source record, whose location was already reported above.
3344 if (Instrs[i] != PTM.getSrcRecord())
3345 PrintError(Instrs[i]->getLoc(), "defined here");
3346 const CodeGenInstruction &InstInfo = Target.getInstruction(Instrs[i]);
// Also point at the record the flags were inferred from, unless that record
// is the instruction itself or the current pattern.
3347 if (InstInfo.InferredFrom &&
3348 InstInfo.InferredFrom != InstInfo.TheDef &&
3349 InstInfo.InferredFrom != PTM.getSrcRecord())
3350 PrintError(InstInfo.InferredFrom->getLoc(), "inferred from patttern");
3351 }
3352 }
3353 if (Errors)
3354 PrintFatalError("Errors in DAG patterns");
3355 }
至此,CodeGenDAGPatterns的构造函数全部执行完成。进而DAGISelEmitter的构造函数也返回了,并且立即执行DAGISelEmitter::run()方法,进行指令选择代码的生成。
V7.0的处理
V7.0最显著的改动在于ComputeInstrsByEnum()中用到的静态常量字符串数组FixedInstrs现在不再是在这个函数里逐项写出的,而是由下面的语句根据TargetOpcodes.def文件的内容自动生成的:
// v7.0 form of the fixed-instruction name list. HANDLE_TARGET_OPCODE(OPC) is
// defined to expand to the stringified opcode name followed by a comma, so the
// array contents come from the entries of the included .def file; the trailing
// nullptr terminates the list.
352 static const char *const FixedInstrs[] = {
353 #define HANDLE_TARGET_OPCODE(OPC) #OPC,
354 #include "llvm/Support/TargetOpcodes.def"
355 nullptr};
整个处理余下部分基本上大同小异,我们不再具体列举。