LLVM学习笔记(13)

3.3.6. 输出代码

3.3.6.1. 枚举常量

回到RegisterInfoEmitter::run(),现在可以调用RegisterInfoEmitter::runEnums()来向输出文件导出枚举常量了。这个输出文件被命名为TargetGenRegisterInfo.inc。以X86机器为例,产生的文件就是X86GenRegisterInfo.inc。首先,向这个文件输出的是若干枚举类型定义以标识寄存器、寄存器类,以及寄存器索引。

71       void RegisterInfoEmitter::runEnums(raw_ostream &OS,

72                                          CodeGenTarget &Target, CodeGenRegBank &Bank) {

73         const auto &Registers = Bank.getRegisters();

74      

75         // Register enums are stored as uint16_t in the tables. Make sure we'll fit.

76         assert(Registers.size() <= 0xffff && "Too many regs to fit in tables");

77      

78         std::string Namespace =

79             Registers.front().TheDef->getValueAsString("Namespace");

80      

81         emitSourceFileHeader("Target Register Enum Values", OS);

82      

83         OS << "\n#ifdef GET_REGINFO_ENUM\n";

84         OS << "#undef GET_REGINFO_ENUM\n";

85      

86         OS << "namespace llvm {\n\n";

87      

88         OS << "class MCRegisterClass;\n"

89            << "extern const MCRegisterClass " << Namespace

90            << "MCRegisterClasses[];\n\n";

91      

92         if (!Namespace.empty())

93           OS << "namespace " << Namespace << " {\n";

94         OS << "enum {\n  NoRegister,\n";

95      

96         for (const auto &Reg : Registers)

97           OS << "  " << Reg.getName() << " = " << Reg.EnumValue << ",\n";

98         assert(Registers.size() == Registers.back().EnumValue &&

99                "Register enum value mismatch!");

100       OS << "  NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";

101       OS << "};\n";

102       if (!Namespace.empty())

103         OS << "}\n";       <-- v7.0删除

 OS << "} // end namespace " << Namespace << "\n";  <-- v7.0增加

104    

105       const auto &RegisterClasses = Bank.getRegClasses();

106      if (!RegisterClasses.empty()) {

107    

108         // RegisterClass enums are stored as uint16_t in the tables.

109         assert(RegisterClasses.size() <= 0xffff &&

110                "Too many register classes to fit in tables");

111    

112         OS << "\n// Register classes\n";

113         if (!Namespace.empty())

114           OS << "namespace " << Namespace << " {\n";

115         OS << "enum {\n";

116         for (const auto &RC : RegisterClasses)

117           OS << "  " << RC.getName() << "RegClassID"

118              << " = " << RC.EnumValue << ",\n";

119         OS << "\n  };\n";

120         if (!Namespace.empty())

121           OS << "}\n";       <-- v7.0删除

                OS << "} // end namespace " << Namespace << "\n\n";  <-- v7.0增加

122       }

123    

124       const std::vector<Record*> &RegAltNameIndices = Target.getRegAltNameIndices();

125       // If the only definition is the default NoRegAltName, we don't need to

126       // emit anything.

127       if (RegAltNameIndices.size() > 1) {

128         OS << "\n// Register alternate name indices\n";

129         if (!Namespace.empty())

130           OS << "namespace " << Namespace << " {\n";

131         OS << "enum {\n";

132         for (unsigned i = 0, e = RegAltNameIndices.size(); i != e; ++i)

133           OS << "  " << RegAltNameIndices[i]->getName() << ",\t// " << i << "\n";

134         OS << "  NUM_TARGET_REG_ALT_NAMES = " << RegAltNameIndices.size() << "\n";

135         OS << "};\n";

136         if (!Namespace.empty())

137           OS << "}\n";       <-- v7.0删除

                OS << "} // end namespace " << Namespace << "\n\n";   <-- v7.0增加

138       }

139    

140       auto &SubRegIndices = Bank.getSubRegIndices();

141       if (!SubRegIndices.empty()) {

142         OS << "\n// Subregister indices\n";

143         std::string Namespace = SubRegIndices.front().getNamespace();

144         if (!Namespace.empty())

145           OS << "namespace " << Namespace << " {\n";

146         OS << "enum {\n  NoSubRegister,\n";

147         unsigned i = 0;

148         for (const auto &Idx : SubRegIndices)

149           OS << "  " << Idx.getName() << ",\t// " << ++i << "\n";

150         OS << "  NUM_TARGET_SUBREGS\n};\n";

151         if (!Namespace.empty())

152           OS << "}\n";       <-- v7.0删除

                 OS << "} // end namespace " << Namespace << "\n\n";        <-- v7.0增加

153       }

154    

155       OS << "} // End llvm namespace\n";

156       OS << "#endif // GET_REGINFO_ENUM\n\n";

157     }

81行的emitSourceFileHeader()为文件输出了位于头部的注释。接着是输出条件控制宏指令。在LLVM里,总是这样来包括这个输出文件的(以X86为例,文件X86MCTargetDesc.h):

#define GET_REGINFO_ENUM

#include "X86GenRegisterInfo.inc"

这是因为TargetGenRegisterInfo.inc是一个很大的文件,它包括了很多内容,而通常只需要包含其中的一部分。为此,每一部分都由类似的条件宏来包含。

TD文件里的Register类的Namespace域,作为包含寄存器枚举常量及寄存器类别枚举常量的名字空间,这里在X86GenRegisterInfo.inc文件里输出这样的语句:

class MCRegisterClass;

extern const MCRegisterClass X86MCRegisterClasses[];

比如X86MCRegisterClasses是由X86寄存器的名字空间“X86”与“MCRegisterClasses”合成的名字。这个数组定义在X86GenRegisterInfo.inc文件的下面。92~103行输出表示寄存器的枚举常量,输出的枚举常量类似于(以X86为例):

namespace X86 {

enum {

  NoRegister,

  AH = 1,

  AL = 2,

  …

  NUM_TARGET_REGS // 246          <-- v7.0这个数字是277

};

}

这些枚举常量是以寄存器名字的顺序输出的。106~122行则输出代表寄存器类的枚举常量(以X86为例)。注意与寄存器的EnumValue从1开始不同,寄存器类的EnumValue从0开始,因此CodeGenRegisterClass在容器中的索引与其EnumValue是一致的:

namespace X86 {

enum {

  GR8RegClassID = 0,

  GR8_NOREXRegClassID = 1,

  …

  VR512_with_sub_xmm_in_FR32RegClassID = 79,    <-- v7.0这个数字是85

  };

}

X86目标机器没有使用RegAltNameIndex定义,跳过124~138行代码。余下代码则输出代表寄存器索引的枚举常量,以X86为例:

namespace X86 {

enum {

  NoSubRegister,

  sub_8bit,      // 1

  sub_8bit_hi, // 2

  sub_8bit_hi_phony,    // 3     <-- v7.0增加

  sub_16bit,    // 3

  sub_16bit_hi,       // 5        <-- v7.0增加

  sub_32bit,    // 4

  sub_xmm,    // 5

  sub_ymm,    // 6

  NUM_TARGET_SUBREGS

};

}

} // End llvm namespace

#endif // GET_REGINFO_ENUM

3.3.6.2. MC使用的寄存器描述

这部分输出代码将构成新的部分,因此由宏GET_REGINFO_MC_DESC包含。MC(machine code)使用这些代码来描述寄存器。MC是一个雄心勃勃的目标文件与汇编(object-file-and-assembly)框架。它在几年前已是LLVM的一部分,以替换之前的汇编生成器。目前MC用于所有的(或至少重要的)LLVM目标机器汇编与目标文件的生成。MC还启动了“MCJIT”,这是一个基于MC层的JIT框架(http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html有MC子项目的一个概述)。

3.3.6.2.1. MC对寄存器的定义

MC对寄存器的封装与抽象是类MCRegisterInfo,它比较重要的成员有以下这些。如何对目标机器填充作为基类部分的MCRegisterInfo实例,正是这部分代码生成的重要任务。

135     class MCRegisterInfo {

136     public:

137       typedef const MCRegisterClass *regclass_iterator;

138    

139       /// DwarfLLVMRegPair - Emitted by tablegen so Dwarf<->LLVM reg mappings can be

140       /// performed with a binary search.

141       struct DwarfLLVMRegPair {

142         unsigned FromReg;

143         unsigned ToReg;

144    

145         bool operator<(DwarfLLVMRegPair RHS) const { return FromReg < RHS.FromReg; }

146       };

147    

148       /// SubRegCoveredBits - Emitted by tablegen: bit range covered by a subreg

149       /// index, -1 in any being invalid.

150       struct SubRegCoveredBits {

151         uint16_t Offset;

152         uint16_t Size;

153       };

154     private:

155       const MCRegisterDesc *Desc;                 // Pointer to the descriptor array

156       unsigned NumRegs;                           // Number of entries in the array

157       unsigned RAReg;                             // Return address register

158       unsigned PCReg;                             // Program counter register

159       const MCRegisterClass *Classes;             // Pointer to the regclass array

160       unsigned NumClasses;                        // Number of entries in the array

161       unsigned NumRegUnits;                       // Number of regunits.

162       const MCPhysReg (*RegUnitRoots)[2];         // Pointer to regunit root table.

163       const MCPhysReg *DiffLists;                 // Pointer to the difflists array

164       const unsigned *RegUnitMaskSequences;       // Pointer to lane mask sequences

165                                                   // for register units.

166       const char *RegStrings;                     // Pointer to the string table.

167       const char *RegClassStrings;                // Pointer to the class strings.

168       const uint16_t *SubRegIndices;              // Pointer to the subreg lookup

169                                                   // array.

170       const SubRegCoveredBits *SubRegIdxRanges;   // Pointer to the subreg covered

171                                                   // bit ranges array.

172       unsigned NumSubRegIndices;                  // Number of subreg indices.

173       const uint16_t *RegEncodingTable;           // Pointer to array of register

174                                                   // encodings.

175    

176       unsigned L2DwarfRegsSize;

177       unsigned EHL2DwarfRegsSize;

178       unsigned Dwarf2LRegsSize;

179       unsigned EHDwarf2LRegsSize;

180       const DwarfLLVMRegPair *L2DwarfRegs;        // LLVM to Dwarf regs mapping

181       const DwarfLLVMRegPair *EHL2DwarfRegs;      // LLVM to Dwarf regs mapping EH

182       const DwarfLLVMRegPair *Dwarf2LRegs;        // Dwarf to LLVM regs mapping

183       const DwarfLLVMRegPair *EHDwarf2LRegs;      // Dwarf to LLVM regs mapping EH

184       DenseMap<unsigned, int> L2SEHRegs;          // LLVM to SEH regs mapping

185    

186     public:

187       /// DiffListIterator - Base iterator class that can traverse the

188       /// differentially encoded register and regunit lists in DiffLists.

189       /// Don't use this class directly, use one of the specialized sub-classes

190       /// defined below.

191       class DiffListIterator {

192         uint16_t Val;

193         const MCPhysReg *List;

194    

195       protected:

196         /// Create an invalid iterator. Call init() to point to something useful.

197         DiffListIterator() : Val(0), List(nullptr) {}

198    

199         /// init - Point the iterator to InitVal, decoding subsequent values from

200         /// DiffList. The iterator will initially point to InitVal, sub-classes are

201         /// responsible for skipping the seed value if it is not part of the list.

202         void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {

203           Val = InitVal;

204           List = DiffList;

205         }

206    

207         /// advance - Move to the next list position, return the applied

208         /// differential. This function does not detect the end of the list, that

209         /// is the caller's responsibility (by checking for a 0 return value).

210         unsigned advance() {

211           assert(isValid() && "Cannot move off the end of the list.");

212           MCPhysReg D = *List++;

213           Val += D;

214           return D;

215         }

216    

217       public:

218    

219         /// isValid - returns true if this iterator is not yet at the end.

220         bool isValid() const { return List; }

221    

222         /// Dereference the iterator to get the value at the current position.

223         unsigned operator*() const { return Val; }

224    

225         /// Pre-increment to move to the next position.

226         void operator++() {

227           // The end of the list is encoded as a 0 differential.

228           if (!advance())

229             List = nullptr;

230         }

231       };

232    

233       // These iterators are allowed to sub-class DiffListIterator and access

234       // internal list pointers.

235       friend class MCSubRegIterator;

236       friend class MCSubRegIndexIterator;

237       friend class MCSuperRegIterator;

238       friend class MCRegUnitIterator;

239       friend class MCRegUnitMaskIterator;

240       friend class MCRegUnitRootIterator;

下面我们将要看到,在寄存器可以通过唯一的数值(CodeGenRegister中的EnumValue)区分时,寄存器间的关系就可以通过一系列的数值数组来表示。因此,上面的MCPhysReg实际上是uint16_t的typedef。

为了使这些数值表示有较小的冗余度,而且有高的访问效率,TableGen使用差分编码表来记录这些关系,155行MCRegisterDesc类型的Desc成员将记录这些差分表的使用情况。191行的嵌套类DiffListIterator为各个差分表的迭代器(235~240行的迭代器)提供了基类与基本的差分表遍历实现。

类似的,MC使用类MCRegisterClass来描述寄存器类。MCRegisterClass包含的数据成员有这些:

30       class MCRegisterClass {

31       public:

32         typedef const MCPhysReg* iterator;

33         typedef const MCPhysReg* const_iterator;

34      

35         const iterator RegsBegin;

36         const uint8_t *const RegSet;

37         const uint32_t NameIdx;

38         const uint16_t RegsSize;

39         const uint16_t RegSetSize;

40         const uint16_t ID;

41         const uint16_t RegSize, Alignment; // Size & Alignment of register in bytes

42         const int8_t CopyCost;

43         const bool Allocatable;

这里的代码生成将为目标机器产生MCRegisterClass所需的内容,并生成对应的MCRegisterClass数组。

3.3.6.2.2. 差分编码

因为寄存器、寄存器索引及寄存器类的EnumValue都是连续的正整数,那么使用遵循一定顺序(即尽量与EnumValue的分配次序一致)的差分编码将能得到很好的编码效率。差分编码是这样的一个序列:d1, d2, d3, …, dn。给定一个初始值Init,将得到这样一个序列:Init+d1, Init+d1+d2, Init+d1+d2+d3, …, Init+d1+d2+…+dn。

进一步的,我们也可以不从序列的头部开始,比如给定另一个初始值Init2,并指定从差分序列的d3处开始,那么可以得到序列:Init2+d3, Init2+d3+d4, …, Init2+d3+d4+…+dn。

这样,使用不同的初始值、不同的偏移,若干序列可以通过同一个差分序列生成。因此,只要编码得当,就可以得到可观的压缩率。下面RegisterInfoEmitter::runMCDesc()的主要工作就是生成合适的(多个)差分序列。

782     void

783     RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,

784                                    CodeGenRegBank &RegBank) {

785       emitSourceFileHeader("MC Register Information", OS);

786

787       OS << "\n#ifdef GET_REGINFO_MC_DESC\n";

788       OS << "#undef GET_REGINFO_MC_DESC\n";

789    

790       const auto &Regs = RegBank.getRegisters();

791    

792       auto &SubRegIndices = RegBank.getSubRegIndices();

793       // The lists of sub-registers and super-registers go in the same array.  That

794       // allows us to share suffixes.

795       typedef std::vector<const CodeGenRegister*> RegVec;

796    

797       // Differentially encoded lists.

798       SequenceToOffsetTable<DiffVec> DiffSeqs;

799       SmallVector<DiffVec, 4> SubRegLists(Regs.size());

800       SmallVector<DiffVec, 4> SuperRegLists(Regs.size());

801       SmallVector<DiffVec, 4> RegUnitLists(Regs.size());

802       SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());

803    

804       // List of lane masks accompanying register unit sequences.

805       SequenceToOffsetTable<MaskVec> LaneMaskSeqs;

806       SmallVector<MaskVec, 4> RegUnitLaneMasks(Regs.size());

807    

808       // Keep track of sub-register names as well. These are not differentially

809       // encoded.

810       typedef SmallVector<const CodeGenSubRegIndex*, 4> SubRegIdxVec;

811       SequenceToOffsetTable<SubRegIdxVec, deref<llvm::less>> SubRegIdxSeqs;

812      SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size());

813    

814       SequenceToOffsetTable<std::string> RegStrings;

815    

816       // Precompute register lists for the SequenceToOffsetTable.

817       unsigned i = 0;

818       for (auto I = Regs.begin(), E = Regs.end(); I != E; ++I, ++i) {

819         const auto &Reg = *I;

820         RegStrings.add(Reg.getName());

821    

822         // Compute the ordered sub-register list.

823         SetVector<const CodeGenRegister*> SR;

824         Reg.addSubRegsPreOrder(SR, RegBank);

825         diffEncode(SubRegLists[i], Reg.EnumValue, SR.begin(), SR.end());

826         DiffSeqs.add(SubRegLists[i]);

827    

828         // Compute the corresponding sub-register indexes.

829         SubRegIdxVec &SRIs = SubRegIdxLists[i];

830         for (unsigned j = 0, je = SR.size(); j != je; ++j)

831           SRIs.push_back(Reg.getSubRegIndex(SR[j]));

832         SubRegIdxSeqs.add(SRIs);

833    

834         // Super-registers are already computed.

835         const RegVec &SuperRegList = Reg.getSuperRegs();

836         diffEncode(SuperRegLists[i], Reg.EnumValue, SuperRegList.begin(),

837                    SuperRegList.end());

838         DiffSeqs.add(SuperRegLists[i]);

839    

840         // Differentially encode the register unit list, seeded by register number.

841         // First compute a scale factor that allows more diff-lists to be reused:

842         //

843         //   D0 -> (S0, S1)

844         //   D1 -> (S2, S3)

845         //

846         // A scale factor of 2 allows D0 and D1 to share a diff-list. The initial

847         // value for the differential decoder is the register number multiplied by

848         // the scale.

849         //

850         // Check the neighboring registers for arithmetic progressions.

851         unsigned ScaleA = ~0u, ScaleB = ~0u;

852         SparseBitVector<> RUs = Reg.getNativeRegUnits();

853        if (I != Regs.begin() &&

854             std::prev(I)->getNativeRegUnits().count() == RUs.count())

855           ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin();

856         if (std::next(I) != Regs.end() &&

857             std::next(I)->getNativeRegUnits().count() == RUs.count())

858           ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin();

859         unsigned Scale = std::min(ScaleB, ScaleA);

860         // Default the scale to 0 if it can't be encoded in 4 bits.

861         if (Scale >= 16)

862           Scale = 0;

863         RegUnitInitScale[i] = Scale;

864         DiffSeqs.add(diffEncode(RegUnitLists[i], Scale * Reg.EnumValue, RUs));

865    

866         const auto &RUMasks = Reg.getRegUnitLaneMasks();

867         MaskVec &LaneMaskVec = RegUnitLaneMasks[i];

868         assert(LaneMaskVec.empty());

869         LaneMaskVec.insert(LaneMaskVec.begin(), RUMasks.begin(), RUMasks.end());

870         // Terminator mask should not be used inside of the list.

871     #ifndef NDEBUG

872         for (unsigned M : LaneMaskVec) {

873           assert(M != ~0u && "terminator mask should not be part of the list");

874         }

875     #endif

876         LaneMaskSeqs.add(LaneMaskVec);

877       }

878    

879       // Compute the final layout of the sequence table.

880       DiffSeqs.layout();

881       LaneMaskSeqs.layout();

882       SubRegIdxSeqs.layout();

模板类SequenceToOffsetTable提供差分编码的实作。因此,有几个SequenceToOffsetTable的具现类就将有几个差分编码表。这里,将用到798行的DiffSeqs(以DiffVec即SmallVector<uint16_t, 4>具现),814行的RegStrings(以std::string具现),及954行的RegClassStrings(以std::string具现)。

SequenceToOffsetTable::add方法提供了向差分编码表添加序列的功能。

69         void add(const SeqT &Seq) {

70           assert(Entries == 0 && "Cannot call add() after layout()");

71           typename SeqMap::iterator I = Seqs.lower_bound(Seq);

72      

73           // If SeqMap contains a sequence that has Seq as a suffix, I will be

74           // pointing to it.

75           if (I != Seqs.end() && isSuffix(Seq, I->first))

76             return;

77      

78           I = Seqs.insert(I, std::make_pair(Seq, 0u));

79      

80           // The entry before I may be a suffix of Seq that can now be erased.

81           if (I != Seqs.begin() && isSuffix((--I)->first, Seq))

82             Seqs.erase(I);

83         }

SequenceToOffsetTable成员Seqs是类型SeqMap(std::map<SeqT, unsigned, SeqLess>,SeqT是具现时的模板实参)。71行的lower_bound返回第一个不小于Seq的迭代器,SeqLess执行比较操作:

41         struct SeqLess : public std::binary_function<SeqT, SeqT, bool> {

42           Less L;

43           bool operator()(const SeqT &A, const SeqT &B) const {

44             return std::lexicographical_compare(A.rbegin(), A.rend(),

45                                                 B.rbegin(), B.rend(), L);

46           }

47         };

这是个嵌套在SequenceToOffsetTable定义中的仿函数,42行的Less是SequenceToOffsetTable的模板参数,缺省为std::less<typename SeqT::value_type>(比如SeqT是DiffVec,SeqT::value_type就是uint16_t)。比较按字母、数字序,从尾到头进行。

在SequenceToOffsetTable::add()的75行,如果Seq是找到的序列的后缀(即一部分),那么可以忽略Seq。否则,插入一个“ (Seq, 0) ”。另外,如果找到序列前一个序列是Seq的后缀,也要删除这个序列。这样可以保证一个最大唯一序列。isSuffix的定义是这样的:

60         static bool isSuffix(const SeqT &A, const SeqT &B) {

61           return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());

62         }

在823行开始,对子寄存器进行差分编码。首先要对寄存器DAG进行排序,以生成效果理想的差分编码表。因此函数CodeGenRegister::addSubRegsPreOrder()以前序遍历当前寄存器为根的寄存器DAG,将子寄存器保存入参数OSet中。之所以要前序遍历,因为寄存器的RegUnit就是在前序遍历寄存器DAG的过程中生成的。这样能得到更高效的差分编码表。

508     void

509     CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet,

510                                         CodeGenRegBank &RegBank) const {

511       assert(SubRegsComplete && "Must precompute sub-registers");

512       for (unsigned i = 0, e = ExplicitSubRegs.size(); i != e; ++i) {

513         CodeGenRegister *SR = ExplicitSubRegs[i];

514         if (OSet.insert(SR))

515           SR->addSubRegsPreOrder(OSet, RegBank);

516       }

517       // Add any secondary sub-registers that weren't part of the explicit tree.

518       for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();

519            I != E; ++I)

520         OSet.insert(I->second);

521     }

diffEncode()方法输出一个差分序列,序列的初始值由第二个参数指定(对子寄存器表,初始值是寄存器的EnumValue)。这个序列通过SequenceToOffsetTable::add()添加到差分表DiffSeqs。

575     static

576     DiffVec &diffEncode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) {

577       assert(V.empty() && "Clear DiffVec before diffEncode.");

578       uint16_t Val = uint16_t(InitVal);

579    

580       for (uint16_t Cur : List) {

581         V.push_back(Cur - Val);

582         Val = Cur;

583       }

584       return V;

585     }

接着是对寄存器索引的差分编码,差分序列保存在SubRegIdxSeqs。在836行对包含当前寄存器的寄存器集进行差分编码,结果也保存在DiffSeqs中。

接下来对寄存器单元(实际上是RegUnit的序号)进行差分编码,这里仅考虑TD文件中定义的寄存器,不包括因调整权重加入的单元。840~850行注释谈到使用比例因子可以提高差分列表的重用率,这个比例因子应用于初始值上。以843~844行的例子来说,假定D0、D1的EnumValue是相邻的,S0~S3的EnumValue也是相邻的(因为在TD文件中,它们通常是顺序定义的),如果对D0、D1进行差分编码,假定D0是5,D1是6,S0~S3分别是1~4,那么对应子寄存器的差分序列分别是-4 (5-4, S0)、1 (5-4+1, S1)与-3 (6-3, S2)、1 (6-3+1, S3)。而如果将初始值扩大2倍,则得到两个都是-9, 1的序列,这里的2得自S3-S1。因此,853~859行分别计算该寄存器与前后寄存器第一个单元序号的差值(以D0、D1为例,就是S2-S0)。

接下来866~876行将寄存器的Lane掩码保存入另一个SequenceToOffsetTable—LaneMaskSeqs。Lane掩码都是一个向量(866行的getRegUnitLaneMasks返回一个ArrayRef<unsigned>容器)。

最后,调用SequenceToOffsetTable::layout()方法使SequenceToOffsetTable对象最终定型。这个方法每个SequenceToOffsetTable实例只能调用一次。

93         void layout() {

94           assert(Entries == 0 && "Can only call layout() once");

95           // Lay out the table in Seqs iteration order.

96           for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;

97                ++I) {

98             I->second = Entries;

99             // Include space for a terminator.

100           Entries += I->first.size() + 1;

101         }

102       }

在通过SequenceToOffsetTable::add方法添加差分序列时,I->second被初始化为0;现在在上面98行,它被设为该序列在差分表中的起始偏移,即此前各序列的累计大小(这个偏移也用于下面输出的差分表的注释里,提高生成代码的可读性)。另外,在100行要记入每行终结符的空间,这个终结符将在emit方法里指定(缺省为0)。

3.3.6.2.3. 生成的差分编码表

RegisterInfoEmitter::runMCDesc()的余下部分就是输出前面准备的差分编码表。

RegisterInfoEmitter::runMCDesc(续)

884       OS << "namespace llvm {\n\n";

885    

886       const std::string &TargetName = Target.getName();

887    

888       // Emit the shared table of differential lists.

889       OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";

890       DiffSeqs.emit(OS, printDiff16);

891       OS << "};\n\n";

892    

893       // Emit the shared table of regunit lane mask sequences.

894       OS << "extern const unsigned " << TargetName << "LaneMaskLists[] = {\n";

895       LaneMaskSeqs.emit(OS, printMask, "~0u");

896       OS << "};\n\n";

897    

898       // Emit the table of sub-register indexes.

899       OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[] = {\n";

900       SubRegIdxSeqs.emit(OS, printSubRegIndex);

901       OS << "};\n\n";

902    

903       // Emit the table of sub-register index sizes.

904       OS << "extern const MCRegisterInfo::SubRegCoveredBits "

905          << TargetName << "SubRegIdxRanges[] = {\n";

906       OS << "  { " << (uint16_t)-1 << ", " << (uint16_t)-1 << " },\n";

907       for (const auto &Idx : SubRegIndices) {

908         OS << "  { " << Idx.Offset << ", " << Idx.Size << " },\t// "

909            << Idx.getName() << "\n";

910       }

911       OS << "};\n\n";

912    

913       // Emit the string table.

914       RegStrings.layout();

915       OS << "extern const char " << TargetName << "RegStrings[] = {\n";

916       RegStrings.emit(OS, printChar);

917       OS << "};\n\n";

918    

919       OS << "extern const MCRegisterDesc " << TargetName

920          << "RegDesc[] = { // Descriptors\n";

921       OS << "  { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n";

922    

923       // Emit the register descriptors now.

924       i = 0;

925       for (const auto &Reg : Regs) {

926         OS << "  { " << RegStrings.get(Reg.getName()) << ", "

927            << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])

928            << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "

929            << (DiffSeqs.get(RegUnitLists[i]) * 16 + RegUnitInitScale[i]) << ", "

930            << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";

931         ++i;

932       }

933       OS << "};\n\n";      // End of register descriptors...

934    

935       // Emit the table of register unit roots. Each regunit has one or two root

936       // registers.

937       OS << "extern const MCPhysReg " << TargetName << "RegUnitRoots[][2] = {\n";

938       for (unsigned i = 0, e = RegBank.getNumNativeRegUnits(); i != e; ++i) {

939         ArrayRef<const CodeGenRegister*> Roots = RegBank.getRegUnit(i).getRoots();

940         assert(!Roots.empty() && "All regunits must have a root register.");

941         assert(Roots.size() <= 2 && "More than two roots not supported yet.");

942         OS << "  { " << getQualifiedName(Roots.front()->TheDef);

943         for (unsigned r = 1; r != Roots.size(); ++r)

944           OS << ", " << getQualifiedName(Roots[r]->TheDef);

945         OS << " },\n";

946       }

947       OS << "};\n\n";

948    

949       const auto &RegisterClasses = RegBank.getRegClasses();

950    

951       // Loop over all of the register classes... emitting each one.

952       OS << "namespace {     // Register classes...\n";

953    

954       SequenceToOffsetTable<std::string> RegClassStrings;

955    

956       // Emit the register enum value arrays for each RegisterClass

957       for (const auto &RC : RegisterClasses) {

958         ArrayRef<Record*> Order = RC.getOrder();

959    

960         // Give the register class a legal C name if it's anonymous.

961         std::string Name = RC.getName();

962    

963         RegClassStrings.add(Name);

964    

965         // Emit the register list now.

966         OS << "  // " << Name << " Register Class...\n"

967            << "  const MCPhysReg " << Name

968            << "[] = {\n    ";

969         for (unsigned i = 0, e = Order.size(); i != e; ++i) {

970           Record *Reg = Order[i];

971           OS << getQualifiedName(Reg) << ", ";

972         }

973         OS << "\n  };\n\n";

974    

975         OS << "  // " << Name << " Bit set.\n"

976            << "  const uint8_t " << Name

977            << "Bits[] = {\n    ";

978         BitVectorEmitter BVE;

979         for (unsigned i = 0, e = Order.size(); i != e; ++i) {

980           Record *Reg = Order[i];

981           BVE.add(Target.getRegBank().getReg(Reg)->EnumValue);

982         }

983         BVE.print(OS);

984         OS << "\n  };\n\n";

985    

986       }

987       OS << "}\n\n";

988    

989       RegClassStrings.layout();

990       OS << "extern const char " << TargetName << "RegClassStrings[] = {\n";

991       RegClassStrings.emit(OS, printChar);

992       OS << "};\n\n";

差分编码表通过SequenceToOffsetTable的emit()方法输出。注意117行的参数Term,它在127行追加到每行行末。

115       void emit(raw_ostream &OS,

116                 void (*Print)(raw_ostream&, ElemT),

117                 const char *Term = "0") const {

118         assert(Entries && "Call layout() before emit()");

119         for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end();

120              I != E; ++I) {

121           OS << "  /* " << I->second << " */ ";

122          for (typename SeqT::const_iterator SI = I->first.begin(),

123                  SE = I->first.end(); SI != SE; ++SI) {

124             Print(OS, *SI);

125             OS << ", ";

126           }

127           OS << Term << ",\n";

128         }

129       }

X86目标机器的寄存器编码差分表的内容如下。它包含了这几部分内容:子寄存器、上级寄存器、寄存器单元列表。

extern const MCPhysReg X86RegDiffLists[] = {

  /* 0 */ 0, 1, 0,

  /* 3 */ 2, 1, 0,

  /* 6 */ 5, 1, 0,

  /* 9 */ 65522, 16, 1, 0,

  /* 13 */ 65522, 17, 1, 0,

  /* 17 */ 65427, 1, 0,

  /* 20 */ 65475, 1, 0,

  /* 23 */ 65520, 65522, 1, 0,

  /* 27 */ 65520, 65527, 1, 0,

  /* 31 */ 8, 2, 0,

  /* 34 */ 4, 0,

  /* 36 */ 65521, 8, 0,

  /* 39 */ 9, 0,

  /* 41 */ 13, 0,

  /* 43 */ 65535, 65519, 14, 0,

  /* 47 */ 65535, 65520, 14, 0,

  /* 51 */ 65528, 15, 0,

  /* 54 */ 2, 6, 16, 0,

  /* 58 */ 5, 6, 16, 0,

  /* 62 */ 65535, 9, 16, 0,

  /* 66 */ 2, 10, 16, 0,

  /* 70 */ 3, 10, 16, 0,

  /* 74 */ 3, 13, 16, 0,

  /* 78 */ 4, 13, 16, 0,

  /* 82 */ 65535, 14, 16, 0,

  /* 86 */ 1, 16, 16, 0,

  /* 90 */ 2, 16, 16, 0,

  /* 94 */ 17, 0,

  /* 96 */ 32, 32, 0,

  /* 99 */ 65221, 0,

  /* 101 */ 65381, 0,

  /* 103 */ 65389, 0,

  /* 105 */ 65397, 0,

  /* 107 */ 16, 65528, 65416, 0,

  /* 111 */ 65445, 0,

  /* 113 */ 65477, 0,

  /* 115 */ 65504, 65504, 0,

  /* 118 */ 65509, 0,

  /* 120 */ 120, 8, 65520, 0,

  /* 124 */ 65523, 0,

  /* 126 */ 65530, 0,

  /* 128 */ 65531, 0,

  /* 130 */ 65532, 0,

  /* 132 */ 65520, 65530, 65534, 65533, 0,

  /* 137 */ 65534, 0,

  /* 139 */ 65520, 65523, 65533, 65535, 0,

  /* 144 */ 65520, 65526, 65534, 65535, 0,

  /* 149 */ 65520, 65520, 65535, 65535, 0,     <-- v7.0这个偏移是298

};

注意,这些序列都是以0结尾,因此利用这个差分表的序列不能出现重复的元素(因为重复元素的差分是0)。另外该数组的类型是uint16_t,因此数组内超过32767的实际上都是负数。

X86目标机器的寄存器Lane掩码差分表则包含如下内容:

extern const unsigned X86LaneMaskLists[] = {

  /* 0 */ 0x00000000, ~0u,

  /* 2 */ 0x00000002, 0x00000001, ~0u,

  /* 5 */ 0x00000003, ~0u,

  /* 7 */ 0x00000004, ~0u,

};

因为v7.0使用LaneBitmask替代unsigned,所以X86LaneMaskLists的输出是:

extern const LaneBitmask X86LaneMaskLists[] = {

  /* 0 */ LaneBitmask(0x00000000), LaneBitmask::getAll(),

  /* 2 */ LaneBitmask(0x00000002), LaneBitmask(0x00000001), LaneBitmask::getAll(),

  /* 5 */ LaneBitmask(0x00000001), LaneBitmask(0x00000004), LaneBitmask::getAll(),

  /* 8 */ LaneBitmask(0x00000002), LaneBitmask(0x00000001), LaneBitmask(0x00000008), LaneBitmask::getAll(),

  /* 12 */ LaneBitmask(0x00000001), LaneBitmask(0x00000004), LaneBitmask(0x00000008), LaneBitmask::getAll(),

  /* 16 */ LaneBitmask(0x00000007), LaneBitmask(0x00000008), LaneBitmask::getAll(),

  /* 19 */ LaneBitmask(0x00000010), LaneBitmask::getAll(),

};

使用LaneBitmask::getAll()作为差分序列的结尾。

因为掩码值本身可以是0(例如上面的第一个序列),这些序列不能以0结尾,而以~0u结尾。差分表的内容总是最长的序列,在序列重复后缀多的情形下,这个表就会很小,比如这个Lane掩码表。

第三张差分表是寄存器索引表。

extern const uint16_t X86SubRegIdxLists[] = {

  /* 0 */ 4, 3, 1, 0,

  /* 4 */ 4, 3, 1, 2, 0,

  /* 9 */ 4, 3, 0,

  /* 12 */ 6, 5, 0,    <-- v7.0这个偏移是22

};

X86目标机器的寄存器相当有规律,因此这张表也是很小的(上面的Lane掩码也是这个原因)。下一张表给出的则是所有寄存器索引的位置与大小。注意!这张表不是差分表,其中第一项是保留不用的。其他项中第1个数字是该索引对应子寄存器的偏移,第2个数字则是该子寄存器的大小。

extern const MCRegisterInfo::SubRegCoveredBits X86SubRegIdxRanges[] = {

  { 65535, 65535 },

  { 0, 8 },  // sub_8bit

  { 8, 8 },  // sub_8bit_hi

  { 8, 8 },  // sub_8bit_hi_phony    <-- v7.0增加

  { 0, 16 }, // sub_16bit

  { 16, 16 },      // sub_16bit_hi    <-- v7.0增加

  { 0, 32 }, // sub_32bit

  { 0, 128 },      // sub_xmm

  { 0, 256 },      // sub_ymm

};

可见X86目标机器使用的寄存器索引并不多。

最后的表X86RegStrings也是一张差分表,内容是组成寄存器名字的字符(一共219行):

extern const char X86RegStrings[] = {

  /* 0 */ 'X', 'M', 'M', '1', '0', 0,

  /* 6 */ 'Y', 'M', 'M', '1', '0', 0,

  /* 12 */ 'Z', 'M', 'M', '1', '0', 0,

  /* 18 */ 'C', 'R', '1', '0', 0,

  …

  /* 1051 */ 'R', 'I', 'Z', 0,

};

现在对寄存器的各种描述变成了对这些差分表内容的援引,类MCRegisterDesc就是这样的一个定义:

105     struct MCRegisterDesc {

106       uint32_t Name;      // Printable name for the reg (for debugging)

107       uint32_t SubRegs;   // Sub-register set, described above

108       uint32_t SuperRegs; // Super-register set, described above

109    

110       // Offset into MCRI::SubRegIndices of a list of sub-register indices for each

111       // sub-register in SubRegs.

112       uint32_t SubRegIndices;

113    

114       // RegUnits - Points to the list of register units. The low 4 bits holds the

115       // Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.

116       uint32_t RegUnits;

117    

118       /// Index into list with lane mask sequences. The sequence contains a lanemask

119       /// for every register unit.

120       uint16_t RegUnitLaneMasks;

121     };

每一个寄存器(包含子寄存器)都对应一个MCRegisterDesc对象,这个对象给出在上述表中的偏移,根据这些偏移就可以获取这个寄存器的相关信息。

由SequenceToOffsetTable::get()方法向每个MCRegisterDesc实例给出这些偏移值。

105       unsigned get(const SeqT &Seq) const {

106         assert(Entries && "Call layout() before get()");

107         typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);

108         assert(I != Seqs.end() && isSuffix(Seq, I->first) &&

109                "get() called with sequence that wasn't added first");

110         return I->second + (I->first.size() - Seq.size());

111       }

因为SequenceToOffsetTable的编码方式是只保留最长序列,较短的、作为某个最长序列后缀的序列由该最长序列来表示,因此get()的return语句中的I->first.size() - Seq.size()是使用最长序列时必要的偏移调整。由此会导出下面这样的数组。第一行是保留不用的,因为这个数组的下标实际上是寄存器的EnumValue(从1开始)。这张表的有效项有245个(v7.0中是277个)。

extern const MCRegisterDesc X86RegDesc[] = { // Descriptors

  { 5, 0, 0, 0, 0, 0 },

  { 870, 2, 90, 3, 2273, 0 },

  { 898, 2, 86, 3, 2273, 0 },

  { 1016, 151, 87, 6, 0, 2 },

  …

  { 997, 122, 108, 2, 1617, 3 },

};

以第二行为例。X86RegStrings的偏移870是“AH”。X86RegDiffLists的偏移2是终结符,表示没有子寄存器。X86RegDiffLists的偏移90是:2, 16, 16, 0。还原差分后,得到的寄存器EnumValue是1+2=3 (AX),1+2+16=19 (EAX),1+2+16+16=35 (RAX)。X86SubRegIdxLists的偏移3也是终结符。2273除以16后得142余1(起始值是1*EnumValue),142仍然是X86RegDiffLists的偏移,对应65535,由RegUnit-1*EnumValue得到,因此RegUnit=0。最后的0是在X86LaneMaskLists的偏移。

在第三行,898对应“AL”。2对应没有子寄存器。86得到寄存器2+1=3 (AX),2+1+16=19 (EAX),2+1+16+16=35 (RAX)。X86SubRegIdxLists的偏移3也是终结符。第5个数仍然是2273,但RegUnit-1*EnumValue中EnumValue=2 ,所以RegUnit=1。最后的0是在X86LaneMaskLists的偏移。

在第四行,1016指向X86RegStrings中的“AX”,151在X86RegDiffLists对应序列:65535, 65535。得到寄存器的EnumValue:3-1=2 (AL),3-1-1=1 (AH)。87对应X86RegDiffLists序列:16, 16。得到寄存器的EnumValue:3+16=19 (EAX),3+16+16=35 (RAX)。6对应X86SubRegIdxLists序列:1, 2。即子寄存器索引的编号。0对应X86RegDiffLists序列:0, 1。起始值为0(0*EnumValue),则第一个RegUnit是0,第二个RegUnit是1。

这里2~4行正好描述了两个子寄存器与包含它们的上级寄存器。

在代码生成时,MCRegisterInfo需要这样解读X86RegDesc,这些操作由迭代器MCSubRegIterator,MCSubRegIndexIterator,MCSuperRegIterator,MCRegUnitIterator,MCRegUnitMaskIterator,MCRegUnitRootIterator,MCRegAliasIterator实现。

接下来输出描述每个RegUnit所在DAG的根的数组,当前版本LLVM不支持超过1个根的DAG(但更早的版本是支持的)。对X86目标机器,这个数组包含100个项。

extern const MCPhysReg X86RegUnitRoots[][2] = {

  { X86::AH },

  { X86::AL },

  { X86::BH },

  { X86::BL },

  { X86::BPL },

   …

  { X86::XMM30 },

  { X86::XMM31 },

};

接着,输出描述各个寄存器类别的数组。RegisterClass中的Order容器规定了这个类别中寄存器的分配(使用)次序。下面的GR8是其中一个例子:

  const uint16_t GR8[] = {

    X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH, X86::SIL, X86::DIL, X86::BPL, X86::SPL, X86::R8B, X86::R9B, X86::R10B, X86::R11B, X86::R14B, X86::R15B, X86::R12B, X86::R13B,

  };

这些数组封装在一个匿名名字空间内。X86::AL等都是由前面的RegisterInfoEmitter::runEnums()输出的代表寄存器ID的枚举常量(CodeGenRegister的EnumValue)。

另外还有一个数组,这个数组描述的同样是寄存器类所包含的寄存器,不过它采用位图的格式(只记录寄存器是否属于该类,不体现分配顺序):将数组视为连续的内存,第一个元素位于低址,最后的元素位于内存高址。对某一寄存器,将该数组第EnumValue/8字节的第EnumValue%8比特置为1:

  const uint8_t GR8Bits[] = {

    0xb6, 0xa6, 0x01, 0x00, 0x00, 0x40, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x3f,

  };

以第一个字节为例,它的二进制形式是:10110110 (0xb6),代表寄存器X86::AH,X86::AL,X86::BH,X86::BL,X86::BPL(EnumValue分别是1,2,4,5,7)。

每个寄存器类别都会输出这样两个数组。

跟着输出的是保存了寄存器类名字的字符数组X86RegClassStrings。

extern const char X86RegClassStrings[] = {

  /* 0 */ 'R', 'F', 'P', '8', '0', 0,

  /* 6 */ 'V', 'K', '1', 0,

  /* 10 */ 'V', 'R', '5', '1', '2', 0,

  /* 16 */ 'V', 'K', '3', '2', 0,

  …

  /* 912 */ 'G', 'R', '6', '4', '_', 'w', 'i', 't', 'h', '_', 's', 'u', 'b', '_', '8', 'b', 'i', 't', 0,

};

RegisterInfoEmitter::runMCDesc(续)

994       OS << "extern const MCRegisterClass " << TargetName

995          << "MCRegisterClasses[] = {\n";

996    

997       for (const auto &RC : RegisterClasses) {

998         // Asserts to make sure values will fit in table assuming types from

999         // MCRegisterInfo.h

1000      assert((RC.SpillSize/8) <= 0xffff && "SpillSize too large.");

1001      assert((RC.SpillAlignment/8) <= 0xffff && "SpillAlignment too large.");

1002      assert(RC.CopyCost >= -128 && RC.CopyCost <= 127 && "Copy cost too large.");

1003 

1004      OS << "  { " << RC.getName() << ", " << RC.getName() << "Bits, "

1005         << RegClassStrings.get(RC.getName()) << ", "

1006         << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), "

1007         << RC.getQualifiedName() + "RegClassID" << ", "

1008         << RC.SpillSize/8 << ", "

1009         << RC.SpillAlignment/8 << ", "

1010         << RC.CopyCost << ", "

1011         << RC.Allocatable << " },\n";

1012    }

1013 

1014    OS << "};\n\n";

1015 

1016    EmitRegMappingTables(OS, Regs, false);

1017 

1018    // Emit Reg encoding table

1019    OS << "extern const uint16_t " << TargetName;

1020    OS << "RegEncodingTable[] = {\n";

1021    // Add entry for NoRegister

1022    OS << "  0,\n";

1023    for (const auto &RE : Regs) {

1024      Record *Reg = RE.TheDef;

1025      BitsInit *BI = Reg->getValueAsBitsInit("HWEncoding");

1026      uint64_t Value = 0;

1027      for (unsigned b = 0, be = BI->getNumBits(); b != be; ++b) {

1028        if (BitInit *B = dyn_cast<BitInit>(BI->getBit(b)))

1029          Value |= (uint64_t)B->getValue() << b;

1030      }

1031      OS << "  " << Value << ",\n";

1032    }

1033    OS << "};\n";       // End of HW encoding table

1034 

1035    // MCRegisterInfo initialization routine.

1036    OS << "static inline void Init" << TargetName

1037       << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, "

1038       << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0, unsigned PC = 0) "

1039          "{\n"

1040       << "  RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "

1041       << Regs.size() + 1 << ", RA, PC, " << TargetName << "MCRegisterClasses, "

1042       << RegisterClasses.size() << ", " << TargetName << "RegUnitRoots, "

1043       << RegBank.getNumNativeRegUnits() << ", " << TargetName << "RegDiffLists, "

1044       << TargetName << "LaneMaskLists, " << TargetName << "RegStrings, "

1045       << TargetName << "RegClassStrings, " << TargetName << "SubRegIdxLists, "

1046       << (std::distance(SubRegIndices.begin(), SubRegIndices.end()) + 1) << ",\n"

1047       << TargetName << "SubRegIdxRanges, " << TargetName

1048       << "RegEncodingTable);\n\n";

1049 

1050    EmitRegMapping(OS, Regs, false);

1051 

1052    OS << "}\n\n";

1053 

1054    OS << "} // End llvm namespace\n";

1055    OS << "#endif // GET_REGINFO_MC_DESC\n\n";

1056  }

997行循环输出的寄存器类描述数组大致上是这样:

extern const MCRegisterClass X86MCRegisterClasses[] = {

  { GR8, GR8Bits, 130, 20, sizeof(GR8Bits), X86::GR8RegClassID, 1, 1, 1, 1 },

  { GR8_NOREX, GR8_NOREXBits, 902, 8, sizeof(GR8_NOREXBits), X86::GR8_NOREXRegClassID, 1, 1, 1, 1 },

  { VK1, VK1Bits, 6, 8, sizeof(VK1Bits), X86::VK1RegClassID, 1, 1, 1, 1 },

   …

  { VR512_with_sub_xmm_in_FR32, VR512_with_sub_xmm_in_FR32Bits, 27, 16, sizeof (VR512_with_sub_xmm_in_FR32Bits), X86::VR512_with_sub_xmm_in_FR32RegClassID, 64, 64, 1, 1 },

};

GR8与GR8Bits都是前面842~874行输出的数组,GR8RegClassID等则是由RegisterInfoEmitter::runEnums()输出的枚举常量。每组中第三个输出的数字是RegClassStrings差分表中的偏移。

V7.0中修改了MCRegisterClass的定义:

class MCRegisterClass {

public:

  using iterator = const MCPhysReg*;

  using const_iterator = const MCPhysReg*;

 

  const iterator RegsBegin;

  const uint8_t *const RegSet;

  const uint32_t NameIdx;

  const uint16_t RegsSize;

  const uint16_t RegSetSize;

  const uint16_t ID;

  const uint16_t PhysRegSize;

  const int8_t CopyCost;

  const bool Allocatable;

因此输出的X86MCRegisterClasses略有不同,这里不再列出。

3.3.6.2.4. 与GCC/GDB编号间的映射

在TableGen的Register定义中,存在一个DwarfNumbers域(list<int>类型),不过这个域并没有被使用。TableGen另外定义了一个DwarfRegNum类,它也提供了一个DwarfNumbers域(list<int>类型),以实现从llvm的寄存器编号到gcc/gdb寄存器编号的映射。需要使用这个功能的寄存器需要同时从Register及DwarfRegNum派生。

这些映射表由RegisterInfoEmitter::EmitRegMappingTables输出。

320     void RegisterInfoEmitter::EmitRegMappingTables(

321         raw_ostream &OS, const std::deque<CodeGenRegister> &Regs, bool isCtor) {

322       // Collect all information about dwarf register numbers

323       typedef std::map<Record*, std::vector<int64_t>, LessRecordRegister> DwarfRegNumsMapTy;

324       DwarfRegNumsMapTy DwarfRegNums;

325    

326       // First, just pull all provided information to the map

327       unsigned maxLength = 0;

328       for (auto &RE : Regs) {

329         Record *Reg = RE.TheDef;

330         std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers");

331         maxLength = std::max((size_t)maxLength, RegNums.size());

332         if (DwarfRegNums.count(Reg))

333           PrintWarning(Reg->getLoc(), Twine("DWARF numbers for register ") +

334                        getQualifiedName(Reg) + "specified multiple times");

335         DwarfRegNums[Reg] = RegNums;

336       }

337    

338       if (!maxLength)

339         return;

340    

341       // Now we know maximal length of number list. Append -1's, where needed

342       for (DwarfRegNumsMapTy::iterator

343            I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I)

344         for (unsigned i = I->second.size(), e = maxLength; i != e; ++i)

345           I->second.push_back(-1);

346    

347       std::string Namespace = Regs.front().TheDef->getValueAsString("Namespace");

348    

349       OS << "// " << Namespace << " Dwarf<->LLVM register mappings.\n";

350    

351       // Emit reverse information about the dwarf register numbers.

352       for (unsigned j = 0; j < 2; ++j) {

353         for (unsigned i = 0, e = maxLength; i != e; ++i) {

354           OS << "extern const MCRegisterInfo::DwarfLLVMRegPair " << Namespace;

355           OS << (j == 0 ? "DwarfFlavour" : "EHFlavour");

356           OS << i << "Dwarf2L[]";

357    

358           if (!isCtor) {

359             OS << " = {\n";

360    

361             // Store the mapping sorted by the LLVM reg num so lookup can be done

362             // with a binary search.

363             std::map<uint64_t, Record*> Dwarf2LMap;

364             for (DwarfRegNumsMapTy::iterator

365                    I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {

366               int DwarfRegNo = I->second[i];

367               if (DwarfRegNo < 0)

368                 continue;

369               Dwarf2LMap[DwarfRegNo] = I->first;

370             }

371    

372             for (std::map<uint64_t, Record*>::iterator

373                    I = Dwarf2LMap.begin(), E = Dwarf2LMap.end(); I != E; ++I)

374               OS << "  { " << I->first << "U, " << getQualifiedName(I->second)

375                  << " },\n";

376    

377             OS << "};\n";

378           } else {

379             OS << ";\n";

380           }

381    

382           // We have to store the size in a const global, it's used in multiple

383           // places.

384           OS << "extern const unsigned " << Namespace

385              << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "Dwarf2LSize";

386           if (!isCtor)

387             OS << " = array_lengthof(" << Namespace

388                << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i

389                << "Dwarf2L);\n\n";

390           else

391             OS << ";\n\n";

392         }

393       }

394    

395       for (auto &RE : Regs) {

396         Record *Reg = RE.TheDef;

397         const RecordVal *V = Reg->getValue("DwarfAlias");

398         if (!V || !V->getValue())

399           continue;

400    

401         DefInit *DI = cast<DefInit>(V->getValue());

402         Record *Alias = DI->getDef();

403         DwarfRegNums[Reg] = DwarfRegNums[Alias];

404       }

405    

406       // Emit information about the dwarf register numbers.

407       for (unsigned j = 0; j < 2; ++j) {

408         for (unsigned i = 0, e = maxLength; i != e; ++i) {

409           OS << "extern const MCRegisterInfo::DwarfLLVMRegPair " << Namespace;

410           OS << (j == 0 ? "DwarfFlavour" : "EHFlavour");

411           OS << i << "L2Dwarf[]";

412           if (!isCtor) {

413             OS << " = {\n";

414             // Store the mapping sorted by the Dwarf reg num so lookup can be done

415             // with a binary search.

416             for (DwarfRegNumsMapTy::iterator

417                    I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {

418               int RegNo = I->second[i];

419               if (RegNo == -1) // -1 is the default value, don't emit a mapping.

420                 continue;

421    

422               OS << "  { " << getQualifiedName(I->first) << ", " << RegNo

423                  << "U },\n";

424             }

425             OS << "};\n";

426           } else {

427             OS << ";\n";

428           }

429    

430           // We have to store the size in a const global, it's used in multiple

431           // places.

432           OS << "extern const unsigned " << Namespace

433              << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2DwarfSize";

434           if (!isCtor)

435             OS << " = array_lengthof(" << Namespace

436                << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2Dwarf);\n\n";

437           else

438             OS << ";\n\n";

439         }

440       }

441     }

328行循环获取寄存器的DwarfNumbers列表保存入DwarfRegNums容器,获取DwarfNumbers列表的最大尺寸。在目前的LLVM中,对于X86机器,这个列表对应3种Dwarf变种。第一列是X86_64,第二列是X86_32_DarwinEH,第三列是X86_32_Generic。在342行的循环将DwarfRegNums所有的元素都调整为相同大小,多出部分由-1填充(-1表示gcc的编号未定义)。

352行循环对X86机器输出数组X86DwarfFlavour0Dwarf2L,X86DwarfFlavour1Dwarf2L,X86DwarfFlavour2Dwarf2L,X86EHFlavour0Dwarf2L,X86EHFlavour1Dwarf2L及X86EHFlavour2Dwarf2L(其中*EHFlavour*数组的使用对象是Darwin EH系统,它与对应的*DwarfFlavour*数组内容没有差别),它们的类型都是DwarfLLVMRegPair,实现从dwarf到llvm的映射。DwarfLLVMRegPair的定义是:

141       struct DwarfLLVMRegPair {

142         unsigned FromReg;

143         unsigned ToReg;

144    

145         bool operator<(DwarfLLVMRegPair RHS) const { return FromReg < RHS.FromReg; }

146       };

358行的isCtor是EmitRegMappingTables()的最后一个参数,在这里的调用上下文里是false,表示不是为目标机器的TargetGenRegisterInfo构造函数生成代码。363行的Dwarf2LMap与DwarfRegNums相反,它把Dwarf数(由i下标指定)映射到LLVM的寄存器编号,注释说这样可以进行二分查找。接着在372行的循环输出Dwarf2LMap的内容。输出的数组大致是这个样子:

extern const MCRegisterInfo::DwarfLLVMRegPair X86DwarfFlavour1Dwarf2L[] = {

  { 0U, X86::EAX },

  { 1U, X86::ECX },

  { 2U, X86::EDX },

   …

  { 36U, X86::MM7 },

};

384~391行则输出一个全局常量,其值是对应数组的大小。比如:

extern const unsigned X86DwarfFlavour1Dwarf2LSize = array_lengthof(X86DwarfFlavour1Dwarf2L);

接下来,DwarfAlias提供了一个方式,使得两个Register可以共享相同的dwarf编号。需要的寄存器必须从DwarfAlias派生,并指定共享的寄存器(由这个寄存器来提供DwarfNumbers定义,参考下面的403行)。

407行的循环输出另一个方向的映射数组(从LLVM到DWARF)。类似地它会输出数组X86DwarfFlavour0L2Dwarf,X86DwarfFlavour1L2Dwarf,X86DwarfFlavour2L2Dwarf,X86EHFlavour0L2Dwarf,X86EHFlavour1L2Dwarf及X86EHFlavour2L2Dwarf。同样,432~438行会输出一个全局常量,值是对应数组的大小。

注意,在这些数组中,-1是不输出内容的,因为它表示gcc的编号没有定义。但会输出-2,像这样(-2表示这个寄存器编号对于该模式/变种是无效的):

extern const MCRegisterInfo::DwarfLLVMRegPair X86EHFlavour0L2Dwarf[] = {

  { X86::EAX, -2U },

  { X86::EBP, -2U },

  { X86::EBX, -2U },

    …

  { X86::ZMM31, 75U },

};

回到RegisterInfoEmitter::runMCDesc()。跟着是输出寄存器在目标机器上的硬件编码表。1025行的HWEncoding就是在TD文件里定义寄存器必须要填写的域。这个数组的内容是小端序的(当前版本有246项,每项对应一个寄存器):

extern const uint16_t X86RegEncodingTable[] = {

  0,

  4,

  0,

  …

  15,

};

这些数值在汇编指令中用于指定寄存器(目前数组中最大的值是31)。

1036~1048行输出函数InitX86MCRegisterInfo()的定义的第一部分。

static inline void InitX86MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, unsigned DwarfFlavour = 0, unsigned EHFlavour = 0, unsigned PC = 0) {

  RI->InitMCRegisterInfo(X86RegDesc, 161, RA, PC, X86MCRegisterClasses, 59, X86RegUnitRoots, 87, X86RegDiffLists, X86RegStrings, X86SubRegIdxLists, 6,

  X86RegEncodingTable);

InitMCRegisterInfo()是平台通用的初始化函数,它将目标机器所要使用的寄存器描述以及差分表保存到相应的MCRegisterInfo(基类)对象中。InitX86MCRegisterInfo()在其基础上进行额外的初始化工作,这部分代码由下面的函数输出。

443     void RegisterInfoEmitter::EmitRegMapping(

444         raw_ostream &OS, const std::deque<CodeGenRegister> &Regs, bool isCtor) {

445       // Emit the initializer so the tables from EmitRegMappingTables get wired up

446       // to the MCRegisterInfo object.

447       unsigned maxLength = 0;

448       for (auto &RE : Regs) {

449         Record *Reg = RE.TheDef;

450         maxLength = std::max((size_t)maxLength,

451                              Reg->getValueAsListOfInts("DwarfNumbers").size());

452       }

453    

454       if (!maxLength)

455         return;

456    

457       std::string Namespace = Regs.front().TheDef->getValueAsString("Namespace");

458    

459       // Emit reverse information about the dwarf register numbers.

460       for (unsigned j = 0; j < 2; ++j) {

461         OS << "  switch (";

462         if (j == 0)

463           OS << "DwarfFlavour";

464         else

465           OS << "EHFlavour";

466         OS << ") {\n"

467          << "  default:\n"

468          << "    llvm_unreachable(\"Unknown DWARF flavour\");\n";

469    

470         for (unsigned i = 0, e = maxLength; i != e; ++i) {

471           OS << "  case " << i << ":\n";

472           OS << "    ";

473           if (!isCtor)

474             OS << "RI->";

475           std::string Tmp;

476           raw_string_ostream(Tmp) << Namespace

477                                   << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i

478                                   << "Dwarf2L";

479           OS << "mapDwarfRegsToLLVMRegs(" << Tmp << ", " << Tmp << "Size, ";

480           if (j == 0)

481               OS << "false";

482             else

483               OS << "true";

484           OS << ");\n";

485           OS << "    break;\n";

486         }

487         OS << "  }\n";

488       }

489    

490       // Emit information about the dwarf register numbers.

491       for (unsigned j = 0; j < 2; ++j) {

492         OS << "  switch (";

493         if (j == 0)

494           OS << "DwarfFlavour";

495         else

496           OS << "EHFlavour";

497         OS << ") {\n"

498            << "  default:\n"

499            << "    llvm_unreachable(\"Unknown DWARF flavour\");\n";

500    

501         for (unsigned i = 0, e = maxLength; i != e; ++i) {

502           OS << "  case " << i << ":\n";

503           OS << "    ";

504           if (!isCtor)

505             OS << "RI->";

506           std::string Tmp;

507           raw_string_ostream(Tmp) << Namespace

508                                   << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i

509                                   << "L2Dwarf";

510           OS << "mapLLVMRegsToDwarfRegs(" << Tmp << ", " << Tmp << "Size, ";

511           if (j == 0)

512               OS << "false";

513             else

514               OS << "true";

515           OS << ");\n";

516           OS << "    break;\n";

517         }

518         OS << "  }\n";

519       }

520     }

输出的将是两组switch语句(每组两个,分别以DwarfFlavour和EHFlavour为条件):第一组调用方法mapDwarfRegsToLLVMRegs(),第二组调用方法mapLLVMRegsToDwarfRegs(),把上述的映射表关联到MCRegisterInfo对象相应的成员。

​​​​​​​3.3.6.3. X86GenRegisterInfo的定义

CodeGen比MC的层次要高,MCRegisterInfo的信息是不足够的,因此从MCRegisterInfo派生了TargetRegisterInfo。目标机器需要进一步从TargetRegisterInfo派生出自己的定义,比如X86机器的X86GenRegisterInfo。RegisterInfoEmitter::run()方法接着输出类X86GenRegisterInfo的定义(对X86机器而言)。

1058  void

1059  RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,

1060                                       CodeGenRegBank &RegBank) {

1061    emitSourceFileHeader("Register Information Header Fragment", OS);

1062 

1063    OS << "\n#ifdef GET_REGINFO_HEADER\n";

1064    OS << "#undef GET_REGINFO_HEADER\n";

1065 

1066    const std::string &TargetName = Target.getName();

1067    std::string ClassName = TargetName + "GenRegisterInfo";

1068 

1069    OS << "#include \"llvm/Target/TargetRegisterInfo.h\"\n\n";

1070 

1071    OS << "namespace llvm {\n\n";

1072 

1073    OS << "struct " << ClassName << " : public TargetRegisterInfo {\n"

1074       << "  explicit " << ClassName

1075       << "(unsigned RA, unsigned D = 0, unsigned E = 0, unsigned PC = 0);\n"

1076       << "  bool needsStackRealignment(const MachineFunction &) const override\n"

1077       << "     { return false; }\n";

1078    if (!RegBank.getSubRegIndices().empty()) {

1079      OS << "  unsigned composeSubRegIndicesImpl"

1080         << "(unsigned, unsigned) const override;\n"

1081         << "  unsigned composeSubRegIndexLaneMaskImpl"

1082         << "(unsigned, unsigned) const override;\n"

1083         << "  const TargetRegisterClass *getSubClassWithSubReg"

1084         << "(const TargetRegisterClass*, unsigned) const override;\n";

1085    }

1086    OS << "  const RegClassWeight &getRegClassWeight("

1087       << "const TargetRegisterClass *RC) const override;\n"

1088       << "  unsigned getRegUnitWeight(unsigned RegUnit) const override;\n"

1089       << "  unsigned getNumRegPressureSets() const override;\n"

1090       << "  const char *getRegPressureSetName(unsigned Idx) const override;\n"

1091       << "  unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned "

1092          "Idx) const override;\n"

1093       << "  const int *getRegClassPressureSets("

1094       << "const TargetRegisterClass *RC) const override;\n"

1095       << "  const int *getRegUnitPressureSets("

1096       << "unsigned RegUnit) const override;\n"

1097       << "  ArrayRef<const char *> getRegMaskNames() const override;\n"

1098       << "  ArrayRef<const uint32_t *> getRegMasks() const override;\n"

1099       << "};\n\n";

1100 

1101    const auto &RegisterClasses = RegBank.getRegClasses();

1102 

1103    if (!RegisterClasses.empty()) {

1104      OS << "namespace " << RegisterClasses.front().Namespace

1105         << " { // Register classes\n";

1106 

1107      for (const auto &RC : RegisterClasses) {

1108        const std::string &Name = RC.getName();

1109 

1110        // Output the extern for the instance.

1111        OS << "  extern const TargetRegisterClass " << Name << "RegClass;\n";

1112      }

1113      OS << "} // end of namespace " << TargetName << "\n\n";

1114    }

1115    OS << "} // End llvm namespace\n";

1116    OS << "#endif // GET_REGINFO_HEADER\n\n";

1117  }

上面代码输出的X86GenRegisterInfo的定义如下(定义在llvm名字空间下):

struct X86GenRegisterInfo : public TargetRegisterInfo {

  explicit X86GenRegisterInfo(unsigned RA, unsigned D = 0, unsigned E = 0, unsigned PC = 0);

  bool needsStackRealignment(const MachineFunction &) const override

     { return false; }

  unsigned composeSubRegIndicesImpl(unsigned, unsigned) const override;

  unsigned composeSubRegIndexLaneMaskImpl(unsigned, unsigned) const override;

  const TargetRegisterClass *getSubClassWithSubReg(const TargetRegisterClass*, unsigned) const override;

  const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const override;

  unsigned getRegUnitWeight(unsigned RegUnit) const override;

  unsigned getNumRegPressureSets() const override;

  const char *getRegPressureSetName(unsigned Idx) const override;

  unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override;

  const int *getRegClassPressureSets(const TargetRegisterClass *RC) const override;

  const int *getRegUnitPressureSets(unsigned RegUnit) const override;

  ArrayRef<const char *> getRegMaskNames() const override;

  ArrayRef<const uint32_t *> getRegMasks() const override;

};

V7.0则生成这个改进的定义:

struct X86GenRegisterInfo : public TargetRegisterInfo {

  explicit X86GenRegisterInfo(unsigned RA, unsigned D = 0, unsigned E = 0,

      unsigned PC = 0, unsigned HwMode = 0);

  unsigned composeSubRegIndicesImpl(unsigned, unsigned) const override;

  LaneBitmask composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const override;

  LaneBitmask reverseComposeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const override;

  const TargetRegisterClass *getSubClassWithSubReg(const TargetRegisterClass*, unsigned) const override;

  const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const override;

  unsigned getRegUnitWeight(unsigned RegUnit) const override;

  unsigned getNumRegPressureSets() const override;

  const char *getRegPressureSetName(unsigned Idx) const override;

  unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override;

  const int *getRegClassPressureSets(const TargetRegisterClass *RC) const override;

  const int *getRegUnitPressureSets(unsigned RegUnit) const override;

  ArrayRef<const char *> getRegMaskNames() const override;

  ArrayRef<const uint32_t *> getRegMasks() const override;

  /// Devirtualized TargetFrameLowering.

  static const X86FrameLowering *getFrameLowering(

      const MachineFunction &MF);

};

部分虚函数定义将由下面的代码产生(TableGen的远期目标是生成所有这些代码)。

接下来,输出目标机器所支持寄存器类的前向声明。这些声明都定义在目标机器的名字空间下(以X86为例,就是名字空间X86)。

namespace X86 { // Register classes

  extern const TargetRegisterClass GR8RegClass;

  extern const TargetRegisterClass GR8_NOREXRegClass;

  extern const TargetRegisterClass VK1RegClass;

  extern const TargetRegisterClass VK2RegClass;

  …

  extern const TargetRegisterClass VR512_with_sub_xmm_in_FR32RegClass;

} // end of namespace X86

  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值