gem5中O3模式下fetch_impl.hh源代码详细分析

最新推荐文章于 2022-11-26 21:39:47 发布

乾龙_Heron

最新推荐文章于 2022-11-26 21:39:47 发布

阅读量4.5k

点赞数 4

分类专栏：模拟器 | 文章标签：gem5、源码剖析

本文链接：https://blog.csdn.net/qianlong4526888/article/details/70135742

版权

模拟器专栏收录该内容

3 篇文章 1 订阅

订阅专栏

fetch_impl.hh

1. /*

5. *

6. * The license below extends only to copyright in the software and shall

7. * not be construed as granting a license to any other intellectual

8. * property including but not limited to intellectual property relating

9. * to a hardware implementation of the functionality of the software

10. * licensed hereunder. You may use the software subject to the license

11. * terms below provided that you ensure that this notice is replicated

12. * unmodified and in its entirety in all distributions of the software,

13. * modified or unmodified, in source code or in binary form.

14. *

17. *

18. * Redistribution and use in source and binary forms, with or without

19. * modification, are permitted provided that the following conditions are

20. * met: redistributions of source code must retain the above copyright

21. * notice, this list of conditions and the following disclaimer;

22. * redistributions in binary form must reproduce the above copyright

23. * notice, this list of conditions and the following disclaimer in the

24. * documentation and/or other materials provided with the distribution;

25. * neither the name of the copyright holders nor the names of its

26. * contributors may be used to endorse or promote products derived from

27. * this software without specific prior written permission.

28. *

29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

30. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

31. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

32. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

33. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

34. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

35. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

36. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

37. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

38. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

39. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

40. *

41. * Authors: Kevin Lim

42. * Korey Sewell

43. */

44.

45. #ifndef __CPU_O3_FETCH_IMPL_HH__ //添加该宏判断为了防止该文件被多次include导致的声明冲突

46. #define __CPU_O3_FETCH_IMPL_HH__ //添加该宏定义，下次该文件再次被include时，

47. //上面#ifndef判断会不成立，从而避免本文件再次被include

48.

49. #include <algorithm> //c++头文件，包含算法相关的库函数

50. #include <cstring> //c++头文件，包含字符串相关库函数

51. #include <list> //c++头文件，包含容器list相关库函数

52. #include <map> //c++头文件，包含容器map相关的库函数

53. #include <queue> //c++头文件，包含队列相关的库函数

54.

55. #include "arch/isa_traits.hh" //跟指令操作数和译码相关的头文件

56. #include "arch/tlb.hh" //跟TLB相关头文件

57. #include "arch/utility.hh" //包含译码时需要的一些通用函数

58. #include "arch/vtophys.hh" //包含具体的page table walk相关

59. #include "base/random.hh" //包含随机数产生器及其类定义

60. #include "base/types.hh" //包含cycle的类定义

61. #include "config/the_isa.hh" //定义指令集ISA的类型，如ARM_ISA, MIPS_ISA

62. #include "cpu/base.hh" //主要定义BaseCPU类

63. //#include "cpu/checker/cpu.hh"

64. #include "cpu/o3/fetch.hh" //主要定义DefaultFetch类

65. #include "cpu/exetrace.hh" //定义抓取指令执行踪迹的类

66. #include "debug/Activity.hh" //scons自动产生的头文件，声明Activity

67. #include "debug/Drain.hh" //流水线清空头文件，当需要设置checkpoint时会先清空流水线

68. #include "debug/Fetch.hh" //TODO:

69. #include "debug/O3PipeView.hh" //TODO

70. #include "mem/packet.hh" //访存相关头文件。定义与内存请求相关类

71. #include "params/DerivO3CPU.hh" //TODO

72. #include "sim/byteswap.hh" //字节序头文件。定义大小端转换相关类

73. #include "sim/core.hh" //处理器核头文件。定义处理器核外部相关类，例如主频

74. #include "sim/eventq.hh" //事件队列头文件。定义实现时间队列功能的基类

75. #include "sim/full_system.hh" //全系统头文件。包含与启动全系统相关的外部声明

76. #include "sim/system.hh" //系统相关头文件。定义全系统启动相关的类

77. #include "cpu/o3/isa_specific.hh" //指令集相关头文件。包含指令集和乱序执行相关头文件

78.

79. //使用c++的标准命名空间，后续再调用标准库中函数时，不需要指定命名空间std

80. using namespace std;

81.

82. // Constructor of the DefaultFetch class template: copies the stage settings out of params, validates them, selects the SMT fetch policy and allocates per-thread decoders and fetch buffers.

83. // NOTE(review): DerivO3CPUParams presumably comes from the included "params/DerivO3CPU.hh" -- confirm.

84. template<class Impl>

85. DefaultFetch<Impl>::DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params)

86. : cpu(_cpu),

87. decodeToFetchDelay(params->decodeToFetchDelay),

88. renameToFetchDelay(params->renameToFetchDelay),

89. iewToFetchDelay(params->iewToFetchDelay),

90. commitToFetchDelay(params->commitToFetchDelay),

91. fetchWidth(params->fetchWidth),

92. decodeWidth(params->decodeWidth),

93. retryPkt(NULL),

94. retryTid(InvalidThreadID),

95. cacheBlkSize(cpu->cacheLineSize()),

96. fetchBufferSize(params->fetchBufferSize),

97. fetchBufferMask(fetchBufferSize - 1), // NOTE(review): a size-minus-one mask presumably assumes a power-of-two fetchBufferSize, which is not checked here

98. fetchQueueSize(params->fetchQueueSize),

99. numThreads(params->numThreads),

100. numFetchingThreads(params->smtNumFetchingThreads),

101. finishTranslationEvent(this)

102. {

103. // Impl::MaxThreads is a compile-time limit; configuring more threads

104. // than that is a fatal configuration error.

105. if (numThreads > Impl::MaxThreads)

106. fatal("numThreads (%d) is larger than compiled limit (%d),\n"

107. "\tincrease MaxThreads in src/cpu/o3/impl.hh\n",

108. numThreads, static_cast<int>(Impl::MaxThreads));

109. // Impl::MaxWidth is a compile-time limit; configuring a wider fetch

110. // than that is a fatal configuration error.

111. if (fetchWidth > Impl::MaxWidth)

112. fatal("fetchWidth (%d) is larger than compiled limit (%d),\n"

113. "\tincrease MaxWidth in src/cpu/o3/impl.hh\n",

114. fetchWidth, static_cast<int>(Impl::MaxWidth));

115. // The fetch buffer must not be larger than one cache block;

116. // a larger fetch buffer is a fatal configuration error.

117. if (fetchBufferSize > cacheBlkSize)

118. fatal("fetch buffer size (%u bytes) is greater than the cache "

119. "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize);

120. // The cache block size must be a whole multiple of the fetch buffer

121. // size; any remainder is a fatal configuration error.

122. if (cacheBlkSize % fetchBufferSize)

123. fatal("cache block (%u bytes) is not a multiple of the "

124. "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize);

125.

126. // Copy the configured SMT fetch policy name into a local string.

127. std::string policy = params->smtFetchPolicy;

128.

129. // Convert string to lowercase

130. // std::transform (from <algorithm>) applies tolower to every character in place.

131. std::transform(policy.begin(), policy.end(), policy.begin(),

132. (int(*)(int)) tolower);

133.

134. // Figure out fetch policy

135. // Map the lowercased policy name onto the matching fetchPolicy value.

136. if (policy == "singlethread") {

137. fetchPolicy = SingleThread; // single-thread policy; rejected just below when numThreads > 1

138. if (numThreads > 1)

139. panic("Invalid Fetch Policy for a SMT workload.");

140. } else if (policy == "roundrobin") {

141. fetchPolicy = RoundRobin; // round-robin policy

142. DPRINTF(Fetch, "Fetch policy set to Round Robin\n");

143. } else if (policy == "branch") {

144. fetchPolicy = Branch; // branch-count policy (per the debug message below)

145. DPRINTF(Fetch, "Fetch policy set to Branch Count\n");

146. } else if (policy == "iqcount") {

147. fetchPolicy = IQ; // IQ-count policy (per the debug message below)

148. DPRINTF(Fetch, "Fetch policy set to IQ count\n");

149. } else if (policy == "lsqcount") {

150. fetchPolicy = LSQ; // LSQ-count policy (per the debug message below)

151. DPRINTF(Fetch, "Fetch policy set to LSQ count\n");

152. } else { // unrecognized policy name; NOTE(review): the option list printed below omits the accepted "branch" policy

153. fatal("Invalid Fetch Policy. Options Are: {SingleThread,"

154. " RoundRobin,LSQcount,IQcount}\n");

155. }

156.

157. // Get the size of an instruction.

158. // NOTE(review): for ARM, MachInst is presumably a uint32_t typedef in src/arch/arm/types.hh -- confirm.

159. // instSize is the size in bytes of TheISA::MachInst.

160. // NOTE(review): presumably 4 for a fixed-length ISA such as ARM; for a variable-length ISA such as x86

161. // it is presumably the size of the unit handed from fetch to the decoder -- confirm.

162. instSize = sizeof(TheISA::MachInst);

163.

164. // Clear the per-thread fetch state for every slot up to the compile-time maximum.

165. for (int i = 0; i < Impl::MaxThreads; i++) {

166. decoder[i] = NULL; // no decoder yet; created below for the configured threads

167. fetchBuffer[i] = NULL; // no buffer yet; allocated below for the configured threads

168. fetchBufferPC[i] = 0; // address associated with the buffered data starts at 0

169. fetchBufferValid[i] = false; // the buffer holds no valid data yet

170. }

171.

172. branchPred = params->branchPred; // the branch predictor is supplied through params

173.

174. for (ThreadID tid = 0; tid < numThreads; tid++) {

175. // Create a TheISA::Decoder for this thread from params->isa[tid].

176. decoder[tid] = new TheISA::Decoder(params->isa[tid]);

177. // Create space to buffer the cache line data,

178. // which may not hold the entire cache line.

179. // Allocate fetchBufferSize bytes for this thread's fetch buffer (it may be smaller than a cache block).

180. // TODO: use one term consistently for "cache line" / "cache block".

181. fetchBuffer[tid] = new uint8_t[fetchBufferSize];

182. }

183. }

184.

185. // Returns this stage's name: the CPU's name with ".fetch" appended (regStats() uses it as the prefix of every statistic name).

186. template <class Impl>

187. std::string

188. DefaultFetch<Impl>::name() const

189. {

190. return cpu->name() + ".fetch";

191. }

192.

193. // Creates the two probe points of the fetch stage on the CPU's probe manager.

194. template <class Impl>

195. void

196. DefaultFetch<Impl>::regProbePoints()

197. {

198. // "Fetch" probe point carrying a DynInstPtr. NOTE(review): presumably notified once a dynamic instruction has been built -- confirm.

199. ppFetch = new ProbePointArg<DynInstPtr>(cpu->getProbeManager(), "Fetch");

200. // "FetchRequest" probe point carrying a RequestPtr; finishTranslation() notifies it after a fetch request was sent successfully.

201. ppFetchRequestSent = new ProbePointArg<RequestPtr>(cpu->getProbeManager(),

202. "FetchRequest");

203. }

204.

205. // Registers the statistics of the fetch stage: every stat gets a name prefixed with name() and a description,

206. // plus either a prerequisite or output flags; the three rate stats are additionally given a formula.

207. // NOTE(review): the stat objects are presumably Stats:: members declared in fetch.hh (src/base/statistics.hh) -- confirm.

208. template <class Impl>

209. void

210. DefaultFetch<Impl>::regStats()

211. {

212. // Each statement below is one chained call on a stat object; it has the same effect as calling

213. // icacheStallCycles.name(...);

214. // icacheStallCycles.desc(...);

215. // icacheStallCycles.prereq(...);

216. // one after another. NOTE(review): assumes name()/desc()/prereq() each return the stat itself (statistics.hh) -- confirm.

217. icacheStallCycles

218. .name(name() + ".icacheStallCycles")

219. .desc("Number of cycles fetch is stalled on an Icache miss")

220. .prereq(icacheStallCycles);

221.

222. fetchedInsts

223. .name(name() + ".Insts")

224. .desc("Number of instructions fetch has processed")

225. .prereq(fetchedInsts);

226.

227. fetchedBranches

228. .name(name() + ".Branches")

229. .desc("Number of branches that fetch encountered")

230. .prereq(fetchedBranches);

231.

232. predictedBranches

233. .name(name() + ".predictedBranches")

234. .desc("Number of branches that fetch has predicted taken")

235. .prereq(predictedBranches);

236.

237. fetchCycles

238. .name(name() + ".Cycles")

239. .desc("Number of cycles fetch has run and was not squashing or"

240. " blocked")

241. .prereq(fetchCycles);

242.

243. fetchSquashCycles

244. .name(name() + ".SquashCycles")

245. .desc("Number of cycles fetch has spent squashing")

246. .prereq(fetchSquashCycles);

247.

248. fetchTlbCycles

249. .name(name() + ".TlbCycles")

250. .desc("Number of cycles fetch has spent waiting for tlb")

251. .prereq(fetchTlbCycles);

252.

253. fetchIdleCycles

254. .name(name() + ".IdleCycles")

255. .desc("Number of cycles fetch was idle")

256. .prereq(fetchIdleCycles);

257.

258. fetchBlockedCycles

259. .name(name() + ".BlockedCycles")

260. .desc("Number of cycles fetch has spent blocked")

261. .prereq(fetchBlockedCycles);

262.

263. fetchedCacheLines

264. .name(name() + ".CacheLines")

265. .desc("Number of cache lines fetched")

266. .prereq(fetchedCacheLines);

267.

268. fetchMiscStallCycles

269. .name(name() + ".MiscStallCycles")

270. .desc("Number of cycles fetch has spent waiting on interrupts, or "

271. "bad addresses, or out of MSHRs")

272. .prereq(fetchMiscStallCycles);

273.

274. fetchPendingDrainCycles

275. .name(name() + ".PendingDrainCycles")

276. .desc("Number of cycles fetch has spent waiting on pipes to drain")

277. .prereq(fetchPendingDrainCycles);

278.

279. fetchNoActiveThreadStallCycles

280. .name(name() + ".NoActiveThreadStallCycles")

281. .desc("Number of stall cycles due to no active thread to fetch from")

282. .prereq(fetchNoActiveThreadStallCycles);

283.

284. fetchPendingTrapStallCycles

285. .name(name() + ".PendingTrapStallCycles")

286. .desc("Number of stall cycles due to pending traps")

287. .prereq(fetchPendingTrapStallCycles);

288.

289. fetchPendingQuiesceStallCycles

290. .name(name() + ".PendingQuiesceStallCycles")

291. .desc("Number of stall cycles due to pending quiesce instructions")

292. .prereq(fetchPendingQuiesceStallCycles);

293.

294. fetchIcacheWaitRetryStallCycles

295. .name(name() + ".IcacheWaitRetryStallCycles")

296. .desc("Number of stall cycles due to full MSHR")

297. .prereq(fetchIcacheWaitRetryStallCycles);

298.

299. fetchIcacheSquashes

300. .name(name() + ".IcacheSquashes")

301. .desc("Number of outstanding Icache misses that were squashed")

302. .prereq(fetchIcacheSquashes);

303.

304. fetchTlbSquashes

305. .name(name() + ".ItlbSquashes")

306. .desc("Number of outstanding ITLB misses that were squashed")

307. .prereq(fetchTlbSquashes);

308.

309. fetchNisnDist

310. .init(/* base value */ 0,

311. /* last value */ fetchWidth,

312. /* bucket size */ 1)

313. .name(name() + ".rateDist")

314. .desc("Number of instructions fetched each cycle (Total)")

315. .flags(Stats::pdf);

316.

317. idleRate

318. .name(name() + ".idleRate")

319. .desc("Percent of cycles fetch was idle")

320. .prereq(idleRate);

321. idleRate = fetchIdleCycles * 100 / cpu->numCycles;

322.

323. branchRate

324. .name(name() + ".branchRate")

325. .desc("Number of branch fetches per cycle")

326. .flags(Stats::total);

327. branchRate = fetchedBranches / cpu->numCycles;

328.

329. fetchRate

330. .name(name() + ".rate")

331. .desc("Number of inst fetches per cycle")

332. .flags(Stats::total);

333. fetchRate = fetchedInsts / cpu->numCycles;

334. }

335.

336.

337. // Stores the time buffer and creates the wires on which fetch reads the signals sent back by decode, rename, IEW and commit, each offset by its configured delay.

338. template<class Impl>

339. void

340. DefaultFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)

341. {

342. timeBuffer = time_buffer;

343.

344. // Create wires to get information from proper places in time buffer.

345. fromDecode = timeBuffer->getWire(-decodeToFetchDelay);

346. fromRename = timeBuffer->getWire(-renameToFetchDelay);

347. fromIEW = timeBuffer->getWire(-iewToFetchDelay);

348. fromCommit = timeBuffer->getWire(-commitToFetchDelay);

349. }

350.

351. // Stores the pointer to the list of active thread IDs handed in by the caller.

352. template<class Impl>

353. void

354. DefaultFetch<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)

355. {

356. activeThreads = at_ptr;

357. }

358.

359. // Creates the wire (at offset 0 of the fetch time buffer) on which fetch writes its output for decode.

360. template<class Impl>

361. void

362. DefaultFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *ftb_ptr)

363. {

364. // Create wire to write information to proper place in fetch time buf.

365. toDecode = ftb_ptr->getWire(0);

366. }

367.

368. // Start-up of the fetch stage: resets all stage state and then marks the stage active.

369. template<class Impl>

370. void

371. DefaultFetch<Impl>::startupStage()

372. {

373. assert(priorityList.empty()); // the priority list must still be empty here; resetStage() fills it

374. resetStage(); // initialize the stage-wide and per-thread state

375.

376. // Fetch needs to start fetching instructions at the very beginning,

377. // so it must start up in active state.

378. // resetStage() leaves _status Inactive, so switch the stage to Active here.

379. switchToActive();

380. }

381.

382. // Resets the stage-wide and per-thread fetch state to its initial values and rebuilds the thread priority list.

383. template<class Impl>

384. void

385. DefaultFetch<Impl>::resetStage()

386. {

387. numInst = 0; // reset the fetched-instruction count

388. interruptPending = false; // no interrupt is pending

389. cacheBlocked = false; // the I-cache is not blocked

390.

391. priorityList.clear(); // empty the thread priority list; it is refilled below

392.

393. // Setup PC and nextPC with initial state.

394. // Initialize the state of every configured thread.

395. for (ThreadID tid = 0; tid < numThreads; ++tid) {

396. fetchStatus[tid] = Running; // the thread starts in the Running state

397. pc[tid] = cpu->pcState(tid); // start from the CPU's PC state for this thread

398. fetchOffset[tid] = 0; // reset the fetch offset

399. macroop[tid] = NULL; // no macro-op is in progress

400.

401. delayedCommit[tid] = false; // clear the delayed-commit flag

402. memReq[tid] = NULL; // no outstanding memory request

403.

404. stalls[tid].decode = false; // clear both stall reasons (decode and drain)

405. stalls[tid].drain = false;

406.

407. fetchBufferPC[tid] = 0; // reset the address associated with the fetch buffer

408. fetchBufferValid[tid] = false; // the fetch buffer holds no valid data

409.

410. fetchQueue[tid].clear(); // empty the thread's fetch queue

411.

412. priorityList.push_back(tid); // append the thread to the priority list

413. }

414.

415. wroteToTimeBuffer = false; // nothing has been written to the time buffer

416. _status = Inactive; // the stage itself starts out Inactive

417. }

418.

419. // Handles an I-cache response packet: unless the request was squashed, copies the data into the thread's fetch buffer, wakes the CPU and advances the thread's state; always frees the packet.

420. template<class Impl>

421. void

422. DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)

423. {

424. // The thread ID is recovered from the context ID stored

425. // in the packet's request.

426. ThreadID tid = cpu->contextToThread(pkt->req->contextId());

427.

428. DPRINTF(Fetch, "[tid:%u] Waking up from cache miss.\n", tid);

429. assert(!cpu->switchedOut()); // the CPU must not be switched out

430.

431. // Only change the status if it's still waiting on the icache access

432. // to return.

433. // If the thread is no longer in IcacheWaitResponse, or the packet's request is not

434. // the one recorded in memReq[tid] (doSquash() clears memReq[tid] while a response is

435. // outstanding), the response is stale: count it, free the packet and its request,

436. // and return without changing the thread's state.

437. if (fetchStatus[tid] != IcacheWaitResponse ||

438. pkt->req != memReq[tid]) {

439. ++fetchIcacheSquashes; // count the squashed I-cache response

440. delete pkt->req; // free the request

441. delete pkt; // free the packet itself

442. return; // a stale response needs no further handling

443. }

444.

445. // Copy fetchBufferSize bytes of the returned data (the instruction bytes) into the

446. // thread's fetch buffer and mark the buffer as valid.

447. memcpy(fetchBuffer[tid], pkt->getConstPtr<uint8_t>(), fetchBufferSize);

448. fetchBufferValid[tid] = true;

449.

450. // Wake up the CPU (if it went to sleep and was waiting on

451. // this completion event).

452. // Wake the CPU in case it went to sleep while waiting for this response.

453. cpu->wakeCPU();

454.

455. DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n",

456. tid);

457.

458. // Make sure the fetch stage itself is marked active.

459. switchToActive();

460.

461. // Only switch to IcacheAccessComplete if we're not stalled as well.

462. // If checkStall(tid) reports a stall the thread becomes Blocked;

463. // otherwise it moves on to IcacheAccessComplete.

464. // Note: switchToActive() changes _status, the state of the whole stage, whereas

465. // fetchStatus[tid] is the state of one thread, so a thread can be Blocked

466. // while the stage as a whole is Active.

467. if (checkStall(tid)) {

468. fetchStatus[tid] = Blocked;

469. } else {

470. fetchStatus[tid] = IcacheAccessComplete;

471. }

472.

473. pkt->req->setAccessLatency(); // NOTE(review): presumably records the time between issuing the request and this response -- confirm

474. cpu->ppInstAccessComplete->notify(pkt); // notify the CPU's instruction-access-complete probe point

475. // Reset the mem req to NULL.

476. // Free the packet and its request and clear the thread's outstanding request.

477. delete pkt->req;

478. delete pkt;

479. memReq[tid] = NULL;

480. }

481.

482. // Clears the decode and drain stall flags of every thread when the pipeline resumes after a drain.

483. template <class Impl>

484. void

485. DefaultFetch<Impl>::drainResume()

486. {

487. for (ThreadID i = 0; i < numThreads; ++i) {

488. stalls[i].decode = false;

489. stalls[i].drain = false;

490. }

491. }

492.

493. // Asserts that the fetch stage (and the branch predictor) is in a fully drained state.

494. template <class Impl>

495. void

496. DefaultFetch<Impl>::drainSanityCheck() const

497. {

498. assert(isDrained()); // the stage reports itself as drained

499. assert(retryPkt == NULL); // no packet is waiting to be retried

500. assert(retryTid == InvalidThreadID); // no thread is waiting on a retry

501. assert(!cacheBlocked); // the I-cache is not blocked

502. assert(!interruptPending); // no interrupt is pending

503.

504. for (ThreadID i = 0; i < numThreads; ++i) {

505. assert(!memReq[i]); // the thread has no outstanding memory request

506. assert(fetchStatus[i] == Idle || stalls[i].drain); // the thread is either Idle or

507. // drain-stalled (stalls[i].drain is set by drainStall())

508. }

509.

510. branchPred->drainSanityCheck();

511. }

512.

513. // Returns true when every thread has an empty fetch queue and is Idle or drain-blocked, and no finish-translation event is scheduled.

514. template <class Impl>

515. bool

516. DefaultFetch<Impl>::isDrained() const

517. {

518. /* Make sure that threads are either idle or that the commit stage

519. * has signaled that draining has completed by setting the drain

520. * stall flag. This effectively forces the pipeline to be disabled

521. * until the whole system is drained (simulation may continue to

522. * drain other components).

523. */

524. // Check every configured thread in turn.

525. for (ThreadID i = 0; i < numThreads; ++i) {

526. // Verify fetch queues are drained

527. // A thread whose fetch queue still holds entries

528. // means the stage is not drained.

529. if (!fetchQueue[i].empty())

530. return false;

531.

532. // Return false if not idle or drain stalled

533. // A thread that is not Idle is acceptable only if it is drain-blocked.

534. if (fetchStatus[i] != Idle) {

535. // Blocked with the drain stall flag set counts as drained

536. // for this thread; continue with the next thread.

537. if (fetchStatus[i] == Blocked && stalls[i].drain)

538. continue;

539. else

540. return false; // any other state means the stage is not drained

541. }

542. }

543.

544. /* The pipeline might start up again in the middle of the drain

545. * cycle if the finish translation event is scheduled, so make

546. * sure that's not the case.

547. */

548. // All threads look drained at this point, but finishTranslation() may have

549. // deferred its work through finishTranslationEvent; while that event is still

550. // scheduled the stage could start up again, so it must not be pending.

551. return !finishTranslationEvent.scheduled();

552. }

553.

554. // Checks that the instruction port is connected and resets all stage state. NOTE(review): presumably called when this CPU takes over from another CPU (inferred from the name) -- confirm.

555. template <class Impl>

556. void

557. DefaultFetch<Impl>::takeOverFrom()

558. {

559. // The CPU's instruction port must already be connected.

560. assert(cpu->getInstPort().isConnected());

561. resetStage(); // reinitialize the stage-wide and per-thread state

562.

563. }

564.

565.

566. // Marks thread tid as stalled because of a drain.

567. // NOTE(review): a drain presumably happens e.g. when gem5 takes a checkpoint -- confirm.

568. template <class Impl>

569. void

570. DefaultFetch<Impl>::drainStall(ThreadID tid)

571. {

572. // The CPU must already be in the draining state when this is called

573. // (NOTE(review): presumably the CPU enters that state before calling into the stages -- confirm).

574. assert(cpu->isDraining());

575. assert(!stalls[tid].drain); // the thread must not already be drain-stalled

576. DPRINTF(Drain, "%i: Thread drained.\n", tid);

577. stalls[tid].drain = true; // record drain as a stall reason for the thread

578. }

579. // Wakes fetch from quiesce by setting thread 0 back to Running; other threads are not handled (see the @todo below).

580. template <class Impl>

581. void

582. DefaultFetch<Impl>::wakeFromQuiesce()

583. {

584. DPRINTF(Fetch, "Waking up from quiesce\n");

585. // Hopefully this is safe

586. // @todo: Allow other threads to wake from quiesce.

587. fetchStatus[0] = Running;

588. }

589.

590. // If the stage is currently Inactive, tells the CPU that fetch is active and sets _status to Active; otherwise does nothing.

591. template <class Impl>

592. inline void

593. DefaultFetch<Impl>::switchToActive()

594. {

595. if (_status == Inactive) {

596. DPRINTF(Activity, "Activating stage.\n");

597.

598. cpu->activateStage(O3CPU::FetchIdx); // tell the CPU that the fetch stage is now active

599.

600. _status = Active;

601. }

602. }

603.

604. // If the stage is currently Active, tells the CPU that fetch is inactive and sets _status to Inactive; otherwise does nothing.

605. template <class Impl>

606. inline void

607. DefaultFetch<Impl>::switchToInactive()

608. {

609. if (_status == Active) {

610. DPRINTF(Activity, "Deactivating stage.\n");

611.

612. cpu->deactivateStage(O3CPU::FetchIdx); // tell the CPU that the fetch stage is no longer active

613.

614. _status = Inactive;

615. }

616. }

617.

618. // Removes thread tid from the priority list if it is present; does nothing otherwise.

619. template <class Impl>

620. void

621. DefaultFetch<Impl>::deactivateThread(ThreadID tid)

622. {

623. // Update priority list

624. // Look the thread ID up in the priority list.

625. auto thread_it = std::find(priorityList.begin(), priorityList.end(), tid);

626. if (thread_it != priorityList.end()) {

627. // std::find returns end() (the past-the-end iterator, not the last element) when tid

628. // is not in the list, so the entry is erased only when the thread was actually found.

629. priorityList.erase(thread_it);

630. }

631. }

632.

633. // Updates nextPC to the predicted PC after inst (using the branch predictor for control instructions), records the prediction on inst, and returns whether the instruction is predicted taken.

634. template <class Impl>

635. bool

636. DefaultFetch<Impl>::lookupAndUpdateNextPC(

637. DynInstPtr &inst, TheISA::PCState &nextPC)

638. {

639. // Do branch prediction check here.

640. // A bit of a misnomer...next_PC is actually the current PC until

641. // this function updates it.

642. // On entry nextPC holds the PC of inst itself (for a branch,

643. // the address of that branch);

644. // on return it holds the predicted PC of the instruction that follows.

645. bool predict_taken;

646.

647. // Instructions that are not control instructions bypass the branch predictor.

648. if (!inst->isControl()) {

649. TheISA::advancePC(nextPC, inst->staticInst); // advance nextPC past this instruction

650. inst->setPredTarg(nextPC); // the predicted target is the advanced (fall-through) PC

651. inst->setPredTaken(false); // mark the instruction as not taken

652. return false;

653. }

654.

655. ThreadID tid = inst->threadNumber; // the thread the instruction belongs to

656. // Ask the branch predictor; nextPC is passed in so the predictor can update it.

657. predict_taken = branchPred->predict(inst->staticInst, inst->seqNum,

658. nextPC, tid);

659.

660. if (predict_taken) {

661. // The branch is predicted taken.

662. DPRINTF(Fetch, "[tid:%i]: [sn:%i]: Branch predicted to be taken to %s.\n",

663. tid, inst->seqNum, nextPC);

664. } else {

665. // The branch is predicted not taken.

666. DPRINTF(Fetch, "[tid:%i]: [sn:%i]:Branch predicted to be not taken.\n",

667. tid, inst->seqNum);

668. }

669.

670. DPRINTF(Fetch, "[tid:%i]: [sn:%i] Branch predicted to go to %s.\n",

671. tid, inst->seqNum, nextPC);

672.

673. inst->setPredTarg(nextPC); // record the predicted next PC (nextPC was handed to predict() above)

674. inst->setPredTaken(predict_taken); // record the taken / not-taken prediction

675.

676. ++fetchedBranches; // count the fetched branch

677.

678. if (predict_taken) {

679. ++predictedBranches; // count the branch as predicted taken

680. }

681.

682. return predict_taken; // report the prediction to the caller

683. }

684.

685.

686. // Starts fetching the fetch-buffer-sized block that contains vaddr for thread tid by building a request and starting its ITLB translation; returns false when fetch must hold off (I-cache blocked or interrupt pending).

687. template <class Impl>

688. bool

689. DefaultFetch<Impl>::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc)

690. {

691. Fault fault = NoFault; // NOTE(review): this local is not used anywhere in the function

692.

693. assert(!cpu->switchedOut()); // the CPU must not be switched out

694.

695. // @todo: not sure if these should block translation.

696. //AlphaDep

697. // With a blocked I-cache no access can be started, so give up immediately.

698. if (cacheBlocked) {

699. DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n",

700. tid);

701. return false;

702. } else if (checkInterrupt(pc) && !delayedCommit[tid]) {

703. // The cache is not blocked, but checkInterrupt(pc) reports an interrupt and the

704. // thread's delayedCommit flag is clear, so fetch holds off.

705. // When delayedCommit[tid] is set, fetch continues despite the interrupt.

706. // NOTE(review): the English comment below calls this "PAL mode", an Alpha concept --

707. // confirm how it relates to delayedCommit.

708. // TODO: say more

709. // Hold off fetch from getting new instructions when:

710. // Cache is blocked, or

711. // while an interrupt is pending and we're not in PAL mode, or

712. // fetch is switched out.

713. DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, interrupt pending\n",

714. tid);

715. return false;

716. }

717.

718. // Align the fetch address to the start of a fetch buffer segment.

719. // The address is aligned to a fetch buffer boundary, which need not be a full cache block.

720. // NOTE(review): e.g. vaddr 0x8150 would align to 0x8140 assuming a 64-byte fetch buffer -- confirm.

721. Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); // aligned start address of the block to fetch

722.

723. DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n",

724. tid, fetchBufferBlockPC, vaddr);

725.

726. // Setup the memReq to do a read of the first instruction's address.

727. // Set the appropriate read size and flags as well.

728. // Build request here.

729. // Create the instruction-fetch request for fetchBufferSize bytes at fetchBufferBlockPC; the packet is actually sent later, in finishTranslation().

730. // NOTE(review): Request is presumably defined in mem/request.hh -- confirm.

731. RequestPtr mem_req =

732. new Request(tid, fetchBufferBlockPC, fetchBufferSize,

733. Request::INST_FETCH, cpu->instMasterId(), pc,

734. cpu->thread[tid]->contextId());

735.

736. // Tag the request with the CPU's task ID so the memory system can

737. // tell which task issued it (see the description quoted below).

738. // NOTE(review): taskId() is presumably declared in src/cpu/base.hh -- confirm.

739. // TODO: where does cpu->taskId() get its value from?

740. /** An internal representation of a task identifier within gem5. This is

741. * used so the CPU can add which taskId (which is an internal representation

742. * of the OS process ID) to each request so components in the memory system

743. * can track which process IDs are ultimately interacting with them

744. */

745. mem_req->taskId(cpu->taskId());

746.

747. // Remember the new request as this thread's outstanding memory request.

748. memReq[tid] = mem_req;

749.

750. // Initiate translation of the icache block

751. // Start the address translation of the block;

752. // the thread waits in ItlbWait until the translation finishes.

753. fetchStatus[tid] = ItlbWait;

754. FetchTranslation *trans = new FetchTranslation(this);

755. // Start the timing translation. NOTE(review): for ARM, translateTiming() is presumably implemented in arch/arm/tlb.cc -- confirm.

756. cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(),

757. trans, BaseTLB::Execute);

758. // Returning true only means the translation was started; it does not mean the data has arrived.

759. return true;

760. }

761. // Completion handler for the ITLB translation of mem_req: drops it if the thread was squashed, sends the I-cache read on success, or builds a nop that carries the fault to commit on a translation fault.

762. template <class Impl>

763. void

764. DefaultFetch<Impl>::finishTranslation(const Fault &fault, RequestPtr mem_req)

765. {

766. ThreadID tid = cpu->contextToThread(mem_req->contextId());

767. Addr fetchBufferBlockPC = mem_req->getVaddr();

768.

769. assert(!cpu->switchedOut());

770.

771. // Wake up CPU if it was idle

772. // NOTE(review): wakeCPU() presumably reschedules the CPU's tick event at a clock edge -- confirm.

773. cpu->wakeCPU();

774. // If the thread is no longer waiting in ItlbWait, or this is not its recorded request, the translation is stale: count it, free the request and return.

775. if (fetchStatus[tid] != ItlbWait || mem_req != memReq[tid] ||

776. mem_req->getVaddr() != memReq[tid]->getVaddr()) {

777. DPRINTF(Fetch, "[tid:%i] Ignoring itlb completed after squash\n",

778. tid);

779. ++fetchTlbSquashes;

780. delete mem_req;

781. return;

782. }

783.

784.

785. // If translation was successful, attempt to read the icache block.

786. if (fault == NoFault) {

787. // Check that we're not going off into random memory

788. // If we have, just wait around for commit to squash something and put

789. // us on the right track

790. if (!cpu->system->isMemAddr(mem_req->getPaddr())) {

791. warn("Address %#x is outside of physical memory, stopping fetch\n",

792. mem_req->getPaddr());

793. fetchStatus[tid] = NoGoodAddr;

794. delete mem_req;

795. memReq[tid] = NULL;

796. return;

797. }

798.

799. // Build packet here.

800. // Wrap the request in a read packet and give it a newly allocated data buffer of fetchBufferSize bytes.

801. PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq);

802. data_pkt->dataDynamic(new uint8_t[fetchBufferSize]);

803.

804. fetchBufferPC[tid] = fetchBufferBlockPC;

805. fetchBufferValid[tid] = false;

806. DPRINTF(Fetch, "Fetch: Doing instruction read.\n");

807.

808. fetchedCacheLines++;

809.

810. // Access the cache.

811. // Send the timing request through the instruction port; if it is refused, keep the packet for a later retry and mark the cache as blocked.

812. if (!cpu->getInstPort().sendTimingReq(data_pkt)) {

813. assert(retryPkt == NULL);

814. assert(retryTid == InvalidThreadID);

815. DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid);

816.

817. fetchStatus[tid] = IcacheWaitRetry;

818. retryPkt = data_pkt;

819. retryTid = tid;

820. cacheBlocked = true;

821. } else {

822. DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid);

823. DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache "

824. "response.\n", tid);

825. lastIcacheStall[tid] = curTick();

826. fetchStatus[tid] = IcacheWaitResponse;

827. // Notify Fetch Request probe when a packet containing a fetch

828. // request is successfully sent

829. ppFetchRequestSent->notify(mem_req);

830. }

831. } else {

832. // Don't send an instruction to decode if we can't handle it.

833. if (!(numInst < fetchWidth) || !(fetchQueue[tid].size() < fetchQueueSize)) {

834. assert(!finishTranslationEvent.scheduled());

835. finishTranslationEvent.setFault(fault);

836. finishTranslationEvent.setReq(mem_req);

837. cpu->schedule(finishTranslationEvent,

838. cpu->clockEdge(Cycles(1)));

839. return;

840. }

841. DPRINTF(Fetch, "[tid:%i] Got back req with addr %#x but expected %#x\n",

842. tid, mem_req->getVaddr(), memReq[tid]->getVaddr()); // NOTE(review): printed on the fault path, where both addresses are known to match (see the check above)

843. // Translation faulted, icache request won't be sent.

844. delete mem_req;

845. memReq[tid] = NULL;

846.

847. // Send the fault to commit. This thread will not do anything

848. // until commit handles the fault. The only other way it can

849. // wake up is if a squash comes along and changes the PC.

850. TheISA::PCState fetchPC = pc[tid];

851.

852. DPRINTF(Fetch, "[tid:%i]: Translation faulted, building noop.\n", tid);

853. // We will use a nop in order to carry the fault.

854. DynInstPtr instruction = buildInst(tid,

855. decoder[tid]->decode(TheISA::NoopMachInst, fetchPC.instAddr()),

856. NULL, fetchPC, fetchPC, false);

857.

858. instruction->setPredTarg(fetchPC);

859. instruction->fault = fault;

860. wroteToTimeBuffer = true;

861.

862. DPRINTF(Activity, "Activity this cycle.\n");

863. cpu->activityThisCycle();

864.

865. fetchStatus[tid] = TrapPending;

866.

867. DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid);

868. DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n",

869. tid, fault->name(), pc[tid]);

870. }

871. _status = updateFetchStatus();

872. }

873. // Redirects thread tid to newPC and discards its in-flight fetch state: outstanding request, pending retry packet and fetch queue; leaves the thread in the Squashing state.

874. template <class Impl>

875. inline void

876. DefaultFetch<Impl>::doSquash(const TheISA::PCState &newPC,

877. const DynInstPtr squashInst, ThreadID tid)

878. {

879. DPRINTF(Fetch, "[tid:%i]: Squashing, setting PC to: %s.\n",

880. tid, newPC);

881. // Redirect the thread's fetch PC to newPC.

882. pc[tid] = newPC;

883. fetchOffset[tid] = 0;

884. // If a squashing instruction is given and its instruction

885. // address equals that of the new PC, fetch resumes inside

886. // the same instruction, so its macro-op is kept;

887. // otherwise no macro-op is in progress.

888. // NOTE(review): illustrative example of micro-ops: ADD [R1], R2 = load R1 + ADD.

889. if (squashInst && squashInst->pcState().instAddr() == newPC.instAddr())

890. macroop[tid] = squashInst->macroop;

891. else

892. macroop[tid] = NULL;

893. // Reset the decoder's state. NOTE(review): for ARM presumably implemented in arch/arm/decoder.cc -- confirm.

894. decoder[tid]->reset();

895.

896. // Clear the icache miss if it's outstanding.

897. if (fetchStatus[tid] == IcacheWaitResponse) {

898. DPRINTF(Fetch, "[tid:%i]: Squashing outstanding Icache miss.\n",

899. tid);

900. // Forget the outstanding request; processCacheCompletion() then treats the late response as stale and frees it.

901. memReq[tid] = NULL;

902. } else if (fetchStatus[tid] == ItlbWait) {

903. DPRINTF(Fetch, "[tid:%i]: Squashing outstanding ITLB miss.\n",

904. tid);

905. memReq[tid] = NULL;

906. }

907.

908. // Get rid of the retrying packet if it was from this thread.

909. // If this thread owns the packet that is waiting to be re-sent

910. // (the I-cache refused it earlier, so cacheBlocked is set),

911. // free that packet and its request and clear the retry bookkeeping:

912. // there is no point in re-sending a request for a squashed thread.

913. if (retryTid == tid) {

914. assert(cacheBlocked);

915. if (retryPkt) {

916. delete retryPkt->req;

917. delete retryPkt;

918. }

919. retryPkt = NULL;

920. retryTid = InvalidThreadID;

921. }

922.

923. // Mark the thread as Squashing.

924. fetchStatus[tid] = Squashing;

925.

926. // Empty fetch queue

927. // Discard the instructions already fetched for this thread; they are all squashed.

928. fetchQueue[tid].clear();

929.

930. // microops are being squashed, it is not known whether the

931. // youngest non-squashed microop was marked delayed commit

932. // or not. Setting the flag to true ensures that the

933. // interrupts are not handled when they cannot be, though

934. // some opportunities to handle interrupts may be missed.

935. // While delayedCommit[tid] is set, fetchCacheLine() does not hold off fetch for a pending interrupt.

936. delayedCommit[tid] = true;

937.

938. ++fetchSquashCycles;

939. }

940.

941. template<class Impl>

942. void

943. DefaultFetch<Impl>::squashFromDecode(const TheISA::PCState &newPC,

944. const DynInstPtr squashInst,

945. const InstSeqNum seq_num, ThreadID tid)

946. {

947. DPRINTF(Fetch, "[tid:%i]: Squashing from decode.\n", tid);

948.

949. doSquash(newPC, squashInst, tid);

950.

951. // Tell the CPU to remove any instructions that are in flight between

952. // fetch and decode.

953. cpu->removeInstsUntil(seq_num, tid);

954. }

955.

956. template<class Impl>

957. bool

958. DefaultFetch<Impl>::checkStall(ThreadID tid) const

959. {

960. bool ret_val = false;

961.

962. if (stalls[tid].drain) {

963. assert(cpu->isDraining());

964. DPRINTF(Fetch,"[tid:%i]: Drain stall detected.\n",tid);

965. ret_val = true;

966. }

967.

968. return ret_val;

969. }

970.

971. template<class Impl>

972. typename DefaultFetch<Impl>::FetchStatus

973. DefaultFetch<Impl>::updateFetchStatus()

974. {

975. //Check Running

976. list<ThreadID>::iterator threads = activeThreads->begin();

977. list<ThreadID>::iterator end = activeThreads->end();

978.

979. //逐个线程处理其状态变化

980. while (threads != end) {

981. ThreadID tid = *threads++;

982.

983. if (fetchStatus[tid] == Running ||

984. fetchStatus[tid] == Squashing ||

985. fetchStatus[tid] == IcacheAccessComplete) {

986.

987. if (_status == Inactive) {

988. DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid);

989.

990. if (fetchStatus[tid] == IcacheAccessComplete) {

991. DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache"

992. "completion\n",tid);

993. }

994. //标记Fetch stage为active，并做相应计数处理

995. cpu->activateStage(O3CPU::FetchIdx);

996. }

997.

998. return Active;

999. }

1000. }

1001.

1002. // Stage is switching from active to inactive, notify CPU of it.

1003. //因为状态变化只能在Active/inactive之间，

1004. //如果上个状态是Active，并且调用了本函数，

1005. //必定是转为inactive

1006. if (_status == Active) {

1007. DPRINTF(Activity, "Deactivating stage.\n");

1008. //标记Fetch Stage为inactive

1009. cpu->deactivateStage(O3CPU::FetchIdx);

1010. }

1011.

1012. return Inactive;

1013. }

1014.

1015. template <class Impl>

1016. void

1017. DefaultFetch<Impl>::squash(const TheISA::PCState &newPC,

1018. const InstSeqNum seq_num, DynInstPtr squashInst,

1019. ThreadID tid)

1020. {

1021. DPRINTF(Fetch, "[tid:%u]: Squash from commit.\n", tid);

1022. //真正squash函数

1023. doSquash(newPC, squashInst, tid);

1024. //发生squash后需要清空发生squash的指令之后的指令，

1025. //即晚于当前发生squash指令的其他指令

1026. // Tell the CPU to remove any instructions that are not in the ROB.

1027. //参看src/cpu/o3/cpu.cc

1028. cpu->removeInstsNotInROB(tid);

1029. }

1030.

1031. template <class Impl>

1032. void

1033. DefaultFetch<Impl>::tick()

1034. {

1035. //获取activeThreads列表

1036. //迭代器用于顺序访问容器类型中的每个元素

1037. //begin()和end()函数分别用于返回指向容器的

1038. //第一个和末尾元素的下一个位置

1039. //activeThreads在FullO3CPU的构造函数中分别调用fetch,decode等阶段进行初始化

1040. list<ThreadID>::iterator threads = activeThreads->begin();

1041. list<ThreadID>::iterator end = activeThreads->end();

1042. //用于指示在fetch阶段是否有状态变化，例如stall running等

1043. bool status_change = false;

1044. //初始化timebuffer为false，timebuffer用于在各个stage之间传递信息与数据

1045. wroteToTimeBuffer = false;

1046. //初始化一个cycle内取多条指令，只针对于x86这样变长指令集使用

1047. //对于ARM定长指令集，不会发生所取指令跨cacheline的问题，即不存在

1048. //pipelinedIfetch

1049. for (ThreadID i = 0; i < numThreads; ++i) {

1050. issuePipelinedIfetch[i] = false;

1051. }

1052. //逐个硬件线程(thread)检查是否有其他stage传递过来的signal，进而根据

1053. //signal对线程做出相应的状态调整

1054. while (threads != end) {

1055. ThreadID tid = *threads++;

1056.

1057. // Check the signals for each thread to determine the proper status

1058. // for each thread.

1059. bool updated_status = checkSignalsAndUpdate(tid);

1060. status_change = status_change || updated_status;

1061. }

1062.

1063. DPRINTF(Fetch, "Running stage.\n");

1064.

1065. if (FullSystem) {

1066. //在全系统中如果中断被pending，是因为当前系统中发生

1067. //太多中断，其中有的没有被立刻处理，为了及时响应中断

1068. //fetch应该立刻停止:在fetchcacheline()函数中调用checkInterrupt()时

1069. //会检查interruptPending，如果被置位，推出fetchcacheline()函数

1070. //如果不是全系统(即se模式)，不会出现中断pending的问题

1071. //因为se模式不会模拟嵌套gic的过程

1072. if (fromCommit->commitInfo[0].interruptPending) {

1073. interruptPending = true;

1074. }

1075.

1076. if (fromCommit->commitInfo[0].clearInterrupt) {

1077. interruptPending = false;

1078. }

1079. }

1080.

1081. for (threadFetched = 0; threadFetched < numFetchingThreads;

1082. threadFetched++) {

1083. // Fetch each of the actively fetching threads.

1084. fetch(status_change);//真正执行fetch的函数

1085. }

1086.

1087. // Record number of instructions fetched this cycle for distribution.

1088. //函数定义在src/base/statistics.hh: class DistBase, sample()，调用同文件中

1089. //DistStor中sample函数进行处理

1090. //用于统计每个cycle取指数目

1091. fetchNisnDist.sample(numInst);

1092.

1093. if (status_change) {

1094. // Change the fetch stage status if there was a status change.

1095. _status = updateFetchStatus();

1096. }

1097.

1098. // Issue the next I-cache request if possible.

1099. for (ThreadID i = 0; i < numThreads; ++i) {

1100. if (issuePipelinedIfetch[i]) {//在fetch函数中会确定是否需要pipeline icache fetch

1101. pipelineIcacheAccesses(i);//在当前cycle不仅要完成执行fetch，

1102. //同时发出下一个访问icache的请求

1103. }

1104. }

1105.

1106. // Send instructions enqueued into the fetch queue to decode.

1107. // Limit rate by fetchWidth. Stall if decode is stalled.

1108. unsigned insts_to_decode = 0;//记录发送给Decode阶段的指令数

1109. unsigned available_insts = 0;//用于记录当前stage，需要处理的所有指令数

1110.

1111. for (auto tid : *activeThreads) {//这里怎么理解? 只是当前线程

1112. if (!stalls[tid].decode) {

1113. available_insts += fetchQueue[tid].size();

1114. }

1115. }

1116.

1117. // Pick a random thread to start trying to grab instructions from

1118. auto tid_itr = activeThreads->begin();

1119. //advance(it, N)迭代器就是将迭代器it移动N次，这里N是由

1120. //random函数产生的0-activeThreads->size() - 1之间的随机数

1121. std::advance(tid_itr, random_mt.random<uint8_t>(0, activeThreads->size() - 1));

1122. //第一次执行到这里时，由于fetchQueue一定是空的(icache miss)，while不会执行

1123. while (available_insts != 0 && insts_to_decode < decodeWidth) {

1124. ThreadID tid = *tid_itr;//随机挑选出来的线程id

1125. //这里的fetchQueue不是timebuffer，是fetch阶段通过buildInst()函数压入队列的指令

1126. //Decode阶段也有一个fetchQueue，那个是timebuffer

1127. //如果当前线程没有被decode阶段stall，并且fetch()函数取回的指令不是空的

1128. if (!stalls[tid].decode && !fetchQueue[tid].empty()) {

1129. auto inst = fetchQueue[tid].front();

1130. //toDecode类型是wire为什么可以直接使用->?

1131. //见wire定义的operator->，运算符重载

1132. toDecode->insts[toDecode->size++] = inst;

1133. DPRINTF(Fetch, "[tid:%i][sn:%i]: Sending instruction to decode from "

1134. "fetch queue. Fetch queue size: %i.\n",

1135. tid, inst->seqNum, fetchQueue[tid].size());

1136.

1137. //用于给event机制使用，确定是否激活当前cycle

1138. wroteToTimeBuffer = true;

1139. //队列弹出队首元素

1140. fetchQueue[tid].pop_front();

1141. //真正发送给decode的指令数

1142. insts_to_decode++;

1143. //当前阶段需要处理的指令数，有可能大于insts_to_decode

1144. available_insts--;

1145. }

1146.

1147. //循环处理所有线程，结束条件是处理完所有需要处理的指令available_insts

1148. tid_itr++;

1149. // Wrap around if at end of active threads list

1150. if (tid_itr == activeThreads->end())

1151. tid_itr = activeThreads->begin();

1152. }

1153.

1154. // If there was activity this cycle, inform the CPU of it.

1155. if (wroteToTimeBuffer) {

1156. DPRINTF(Activity, "Activity this cycle.\n");

1157. cpu->activityThisCycle();

1158. }

1159.

1160. // Reset the number of the instruction we've fetched.

1161. numInst = 0;

1162. }

1163.

1164. template <class Impl>

1165. bool

1166. DefaultFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)

1167. {

1168. // Update the per thread stall statuses.

1169. //上一个cycle如果decode 被Block并传送给fetch信号

1170. //这个cycle内stall

1171. if (fromDecode->decodeBlock[tid]) { //where is decodeBlock come from? see my doc

1172. stalls[tid].decode = true;

1173. }

1174. //同样，上个cycle如果decode的Block解除，这个cycle内Fetch的stall解除

1175. if (fromDecode->decodeUnblock[tid]) {

1176. assert(stalls[tid].decode);

1177. assert(!fromDecode->decodeBlock[tid]);

1178. stalls[tid].decode = false;

1179. }

1180.

1181. // Check squash signals from commit.

1182. //上个cycle如果commit阶段发生了squash，

1183. //这个cycle的fetch阶段要进行相应处理

1184. if (fromCommit->commitInfo[tid].squash) {

1185.

1186. DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "

1187. "from commit.\n",tid);

1188. // In any case, squash.

1189. //这里squash因为commit发现branch miss predict或者其他导致squash的信号

1190. squash(fromCommit->commitInfo[tid].pc,

1191. fromCommit->commitInfo[tid].doneSeqNum,

1192. fromCommit->commitInfo[tid].squashInst, tid);

1193.

1194. // If it was a branch mispredict on a control instruction, update the

1195. // branch predictor with that instruction, otherwise just kill the

1196. // invalid state we generated in after sequence number

1197. //这里是调用BP的squash，处理commit阶段才发现的miss predict

1198. //分成两部分:控制类指令和其他

1199. if (fromCommit->commitInfo[tid].mispredictInst &&

1200. fromCommit->commitInfo[tid].mispredictInst->isControl()) {

1201. branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,

1202. fromCommit->commitInfo[tid].pc,

1203. fromCommit->commitInfo[tid].branchTaken,

1204. tid);

1205. } else {

1206. branchPred->squash(fromCommit->commitInfo[tid].doneSeqNum,

1207. tid);

1208. }

1209. //优先处理来自commit的squash

1210. return true;

1211. } else if (fromCommit->commitInfo[tid].doneSeqNum) {

1212. // Update the branch predictor if it wasn't a squashed instruction

1213. // that was broadcasted.

1214. // 指令正常retire，用正确的结果更新分支预测器

1215. branchPred->update(fromCommit->commitInfo[tid].doneSeqNum, tid);

1216. }

1217.

1218. // Check squash signals from decode.

1219. //上个cycle如果decode阶段发生了squash，这里进行处理

1220. if (fromDecode->decodeInfo[tid].squash) {

1221. DPRINTF(Fetch, "[tid:%u]: Squashing instructions due to squash "

1222. "from decode.\n",tid);

1223.

1224. // Update the branch predictor.

1225. if (fromDecode->decodeInfo[tid].branchMispredict) {

1226. branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,

1227. fromDecode->decodeInfo[tid].nextPC,

1228. fromDecode->decodeInfo[tid].branchTaken,

1229. tid);

1230. } else {

1231. //如果squash不是因为分支预测错误引起的，单独处理

1232. branchPred->squash(fromDecode->decodeInfo[tid].doneSeqNum,

1233. tid);

1234. }

1235. //如果上一个cycle不是squash状态，单独处理

1236. //流水线中fetch和decode之间的指令

1237. if (fetchStatus[tid] != Squashing) {

1238.

1239. DPRINTF(Fetch, "Squashing from decode with PC = %s\n",

1240. fromDecode->decodeInfo[tid].nextPC);

1241. // Squash unless we're already squashing

1242. squashFromDecode(fromDecode->decodeInfo[tid].nextPC,

1243. fromDecode->decodeInfo[tid].squashInst,

1244. fromDecode->decodeInfo[tid].doneSeqNum,

1245. tid);

1246. //如果上一个cycle没有来自commit的stall，而有decode的stall

1247. //处理结束立即返回

1248. return true;

1249. }

1250. }

1251. //如果上一个cycle stall了，但是不是以下几种，

1252. //那么暂时设置为block并返回

1253. if (checkStall(tid) &&

1254. fetchStatus[tid] != IcacheWaitResponse &&

1255. fetchStatus[tid] != IcacheWaitRetry &&

1256. fetchStatus[tid] != ItlbWait &&

1257. fetchStatus[tid] != QuiescePending) {

1258. DPRINTF(Fetch, "[tid:%i]: Setting to blocked\n",tid);

1259.

1260. fetchStatus[tid] = Blocked;

1261.

1262. return true;

1263. }

1264. //如果上一个cycle状态是blocked或者squashing,

1265. //则在当前cycle转为running，即block和squash处理只需要一个cycle

1266. if (fetchStatus[tid] == Blocked ||

1267. fetchStatus[tid] == Squashing) {

1268. // Switch status to running if fetch isn't being told to block or

1269. // squash this cycle.

1270. DPRINTF(Fetch, "[tid:%i]: Done squashing, switching to running.\n",

1271. tid);

1272.

1273. fetchStatus[tid] = Running;

1274.

1275. return true;

1276. }

1277.

1278. // If we've reached this point, we have not gotten any signals that

1279. // cause fetch to change its status. Fetch remains the same as before.

1280. return false;

1281. }

1282.

1283. template<class Impl>

1284. typename Impl::DynInstPtr

1285. DefaultFetch<Impl>::buildInst(ThreadID tid, StaticInstPtr staticInst,

1286. StaticInstPtr curMacroop, TheISA::PCState thisPC,

1287. TheISA::PCState nextPC, bool trace)

1288. {

1289. // Get a sequence number.

1290. InstSeqNum seq = cpu->getAndIncrementInstSeq();

1291.

1292. // Create a new DynInst from the instruction fetched.

1293. DynInstPtr instruction =

1294. new DynInst(staticInst, curMacroop, thisPC, nextPC, seq, cpu);

1295. instruction->setTid(tid);

1296.

1297. instruction->setASID(tid);

1298.

1299. instruction->setThreadState(cpu->thread[tid]);

1300.

1301. DPRINTF(Fetch, "[tid:%i]: Instruction PC %#x (%d) created "

1302. "[sn:%lli].\n", tid, thisPC.instAddr(),

1303. thisPC.microPC(), seq);

1304.

1305. DPRINTF(Fetch, "[tid:%i]: Instruction is: %s\n", tid,

1306. instruction->staticInst->

1307. disassemble(thisPC.instAddr()));

1308.

1309. #if TRACING_ON

1310. if (trace) {

1311. instruction->traceData =

1312. cpu->getTracer()->getInstRecord(curTick(), cpu->tcBase(tid),

1313. instruction->staticInst, thisPC, curMacroop);

1314. }

1315. #else

1316. instruction->traceData = NULL;

1317. #endif

1318.

1319. // Add instruction to the CPU's list of instructions.

1320. instruction->setInstListIt(cpu->addInst(instruction));

1321.

1322. // Write the instruction to the first slot in the queue

1323. // that heads to decode.

1324. assert(numInst < fetchWidth);

1325. fetchQueue[tid].push_back(instruction);

1326. assert(fetchQueue[tid].size() <= fetchQueueSize);

1327. DPRINTF(Fetch, "[tid:%i]: Fetch queue entry created (%i/%i).\n",

1328. tid, fetchQueue[tid].size(), fetchQueueSize);

1329. //toDecode->insts[toDecode->size++] = instruction;

1330.

1331. // Keep track of if we can take an interrupt at this boundary

1332. delayedCommit[tid] = instruction->isDelayedCommit();

1333.

1334. return instruction;

1335. }

1336.

1337. template<class Impl>

1338. void

1339. //status_change用于回传fetch的状态

1340. DefaultFetch<Impl>::fetch(bool &status_change)

1341. {

1342. //

1343. // Start actual fetch

1344. //

1345. //fetchPolicy是enum类型，默认是singthread, 即tid=0

1346. //iqCount()&ldstq count for fetch policy, and check IEW info

1347. ThreadID tid = getFetchingThread(fetchPolicy);

1348.

1349. assert(!cpu->switchedOut());

1350.

1351. if (tid == InvalidThreadID) {

1352. // Breaks looping condition in tick()

1353. //在fetch.tick()中如果这两个相等，在tick函数for循环中

1354. //fetch函数不执行下一次就不会被执行

1355. threadFetched = numFetchingThreads;

1356.

1357. if (numThreads == 1) { // @todo Per-thread stats

1358. profileStall(0);//记录fetchstall的状态计数器，并打印相关信息

1359. }

1360.

1361. return;

1362. }

1363.

1364. DPRINTF(Fetch, "Attempting to fetch from [tid:%i]\n", tid);

1365.

1366. // The current PC.

1367. TheISA::PCState thisPC = pc[tid];

1368.

1369. Addr pcOffset = fetchOffset[tid];

1370. Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

1371.

1372. //某些指令在流水线里有可能被分拆为几条其他指令

1373. //see src/arch/x86/isa/insts/x87/arithmetic

1374. //microPC的最低位作为inRom标志

1375. bool inRom = isRomMicroPC(thisPC.microPC());

1376.

1377. // If returning from the delay of a cache miss, then update the status

1378. // to running, otherwise do the cache access. Possibly move this up

1379. // to tick() function.

1380. //如果上一个cycle完成了icache访问，本cycle就可以把fetch状态改为running

1381. //因为默认情况下，fetch状态改变为1 cycle

1382. if (fetchStatus[tid] == IcacheAccessComplete) {

1383. DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid);

1384.

1385. fetchStatus[tid] = Running;

1386. status_change = true;

1387. } else if (fetchStatus[tid] == Running) {

1388. // Align the fetch PC so its at the start of a fetch buffer segment.

1389. //如果上一个cycle fetch状态为running, 继续访问icache进行取指

1390. //把取指位置对齐到fetch buffer的起始位置

1391. Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

1392.

1393. // If buffer is no longer valid or fetchAddr has moved to point

1394. // to the next cache block, AND we have no remaining ucode

1395. // from a macro-op, then start fetch from icache.

1396.

1397. //在fetch指令超过fetchBuffer后， fetchBufferPC有可能落后于fetchBufferBlockPC

1398. //如果fetchBuffer中的数据已经无效(fetchBufferValid[tid]=0, if成立)

1399. //或者尽管fetchBufferBlockPC对齐到fetchBuffer起始位置，

1400. //fetchAddr已经超过了当前的fetchBuffer边界

1401.

1402. //fetchWidth是每个cycle最多取几条指令

1403. //fetchBufferSize要小于等于cache line size，以字节为单位，

1404. //fetchBuffer用于暂存每个cycle取回的指令，有可能无法存放整个cacheline

1405. //fetchQueueSize默认大小32可以暂存多个cycle取回的指令，是队列类型，

1406. //按照以上数据计算，每个cycle可以取回8条指令，

1407. //fetch可以连续取4cycle*8=32条指令，即使Decode等阶段stall了。

1408. if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])

1409. && !inRom && !macroop[tid]) {

1410. DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read "

1411. "instruction, starting at PC %s.\n", tid, thisPC);

1412.

1413. fetchCacheLine(fetchAddr, tid, thisPC.instAddr());

1414.

1415. if (fetchStatus[tid] == IcacheWaitResponse)

1416. ++icacheStallCycles;

1417. else if (fetchStatus[tid] == ItlbWait)

1418. ++fetchTlbCycles;

1419. else

1420. ++fetchMiscStallCycles;

1421. return;

1422. } else if ((checkInterrupt(thisPC.instAddr()) && !delayedCommit[tid])) {

1423. // Stall CPU if an interrupt is posted and we're not issuing

1424. // an delayed commit micro-op currently (delayed commit instructions

1425. // are not interruptable by interrupts, only faults)

1426. ++fetchMiscStallCycles;

1427. DPRINTF(Fetch, "[tid:%i]: Fetch is stalled!\n", tid);

1428. return;

1429. }

1430. } else {

1431. if (fetchStatus[tid] == Idle) {

1432. ++fetchIdleCycles;

1433. DPRINTF(Fetch, "[tid:%i]: Fetch is idle!\n", tid);

1434. }

1435.

1436. // Status is Idle, so fetch should do nothing.

1437. return;

1438. }

1439.

1440. ++fetchCycles;

1441.

1442. TheISA::PCState nextPC = thisPC;

1443.

1444. StaticInstPtr staticInst = NULL;

1445. StaticInstPtr curMacroop = macroop[tid];

1446.

1447. // If the read of the first instruction was successful, then grab the

1448. // instructions from the rest of the cache line and put them into the

1449. // queue heading to decode.

1450.

1451. DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to "

1452. "decode.\n", tid);

1453.

1454. // Need to keep track of whether or not a predicted branch

1455. // ended this fetch block.

1456. bool predictedBranch = false;

1457.

1458. // Need to halt fetch if quiesce instruction detected

1459. bool quiesce = false;

1460.

1461. TheISA::MachInst *cacheInsts =

1462. reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]);

1463. //计算fetchbuffer可以存放几条指令

1464. const unsigned numInsts = fetchBufferSize / instSize;

1465. //计算当前需要的指令在cache block中的位置

1466. unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;

1467.

1468. // Loop through instruction memory from the cache.

1469. // Keep issuing while fetchWidth is available and branch is not

1470. // predicted taken

1471. //如果没有被分支预测taken和quiesce指令中断，处理cacheline中剩余指令

1472. //直到所处理指令数达到fetchWidth

1473. while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize

1474. && !predictedBranch && !quiesce) {

1475. // We need to process more memory if we aren't going to get a

1476. // StaticInst from the rom, the current macroop, or what's already

1477. // in the decoder.

1478. //有可能需要更多内存，情况是所取指令不在rom

1479. //不是current macroop，没有在decoder中

1480. //这里的mem指的是取指内存

1481. bool needMem = !inRom && !curMacroop &&

1482. !decoder[tid]->instReady();

1483. fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

1484. Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

1485.

1486. if (needMem) {

1487. // If buffer is no longer valid or fetchAddr has moved to point

1488. // to the next cache block then start fetch from icache.

1489. //如果fetchBuffer失效，或者fetchAddr指向的位置已经不是

1490. //当前取回的cacheline内的地址，而是下一个cacheline地址

1491. //那么只能等到下一个cycle调用fetchcacheline函数取指

1492. if (!fetchBufferValid[tid] ||

1493. fetchBufferBlockPC != fetchBufferPC[tid])

1494. break;

1495. //如果要取得指令已经大于一个block能容纳的指令数

1496. //则需要多个block

1497. if (blkOffset >= numInsts) {

1498. // We need to process more memory, but we've run out of the

1499. // current block.

1500. break;

1501. }

1502. //arm没有延迟槽，只有mips sparc有

1503. if (ISA_HAS_DELAY_SLOT && pcOffset == 0) {

1504. // Walk past any annulled delay slot instructions.

1505. Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask;

1506. while (fetchAddr != pcAddr && blkOffset < numInsts) {

1507. blkOffset++;

1508. fetchAddr += instSize;

1509. }

1510. if (blkOffset >= numInsts)

1511. break;

1512. }

1513. //inst指向将要取得的指令编码

1514. //gtoh:gest to host 字节序转换sim/byteswap.hh

1515. MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]);

1516. //见decode.hh 处理跨cacheline指令，同时进行预译码

1517. //moreBytes()函数的作用就是修改needMoreBytes()

1518. decoder[tid]->moreBytes(thisPC, fetchAddr, inst);

1519. //如果跨cache line，需要步进处理

1520. if (decoder[tid]->needMoreBytes()) {

1521. blkOffset++;

1522. fetchAddr += instSize;

1523. pcOffset += instSize;

1524. }

1525. }

1526.

1527. // Extract as many instructions and/or microops as we can from

1528. // the memory we've processed so far.

1529. do {

1530. if (!(curMacroop || inRom)) {

1531. if (decoder[tid]->instReady()) {//ready之一即跨cacheline的指令已经处理完

1532. staticInst = decoder[tid]->decode(thisPC);//注册每条指令的execute()函数

1533.

1534. // Increment stat of fetched instructions.

1535. ++fetchedInsts;

1536.

1537. if (staticInst->isMacroop()) {

1538. curMacroop = staticInst;//如果当前指令是macro指令，赋值

1539. } else {

1540. pcOffset = 0;

1541. }

1542. } else {

1543. // We need more bytes for this instruction so blkOffset and

1544. // pcOffset will be updated

1545. //需要进一步处理，blkOffset pcOffset都要更新

1546. break;

1547. }

1548. }

1549. // Whether we're moving to a new macroop because we're at the

1550. // end of the current one, or the branch predictor incorrectly

1551. // thinks we are...

1552. bool newMacro = false;

1553. if (curMacroop || inRom) {

1554. if (inRom) {

1555. staticInst = cpu->microcodeRom.fetchMicroop(

1556. thisPC.microPC(), curMacroop);

1557. } else {

1558. staticInst = curMacroop->fetchMicroop(thisPC.microPC());

1559. }

1560. newMacro |= staticInst->isLastMicroop();

1561. }

1562. //创建跟这条指令相关的动态信息

1563. //同时把指令放到fetchQueue中准备传送给decode

1564. DynInstPtr instruction =

1565. buildInst(tid, staticInst, curMacroop,

1566. thisPC, nextPC, true);

1567.

1568. ppFetch->notify(instruction);

1569. numInst++;

1570.

1571. #if TRACING_ON

1572. if (DTRACE(O3PipeView)) {

1573. instruction->fetchTick = curTick();

1574. }

1575. #endif

1576.

1577. //把当前指令赋值给下一条指令

1578. nextPC = thisPC;

1579.

1580. // If we're branching after this instruction, quit fetching

1581. // from the same block.

1582. //如果当前指令是分支指令或者被预测为taken的指令

1583. //那么接下来取指操作不能挨着当前指令继续取指

1584. predictedBranch |= thisPC.branching();

1585. predictedBranch |=

1586. lookupAndUpdateNextPC(instruction, nextPC);

1587. if (predictedBranch) {

1588. DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC);

1589. }

1590.

1591. newMacro |= thisPC.instAddr() != nextPC.instAddr();

1592.

1593. // Move to the next instruction, unless we have a branch.

1594. thisPC = nextPC;

1595. inRom = isRomMicroPC(thisPC.microPC());

1596.

1597. if (newMacro) {

1598. fetchAddr = thisPC.instAddr() & BaseCPU::PCMask;

1599. blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize;

1600. pcOffset = 0;

1601. curMacroop = NULL;//如果没有其他指令ready则结束while循环

1602. }

1603.

1604. if (instruction->isQuiesce()) {

1605. DPRINTF(Fetch,

1606. "Quiesce instruction encountered, halting fetch!\n");

1607. fetchStatus[tid] = QuiescePending;

1608. status_change = true;

1609. quiesce = true;

1610. break;

1611. }

1612. } while ((curMacroop || decoder[tid]->instReady()) &&

1613. numInst < fetchWidth &&

1614. fetchQueue[tid].size() < fetchQueueSize);

1615.

1616. // Re-evaluate whether the next instruction to fetch is in micro-op ROM

1617. // or not.

1618. inRom = isRomMicroPC(thisPC.microPC());

1619. }

1620.

1621. if (predictedBranch) {

1622. DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch "

1623. "instruction encountered.\n", tid);

1624. } else if (numInst >= fetchWidth) {

1625. DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth "

1626. "for this cycle.\n", tid);

1627. } else if (blkOffset >= fetchBufferSize) {

1628. DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the"

1629. "fetch buffer.\n", tid);

1630. }

1631.

1632. macroop[tid] = curMacroop;

1633. fetchOffset[tid] = pcOffset;

1634.

1635. if (numInst > 0) {

1636. wroteToTimeBuffer = true;

1637. }

1638.

1639. pc[tid] = thisPC;

1640.

1641. // pipeline a fetch if we're crossing a fetch buffer boundary and not in

1642. // a state that would preclude fetching

1643. //下一个cycle是否需要继续访问icache?

1644. //如果指令跨过fetchbuffer，同时状态允许，则下一个cycle继续访问icache

1645. fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

1646. Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

1647. issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] &&

1648. fetchStatus[tid] != IcacheWaitResponse &&

1649. fetchStatus[tid] != ItlbWait &&

1650. fetchStatus[tid] != IcacheWaitRetry &&

1651. fetchStatus[tid] != QuiescePending &&

1652. !curMacroop;

1653. }

1654.

1655. template<class Impl>

1656. void

1657. DefaultFetch<Impl>::recvReqRetry()

1658. {

1659. if (retryPkt != NULL) {

1660. assert(cacheBlocked);

1661. assert(retryTid != InvalidThreadID);

1662. assert(fetchStatus[retryTid] == IcacheWaitRetry);

1663.

1664. if (cpu->getInstPort().sendTimingReq(retryPkt)) {

1665. fetchStatus[retryTid] = IcacheWaitResponse;

1666. // Notify Fetch Request probe when a retryPkt is successfully sent.

1667. // Note that notify must be called before retryPkt is set to NULL.

1668. ppFetchRequestSent->notify(retryPkt->req);

1669. retryPkt = NULL;

1670. retryTid = InvalidThreadID;

1671. cacheBlocked = false;

1672. }

1673. } else {

1674. assert(retryTid == InvalidThreadID);

1675. // Access has been squashed since it was sent out. Just clear

1676. // the cache being blocked.

1677. cacheBlocked = false;

1678. }

1679. }

1680.

1681. ///

1682. // //

1683. // SMT FETCH POLICY MAINTAINED HERE //

1684. // //

1685. ///

1686. template<class Impl>

1687. ThreadID

1688. DefaultFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)

1689. {

1690. if (numThreads > 1) {

1691. switch (fetch_priority) {

1692.

1693. case SingleThread:

1694. return 0;

1695.

1696. case RoundRobin:

1697. return roundRobin();

1698.

1699. case IQ:

1700. return iqCount();

1701.

1702. case LSQ:

1703. return lsqCount();

1704.

1705. case Branch:

1706. return branchCount();

1707.

1708. default:

1709. return InvalidThreadID;

1710. }

1711. } else {

1712. list<ThreadID>::iterator thread = activeThreads->begin();

1713. if (thread == activeThreads->end()) {

1714. return InvalidThreadID;

1715. }

1716.

1717. ThreadID tid = *thread;

1718.

1719. if (fetchStatus[tid] == Running ||

1720. fetchStatus[tid] == IcacheAccessComplete ||

1721. fetchStatus[tid] == Idle) {

1722. return tid;

1723. } else {

1724. return InvalidThreadID;

1725. }

1726. }

1727. }

1728.

1729.

1730. template<class Impl>

1731. ThreadID

1732. DefaultFetch<Impl>::roundRobin()

1733. {

1734. list<ThreadID>::iterator pri_iter = priorityList.begin();

1735. list<ThreadID>::iterator end = priorityList.end();

1736.

1737. ThreadID high_pri;

1738.

1739. while (pri_iter != end) {

1740. high_pri = *pri_iter;

1741.

1742. assert(high_pri <= numThreads);

1743.

1744. if (fetchStatus[high_pri] == Running ||

1745. fetchStatus[high_pri] == IcacheAccessComplete ||

1746. fetchStatus[high_pri] == Idle) {

1747.

1748. priorityList.erase(pri_iter);

1749. priorityList.push_back(high_pri);

1750.

1751. return high_pri;

1752. }

1753.

1754. pri_iter++;

1755. }

1756.

1757. return InvalidThreadID;

1758. }

1759.

1760. template<class Impl>

1761. ThreadID

1762. DefaultFetch<Impl>::iqCount()

1763. {

1764. //sorted from lowest->highest

1765. std::priority_queue<unsigned,vector<unsigned>,

1766. std::greater<unsigned> > PQ;

1767. std::map<unsigned, ThreadID> threadMap;

1768.

1769. list<ThreadID>::iterator threads = activeThreads->begin();

1770. list<ThreadID>::iterator end = activeThreads->end();

1771.

1772. while (threads != end) {

1773. ThreadID tid = *threads++;

1774. unsigned iqCount = fromIEW->iewInfo[tid].iqCount;

1775.

1776. //we can potentially get tid collisions if two threads

1777. //have the same iqCount, but this should be rare.

1778. PQ.push(iqCount);

1779. threadMap[iqCount] = tid;

1780. }

1781.

1782. while (!PQ.empty()) {

1783. ThreadID high_pri = threadMap[PQ.top()];

1784.

1785. if (fetchStatus[high_pri] == Running ||

1786. fetchStatus[high_pri] == IcacheAccessComplete ||

1787. fetchStatus[high_pri] == Idle)

1788. return high_pri;

1789. else

1790. PQ.pop();

1791.

1792. }

1793.

1794. return InvalidThreadID;

1795. }

1796.

1797. template<class Impl>

1798. ThreadID

1799. DefaultFetch<Impl>::lsqCount()

1800. {

1801. //sorted from lowest->highest

1802. std::priority_queue<unsigned,vector<unsigned>,

1803. std::greater<unsigned> > PQ;

1804. std::map<unsigned, ThreadID> threadMap;

1805.

1806. list<ThreadID>::iterator threads = activeThreads->begin();

1807. list<ThreadID>::iterator end = activeThreads->end();

1808.

1809. while (threads != end) {

1810. ThreadID tid = *threads++;

1811. unsigned ldstqCount = fromIEW->iewInfo[tid].ldstqCount;

1812.

1813. //we can potentially get tid collisions if two threads

1814. //have the same iqCount, but this should be rare.

1815. PQ.push(ldstqCount);

1816. threadMap[ldstqCount] = tid;

1817. }

1818.

1819. while (!PQ.empty()) {

1820. ThreadID high_pri = threadMap[PQ.top()];

1821.

1822. if (fetchStatus[high_pri] == Running ||

1823. fetchStatus[high_pri] == IcacheAccessComplete ||

1824. fetchStatus[high_pri] == Idle)

1825. return high_pri;

1826. else

1827. PQ.pop();

1828. }

1829.

1830. return InvalidThreadID;

1831. }

1832.

1833. template<class Impl>

1834. ThreadID

1835. DefaultFetch<Impl>::branchCount()

1836. {

1837. #if 0

1838. list<ThreadID>::iterator thread = activeThreads->begin();

1839. assert(thread != activeThreads->end());

1840. ThreadID tid = *thread;

1841. #endif

1842.

1843. panic("Branch Count Fetch policy unimplemented\n");

1844. return InvalidThreadID;

1845. }

1846.

1847. template<class Impl>

1848. void

1849. DefaultFetch<Impl>::pipelineIcacheAccesses(ThreadID tid)

1850. {

1851. if (!issuePipelinedIfetch[tid]) {

1852. return;

1853. }

1854.

1855. // The next PC to access.

1856. TheISA::PCState thisPC = pc[tid];

1857.

1858. if (isRomMicroPC(thisPC.microPC())) {

1859. return;

1860. }

1861.

1862. Addr pcOffset = fetchOffset[tid];

1863. Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask;

1864.

1865. // Align the fetch PC so its at the start of a fetch buffer segment.

1866. Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr);

1867.

1868. // Unless buffer already got the block, fetch it from icache.

1869. if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) {

1870. DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, "

1871. "starting at PC %s.\n", tid, thisPC);

1872.

1873. fetchCacheLine(fetchAddr, tid, thisPC.instAddr());

1874. }

1875. }

1876.

1877. template<class Impl>

1878. void

1879. DefaultFetch<Impl>::profileStall(ThreadID tid) {

1880. DPRINTF(Fetch,"There are no more threads available to fetch from.\n");

1881.

1882. // @todo Per-thread stats

1883.

1884. if (stalls[tid].drain) {

1885. ++fetchPendingDrainCycles;

1886. DPRINTF(Fetch, "Fetch is waiting for a drain!\n");

1887. } else if (activeThreads->empty()) {

1888. ++fetchNoActiveThreadStallCycles;

1889. DPRINTF(Fetch, "Fetch has no active thread!\n");

1890. } else if (fetchStatus[tid] == Blocked) {

1891. ++fetchBlockedCycles;

1892. DPRINTF(Fetch, "[tid:%i]: Fetch is blocked!\n", tid);

1893. } else if (fetchStatus[tid] == Squashing) {

1894. ++fetchSquashCycles;

1895. DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid);

1896. } else if (fetchStatus[tid] == IcacheWaitResponse) {

1897. ++icacheStallCycles;

1898. DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n",

1899. tid);

1900. } else if (fetchStatus[tid] == ItlbWait) {

1901. ++fetchTlbCycles;

1902. DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to "

1903. "finish!\n", tid);

1904. } else if (fetchStatus[tid] == TrapPending) {

1905. ++fetchPendingTrapStallCycles;

1906. DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending trap!\n",

1907. tid);

1908. } else if (fetchStatus[tid] == QuiescePending) {

1909. ++fetchPendingQuiesceStallCycles;

1910. DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for a pending quiesce "

1911. "instruction!\n", tid);

1912. } else if (fetchStatus[tid] == IcacheWaitRetry) {

1913. ++fetchIcacheWaitRetryStallCycles;

1914. DPRINTF(Fetch, "[tid:%i]: Fetch is waiting for an I-cache retry!\n",

1915. tid);

1916. } else if (fetchStatus[tid] == NoGoodAddr) {

1917. DPRINTF(Fetch, "[tid:%i]: Fetch predicted non-executable address\n",

1918. tid);

1919. } else {

1920. DPRINTF(Fetch, "[tid:%i]: Unexpected fetch stall reason (Status: %i).\n",

1921. tid, fetchStatus[tid]);

1922. }

1923. }

1924.

1925. #endif//__CPU_O3_FETCH_IMPL_HH__

乾龙_Heron

关注

4
点赞
踩
10

收藏

觉得还不错? 一键收藏
0
评论
gem5中O3模式下fetch_impl.hh源代码详细分析

gem5是如何通过软件模拟硬件执行的？fetch阶段如何进行取指处理？我准备把所有gem5代码都分析一遍。
复制链接

扫一扫

专栏目录

gem5中O3模式下fetch_impl.hh源代码详细分析

fetch_impl.hh

“相关推荐”对你有帮助么？