深入理解 CPU 的分支预测(Branch Prediction)模型
使用表结构优化
优化前代码
// test_predict.cc
#include <algorithm>
#include <cstdlib>  // declares std::rand, which main() calls
#include <ctime>
#include <iostream>

// Baseline benchmark: repeatedly sums every element >= 128 of a sorted array
// using a conditional branch in the inner loop, then prints the time measured
// with clock() (in seconds) followed by the sum.
int main() {
  const unsigned ARRAY_SIZE = 50000;
  int data[ARRAY_SIZE];
  const unsigned DATA_STRIDE = 256;

  // Fill the array with pseudo-random values in [0, DATA_STRIDE).
  // std::rand is never seeded in this program.
  for (unsigned c = 0; c < ARRAY_SIZE; ++c) data[c] = std::rand() % DATA_STRIDE;

  // After sorting, every value below 128 precedes every value >= 128, so the
  // outcome of the comparison in the hot loop flips only once per pass.
  std::sort(data, data + ARRAY_SIZE);

  {  // Timed section
    clock_t start = clock();
    long long sum = 0;
    for (unsigned i = 0; i < 100000; ++i) {
      for (unsigned c = 0; c < ARRAY_SIZE; ++c) {
        // Deliberately written as an explicit `if`: this data-dependent
        // branch is the thing being timed.
        if (data[c] >= 128) sum += data[c];
      }
    }
    double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
    std::cout << elapsedTime << "\n";
    std::cout << "sum = " << sum << "\n";
  }
  return 0;
}
优化后代码
#include <algorithm>
#include <cstdlib>  // declares std::rand, which main() calls
#include <ctime>
#include <iostream>

// Lookup-table benchmark: repeatedly sums every element >= 128 of a sorted
// array, but replaces the conditional in the inner loop with an indexed read
// from a precomputed table. Prints the time measured with clock() (in seconds)
// followed by the sum.
int main() {
  const unsigned ARRAY_SIZE = 50000;
  int data[ARRAY_SIZE];
  const unsigned DATA_STRIDE = 256;
  // Values below this threshold contribute nothing to the sum.
  const unsigned THRESHOLD = 128;

  // Fill the array with pseudo-random values in [0, DATA_STRIDE).
  // std::rand is never seeded in this program.
  for (unsigned c = 0; c < ARRAY_SIZE; ++c) data[c] = std::rand() % DATA_STRIDE;

  // lookup[v] is the amount a value v adds to the sum: v itself when
  // v >= THRESHOLD, otherwise 0. One entry per possible data value.
  int lookup[DATA_STRIDE];
  for (unsigned c = 0; c < DATA_STRIDE; ++c) {
    lookup[c] = (c >= THRESHOLD) ? static_cast<int>(c) : 0;
  }

  std::sort(data, data + ARRAY_SIZE);

  {  // Timed section
    clock_t start = clock();
    long long sum = 0;
    for (unsigned i = 0; i < 100000; ++i) {
      for (unsigned c = 0; c < ARRAY_SIZE; ++c) {
        // Branch-free replacement for:
        //   if (data[c] >= 128) sum += data[c];
        // data[c] is always in [0, DATA_STRIDE), so the index is in range.
        sum += lookup[data[c]];
      }
    }
    double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
    std::cout << elapsedTime << "\n";
    std::cout << "sum = " << sum << "\n";
  }
  return 0;
}