示例代码含义:统计目标字符串中目标字符出现的次数。
linux代码(例子)如下:
#include <iostream>
#include <x86intrin.h>
#include <fstream>
#include <chrono>
using namespace std;
// Plain (pointer, length) pair describing a run of bytes.
// The struct declares no destructor, so it never releases the memory behind
// `p`; whoever allocated that memory remains responsible for freeing it.
struct StringView {
// Address of the first byte of the run.
const char* p;
// Number of bytes in the run. Being const, it is fixed at initialisation and
// makes the struct non-assignable.
const size_t len;
};
// Loads the whole file `fileName` into a freshly allocated heap buffer.
//
// fileName: path of the file to read.
// Returns a StringView whose `p` points at a buffer allocated with new[] and
// whose `len` is the number of bytes that were actually read into it. The
// caller owns the buffer and must release it with delete[]. When the file
// cannot be opened, is empty, or its size cannot be determined, the returned
// view has length 0 (the pointer is still a valid new[] result).
StringView FileSize(const char* fileName) {
    // Binary mode keeps the size reported by tellg() equal to the number of
    // bytes read() can deliver; `ate` positions the stream at the end so that
    // tellg() yields the file size directly.
    std::ifstream ifstr(fileName, std::ios::binary | std::ios::ate);

    // tellg() reports -1 on a failed stream; treat that like an empty file.
    const std::streamoff endPos = ifstr.tellg();
    const size_t fileSize = endPos > 0 ? static_cast<size_t>(endPos) : 0;

    char* p = new char[fileSize];
    size_t bytesRead = 0;
    if (fileSize > 0) {
        ifstr.seekg(0, std::ios::beg);
        ifstr.read(p, static_cast<std::streamsize>(fileSize));
        // Report what was really read, so a short read never exposes
        // uninitialised bytes through the returned length.
        bytesRead = static_cast<size_t>(ifstr.gcount());
    }
    return {p, bytesRead};
}
// Scalar reference implementation: counts how many bytes of `str` equal `c`.
//
// str: view over the bytes to scan; str.p must reference at least str.len
//      readable bytes.
// c:   byte value to look for.
// Returns the number of matching bytes.
size_t count_c_normal(const StringView& str, const uint8_t c) {
    // Counter and index are size_t to match str.len: a 32-bit index would wrap
    // around (and loop forever) on inputs longer than UINT32_MAX bytes, and a
    // 32-bit counter would truncate the result.
    size_t num = 0;
    for (size_t i = 0; i < str.len; ++i) {
        // Compare as unsigned bytes. Plain char may be signed, in which case a
        // byte >= 0x80 promotes to a negative int and can never equal `c`.
        if (c == static_cast<unsigned char>(str.p[i])) {
            ++num;
        }
    }
    return num;
}
// SSE2 implementation: counts how many bytes of `str` equal `c`, comparing
// 16 bytes per iteration.
//
// str: view over the bytes to scan; str.p must reference at least str.len
//      readable bytes. No alignment is required (unaligned loads are used).
// c:   byte value to look for.
// Returns the number of matching bytes.
size_t count_c_simd(const StringView& str, const uint8_t c) {
    constexpr size_t kLanes = 16;  // bytes held by one __m128i

    // needle = { c, c, ..., c } (16 copies)
    const __m128i needle = _mm_set1_epi8(static_cast<char>(c));

    size_t cnt = 0;
    size_t i = 0;  // size_t so inputs longer than UINT32_MAX cannot wrap the index

    // Only process full 16-byte chunks here. `i <= str.len` always holds, so
    // the subtraction cannot underflow, and the load never reads past the end
    // of the buffer.
    for (; str.len - i >= kLanes; i += kLanes) {
        // chunk = { p[i], p[i+1], ..., p[i+15] }
        const __m128i chunk =
            _mm_loadu_si128(reinterpret_cast<const __m128i*>(str.p + i));
        // Each lane becomes 0xFF where the byte matches, 0x00 otherwise.
        const __m128i eq = _mm_cmpeq_epi8(chunk, needle);
        // Collapse the 16 lanes into the low 16 bits of an integer mask.
        const unsigned mask = static_cast<unsigned>(_mm_movemask_epi8(eq));
        // One set bit per matching byte.
        cnt += static_cast<size_t>(__builtin_popcount(mask));
    }

    // Scalar tail for the remaining (len % 16) bytes. Compare as unsigned
    // bytes so values >= 0x80 match even when plain char is signed.
    for (; i < str.len; ++i) {
        if (c == static_cast<unsigned char>(str.p[i])) {
            ++cnt;
        }
    }
    return cnt;
}
// Benchmark driver: loads ./test_file, counts occurrences of the character
// '1' with the scalar and the SSE2 implementation, then prints both counts
// and both elapsed times in milliseconds.
int main() {
    const auto ret = FileSize("./test_file");

    const auto t1 = std::chrono::steady_clock::now();
    const size_t cnt1 = count_c_normal(ret, uint8_t('1'));
    const auto t2 = std::chrono::steady_clock::now();
    const size_t cnt2 = count_c_simd(ret, uint8_t('1'));
    const auto t3 = std::chrono::steady_clock::now();

    std::cout << "cnt1:" << cnt1 << ",cnt2:" << cnt2 << '\n';

    const auto d1 = std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
    const auto d2 = std::chrono::duration_cast<std::chrono::milliseconds>(t3 - t2).count();
    std::cout << "time1:" << d1 << ",time2:" << d2 << '\n';

    // FileSize() hands back a buffer allocated with new[]; release it here so
    // the program does not leak it.
    delete[] ret.p;
    return 0;
}
生成随机数代码如下:
#include <iostream>
#include <random>
#include <fstream>
using namespace std;
// Writes `file_len` pseudo-random values to ./test_file.
//
// Each value is one draw from a default-constructed (hence default-seeded)
// engine, reduced modulo 128 and streamed with operator<<. The values are
// therefore written as decimal digits with no separator, not as single raw
// bytes, so the resulting file is longer than `file_len` bytes.
void RandCharFile(const uint32_t file_len) {
    default_random_engine engine;
    ofstream out("./test_file");
    for (uint32_t remaining = file_len; remaining != 0; --remaining) {
        const auto value = engine() % 128;
        out << value;
    }
    out.close();
}
// Generator entry point: produces ./test_file from 1024 * 1024 * 1024 random
// values.
int main() {
    const uint32_t value_count = 1024 * 1024 * 1024;
    RandCharFile(value_count);
    return 0;
}
编译命令:
g++ -std=c++11 main.cc -o main -O2
结果如下:
cnt1:511699574,cnt2:511699574
time1:1957,time2:900
总结:开启 -O2 优化时,SIMD 版本比普通版本快 2 倍多;不开启 -O2 优化时,两者耗时相差 6 倍以上。
备注:AVX2 比上面示例使用的 128 位 SSE 指令更强大(AVX2 同样属于 SIMD 指令集,寄存器宽度为 256 位):对 AVX2 也做了类似测试,测试过程和结果:https://blog.csdn.net/weixin_41644391/article/details/113571486