按照经典理论,频繁地在堆上分配动态内存对系统而言是一笔开销,应当尽可能地使用静态内存来降低这一开销。我们以std::vector为例:假如重写其allocator,使该allocator在静态数组中分配空间,其实际表现如何?
#include <unistd.h>

#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <new>
#include <utility>
#include <vector>
// Steady-clock samples collected by the benchmark loop: one entry is appended
// to each vector per iteration (start and finish of the measured section).
using SteadyTimePoint = std::chrono::steady_clock::time_point;
std::vector<SteadyTimePoint> time_point_begin;
std::vector<SteadyTimePoint> time_point_end;
/// Allocator that hands out storage from a fixed-size static byte buffer
/// instead of the heap.
///
/// All instances of one specialization `StaticVectorAllocator<T>` share the
/// same buffer and bookkeeping (they are static members), so the allocator
/// object itself is stateless and any two instances compare equal. Different
/// `T` get different buffers.
///
/// Allocation is a simple bump of `used_bytes`. Storage is reclaimed in two
/// situations only:
///   * the block being released is the most recently allocated one, or
///   * no block is outstanding any more, in which case the whole buffer is
///     made available again.
/// Blocks released out of order stay unavailable until one of those happens.
///
/// The shared state is not synchronized; concurrent use from several threads
/// requires external locking.
template <typename T>
class StaticVectorAllocator {
private:
    /// Capacity of the backing buffer, in bytes.
    static constexpr std::size_t BUFFER_SIZE = 1024;
    /// Backing storage; aligned for T so every bump offset (a multiple of
    /// sizeof(T)) is suitably aligned as well.
    alignas(T) static inline char buffer[BUFFER_SIZE];
    /// Offset of the first byte that has not been handed out yet.
    static inline std::size_t used_bytes = 0;
    /// Total size of the blocks that were allocated and not yet deallocated.
    static inline std::size_t live_bytes = 0;

public:
    using value_type = T;

    StaticVectorAllocator() = default;

    /// Rebinding conversion; there is no per-instance state to copy.
    template <class U>
    constexpr StaticVectorAllocator(const StaticVectorAllocator<U>&) noexcept {}

    /// Reserves uninitialized storage for `n` objects of type T.
    /// @param n number of objects.
    /// @return pointer to the first byte of the reserved storage.
    /// @throws std::bad_alloc if the remaining buffer space is insufficient.
    [[nodiscard]] T* allocate(std::size_t n) {
        // Compare in units of elements: `n * sizeof(T)` could wrap around for
        // very large n and slip past a byte-based check.
        if (n > (BUFFER_SIZE - used_bytes) / sizeof(T)) {
            throw std::bad_alloc();
        }
        const std::size_t bytes = n * sizeof(T);
        T* ptr = reinterpret_cast<T*>(buffer + used_bytes);
        used_bytes += bytes;
        live_bytes += bytes;
        return ptr;
    }

    /// Releases a block previously obtained from allocate().
    /// @param p pointer returned by allocate() of this specialization.
    /// @param n the element count that was passed to that allocate() call.
    void deallocate(T* p, std::size_t n) noexcept {
        const std::size_t bytes = n * sizeof(T);
        live_bytes -= bytes;
        if (live_bytes == 0) {
            // Nothing is outstanding: the whole buffer can be reused.
            used_bytes = 0;
        } else if (reinterpret_cast<char*>(p) + bytes == buffer + used_bytes) {
            // The block sits at the top of the bump region: pop it.
            used_bytes -= bytes;
        }
    }

    /// Same as allocate(n).
    [[nodiscard]] auto allocate_array(std::size_t n) -> T* { return allocate(n); }

    /// Same as deallocate(p, n).
    void deallocate_array(T* p, std::size_t n) noexcept { deallocate(p, n); }

    /// Constructs a U at `p` from `args` using placement new.
    template <class U, class... Args>
    void construct(U* p, Args&&... args) {
        ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
    }

    /// Runs the destructor of the object at `p` without releasing its storage.
    template <class U>
    void destroy(U* p) noexcept {
        p->~U();
    }

    /// Instances carry no state, so they are always interchangeable.
    friend bool operator==(const StaticVectorAllocator&, const StaticVectorAllocator&) noexcept {
        return true;
    }
    friend bool operator!=(const StaticVectorAllocator&, const StaticVectorAllocator&) noexcept {
        return false;
    }
};
/// Appends one line per time point to the file at `path`; each line holds the
/// time point's steady-clock offset in whole microseconds.
/// @param path   file to append to (created if it does not exist).
/// @param points samples to write, in order.
/// @return true if the file could be opened and all writes succeeded.
static bool append_timestamps(const char* path,
                              const std::vector<std::chrono::steady_clock::time_point>& points) {
    std::ofstream out(path, std::ios_base::app);
    if (!out.is_open()) {
        return false;
    }
    for (const auto& point : points) {
        // '\n' instead of std::endl: no need to flush after every sample.
        out << std::chrono::time_point_cast<std::chrono::microseconds>(point)
                   .time_since_epoch()
                   .count()
            << '\n';
    }
    out.flush();
    return out.good();
}

/// Benchmark driver.
///
/// Usage: <program> <cycle> <size> <interval_us>
///   cycle       number of measured iterations
///   size        parsed but currently not used by the measured section
///   interval_us pause between iterations, in microseconds
///
/// Each iteration records a steady-clock sample, creates and destroys four
/// empty vectors that use StaticVectorAllocator<int>, records a second sample
/// and then sleeps. Afterwards the samples are appended to "begin.txt" and
/// "end.txt".
///
/// @return EXIT_SUCCESS, or EXIT_FAILURE on bad arguments or write errors.
int main(int argc, char** argv) {
    if (argc < 4) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "benchmark")
                  << " <cycle> <size> <interval_us>\n";
        return EXIT_FAILURE;
    }

    const int cycle = std::atoi(argv[1]);
    [[maybe_unused]] const int size = std::atoi(argv[2]);
    const int interval = std::atoi(argv[3]);
    if (cycle < 0 || interval < 0) {
        // A negative interval would otherwise be converted to a huge unsigned
        // sleep duration.
        std::cerr << "cycle and interval_us must be non-negative\n";
        return EXIT_FAILURE;
    }

    // Reserve up front so that growing the sample vectors never happens
    // between the two clock reads of an iteration.
    const auto samples = static_cast<std::size_t>(cycle);
    time_point_begin.reserve(samples);
    time_point_end.reserve(samples);

    for (int iteration = 0; iteration < cycle; ++iteration) {
        time_point_begin.emplace_back(std::chrono::steady_clock::now());
        {
            // The vectors are only default-constructed and destroyed; no
            // element is ever inserted.
            std::vector<int, StaticVectorAllocator<int>> vec1;
            std::vector<int, StaticVectorAllocator<int>> vec2;
            std::vector<int, StaticVectorAllocator<int>> vec3;
            std::vector<int, StaticVectorAllocator<int>> vec4;
        }
        time_point_end.emplace_back(std::chrono::steady_clock::now());
        usleep(static_cast<useconds_t>(interval));
    }

    const bool begin_written = append_timestamps("begin.txt", time_point_begin);
    const bool end_written = append_timestamps("end.txt", time_point_end);
    if (!begin_written || !end_written) {
        std::cerr << "failed to write begin.txt/end.txt\n";
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
测试规模为10000次循环,每次循环之间间隔2ms。
开启-O3优化后,平均开销为0.2286微秒。
不开启任何优化时,平均开销为1.2648微秒。
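从性能开销上看,这段代码并未体现出任何优势。需要注意的是,循环中的四个vector始终为空(参数size并未被使用),默认构造的vector通常不会调用allocate,因此上述数据主要反映的是计时与对象构造/析构本身的开销,而不是内存分配的开销。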