/*
 * Issue a `cbo.flush` instruction for every cache line that overlaps the
 * byte range [saddr, saddr + size).
 *
 * saddr          - address of the first byte of the range.
 * size           - length of the range in bytes; 0 means nothing is flushed.
 * cacheline_size - stride, in bytes, between consecutive flush addresses;
 *                  0 means nothing is flushed (a zero stride could never
 *                  make progress through the range).
 *
 * The range does not have to be line-aligned at either end: the walk starts
 * at the line containing the first byte and stops after the line containing
 * the last byte, so partially covered lines are flushed too.
 *
 * No fence instruction is issued here; the "memory" clobber only prevents
 * the compiler from moving memory accesses across the flush instructions.
 */
void flush(uint64_t saddr, uint64_t size, uint64_t cacheline_size)
{
    uint64_t last_byte;
    uint64_t last_line;
    uint64_t line;

    if (size == 0 || cacheline_size == 0) {
        return;
    }

    /* Address of the final byte; clamp if the range wraps past 2^64 - 1. */
    last_byte = saddr + (size - 1);
    if (last_byte < saddr) {
        last_byte = UINT64_MAX;
    }

    /* Round both ends down to the start of their cache line. */
    line = saddr - (saddr % cacheline_size);
    last_line = last_byte - (last_byte % cacheline_size);

    /*
     * Test for the end *before* advancing so that `line` never steps past
     * last_line and therefore cannot wrap around.
     */
    for (;;) {
        __asm__ __volatile__("cbo.flush (%0)" : : "r"(line) : "memory");
        if (line >= last_line) {
            break;
        }
        line += cacheline_size;
    }
}