6.S081-Lab8

pppgr

已于 2023-05-13 10:55:08 修改

阅读量171

点赞数

文章标签： c++ java 开发语言 linux 系统架构

于 2023-05-13 10:35:47 首次发布

本文链接：https://blog.csdn.net/XHGHQHWWK/article/details/130619108

版权

文章介绍了对内存分配器（kalloc,kfree）和块缓存（bcache）的改进，以减少锁的争用。通过对kalloc和kfree的调整，每个CPU绑定自己的内存列表，减少了锁的使用。同时，bcache的改进引入了哈希分桶策略，降低了不同进程访问缓冲区时的冲突，改善了并发性能。测试结果显示锁的争用情况显著降低，达到了更好的并发效果。

摘要由CSDN通过智能技术生成

Lab: locks

课程地址

Memory allocator

这个任务要求为每一个 CPU 绑定一个独立的空闲内存链表，实现起来比较简单，直接修改 kalloc.c 中的 kalloc 和 kfree 就好了。


// Free the page of physical memory pointed at by pa,
// which normally should have been returned by a
// call to kalloc().  (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
  struct run *r;
  if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
    panic("kfree");

  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);
  r = (struct run*)pa;
  push_off();
    int id = cpuid();
  pop_off();  
  acquire(&kmem[id].lock);
  r->next =kmem[id].freelist;
 kmem[id].freelist= r;
  release(&kmem[id].lock);
 
}

// Allocate one 4096-byte page of physical memory.
// The calling CPU's own free list is tried first; if it is empty,
// the other CPUs' lists are probed in index order and the first
// available page is taken ("stealing").
// Returns a pointer the kernel can use (filled with the junk byte 5),
// or 0 if no list has a free page.
void *
kalloc(void)
{
  // Sample the CPU id with interrupts disabled.
  push_off();
  int self = cpuid();
  pop_off();

  // Try the local list first.
  acquire(&kmem[self].lock);
  struct run *page = kmem[self].freelist;
  if(page)
    kmem[self].freelist = page->next;
  release(&kmem[self].lock);

  // Local list was empty: probe the other CPUs one at a time.
  // The local lock is already released, so at most one kmem lock
  // is held at any moment.
  for(int cpu = 0; page == 0 && cpu < NCPU; cpu++){
    if(cpu == self)
      continue;

    acquire(&kmem[cpu].lock);
    page = kmem[cpu].freelist;
    if(page)
      kmem[cpu].freelist = page->next;
    release(&kmem[cpu].lock);
  }

  if(page)
    memset((char*)page, 5, PGSIZE); // fill with junk

  return (void*)page;
}

init: starting sh
$ kalloctest
start test1
test1 results:
--- lock kmem/bcache stats
lock: kmem: #test-and-set 0 #acquire() 34766
lock: kmem: #test-and-set 0 #acquire() 200666
lock: kmem: #test-and-set 0 #acquire() 197662
lock: bcache: #test-and-set 0 #acquire() 356
--- top 5 contended locks:
lock: proc: #test-and-set 247886 #acquire() 441378
lock: proc: #test-and-set 152575 #acquire() 441378
lock: proc: #test-and-set 108767 #acquire() 841508
lock: proc: #test-and-set 81825 #acquire() 841528
lock: proc: #test-and-set 67813 #acquire() 441378
tot= 0
test1 OK
start test2
total free number of pages: 32497 (out of 32768)

.....
test2 OK
start test3
child done 1
child done 100000
test3 OK

Buffer cache

修改块缓存，使bcache中所有锁的获取循环迭代次数在运行bcachetest时接近于零。理想情况下，块缓存中涉及的所有锁的计数之和应该为零，但如果总和小于500也可以。修改bget和brelse，使bcache中不同块的并发查找和释放不太可能在锁上发生冲突（例如，不必等待bcache.lock）。您必须保持不变量，即每个块最多缓存一个副本。完成后，您的输出应该与下面显示的类似（尽管不完全相同）。确保“usertests -q”仍然通过。完成后，make grade 应该通过所有测试。

在完成之前的输出是这样的

$ bcachetest
start test0
test0 results:
--- lock kmem/bcache stats
lock: kmem: #test-and-set 0 #acquire() 33035
lock: bcache: #test-and-set 16142 #acquire() 65978
--- top 5 contended locks:
lock: virtio_disk: #test-and-set 162870 #acquire() 1188
lock: proc: #test-and-set 51936 #acquire() 73732
lock: bcache: #test-and-set 16142 #acquire() 65978
lock: uart: #test-and-set 7505 #acquire() 117
lock: proc: #test-and-set 6937 #acquire() 73420
tot= 16142
test0: FAIL
start test1
test1 OK

大致思路和kalloc 很像，不过多了一个lru的逻辑。

修改的逻辑把head 替换成table的形式就很类似了

结构体结构


#define TABLESIZE 13

struct {
  // Previously a single list head managed every buf, so processes
  // touching different bufs still had to wait on one global lock.
  // The cache is therefore split into TABLESIZE buckets.
  // Eviction across the table is decided by each buf's last visit
  // time, recorded in ticks.

  struct buf buf[NBUF];
  struct spinlock locks[TABLESIZE];
  struct spinlock tablelock;
  struct buf buftable[TABLESIZE];

  // The single LRU-ordered list (head.next most recent, head.prev
  // least recent) is no longer used; buftable[] holds one list
  // head per bucket instead.
  // struct buf head;
} bcache;

定长hash分桶

// Map a block number to a bucket index in the range [0, TABLESIZE).
uint
hash(uint block)
{
  uint bucket = block % TABLESIZE;
  return bucket;
}

binit

初始化头节点和表锁和bucket级别的锁


// Initialize the buffer cache: the table lock, one spinlock and one
// empty circular list per bucket, and the sleep lock of every buffer.
// The NBUF buffers are dealt out to the buckets by their array
// position, so each bucket starts with some unused buffers.
void
binit(void)
{
  struct buf *b;

  // Every bcache lock name must start with "bcache" so that the lock
  // statistics printed by bcachetest account for it.  (The name was
  // previously misspelled "bache.tablelock".)
  initlock(&bcache.tablelock, "bcache.tablelock");

  for(int i = 0; i < TABLESIZE; i++){
    initlock(&bcache.locks[i], "bcache.hash");

    // An empty bucket is a list head that points at itself.
    bcache.buftable[i].next = &bcache.buftable[i];
    bcache.buftable[i].prev = &bcache.buftable[i];
  }

  int i = 0;
  for(b = bcache.buf; b < bcache.buf+NBUF; b++, i++){
    uint index = hash(i);

    initsleeplock(&b->lock, "buffer");

    // Insert b right after the bucket's list head.
    b->next = bcache.buftable[index].next;
    b->prev = &bcache.buftable[index];
    bcache.buftable[index].next->prev = b;
    bcache.buftable[index].next = b;
  }
}

brelse,bpin,bunpin

所要做的工作就是正确的加锁，额外的工作是在brelse中记录下访问的时间。


// Release a locked buffer.
// Drops the caller's sleep lock, then, under the lock of the bucket
// that b->blockno hashes to, decrements the reference count and
// stamps the buffer with the current tick count.  Panics with
// "brelse" if the caller does not hold the buffer's sleep lock.
void
brelse(struct buf *b)
{
  if(holdingsleep(&b->lock) == 0)
    panic("brelse");

  releasesleep(&b->lock);

  struct spinlock *bucket_lock = &bcache.locks[hash(b->blockno)];

  acquire(bucket_lock);
  b->refcnt--;
  b->last_visit_time = ticks;  // timestamp later compared by bget
  release(bucket_lock);
}

bget

bget的功能可以分为以下几个部分：

如果已经被载入了直接从cachebuf中获取
如果没有载入则尝试从当前的bucket上寻找一个空闲块，并且给这个空闲块分配对应的设备和blockno
如果当前的bucket上没有空闲的块，那么就需要串行化地（意味着这里需要table lock）在整个table上寻找空闲块（refcnt==0），在这里实现了lru的策略去找最长时间未被使用的块。
找到这个空闲块以后，将它从原来所在的bucket上分离，挂载到当前的bucket，并且给这个块分配对应的设备和blockno
如果还没有任何空闲的块可以分配则panic

这里的 lru ticks 只是标出了需要记录时间的位置；单纯按照作业要求，其实只需要在 release（brelse）中记录即可。因为在该作业中，一旦没有任何空闲的块，并不会执行淘汰策略去腾出新的 block，而是直接 panic。

// Search bucket `index` for a buffer to hand out for (dev, blockno).
// The caller must hold bcache.locks[index].
// If the block is already in the bucket, its refcnt is incremented.
// Otherwise, if the bucket contains a buffer with refcnt == 0, that
// buffer is re-targeted at (dev, blockno) with valid = 0, refcnt = 1.
// Either way last_visit_time is refreshed and the buffer is returned;
// returns 0 when the bucket offers neither.
static struct buf*
bget_bucket_claim(uint index, uint dev, uint blockno)
{
  struct buf *head = &bcache.buftable[index];
  struct buf *unused = 0;
  struct buf *b;

  for(b = head->next; b != head; b = b->next){
    if(b->dev == dev && b->blockno == blockno){
      b->refcnt++;
      b->last_visit_time = ticks;
      return b;
    }
    if(b->refcnt == 0)
      unused = b;
  }

  if(unused){
    unused->dev = dev;
    unused->blockno = blockno;
    unused->valid = 0;
    unused->refcnt = 1;
    unused->last_visit_time = ticks;
  }
  return unused;
}

// Look through the buffer cache for block `blockno` on device `dev`.
// If not found, allocate a buffer for it.
// In either case, return the buffer with its sleep lock held.
// Panics with "bget: no buffers" if every buffer is in use.
//
// Locking:
//  - Fast path: only the home bucket's lock is taken.
//  - Miss path: bcache.tablelock serializes all evictions.  Under it
//    the home bucket lock is taken again and the bucket is re-scanned,
//    because another process may have cached the same block while no
//    lock was held; skipping that check could cache a block twice.
//  - Other buckets are only read or modified while their own lock is
//    held, so a concurrent bget/brelse on them cannot race with us.
//  - Only the tablelock holder ever holds more than one bucket lock,
//    and everyone else releases a bucket lock without waiting for
//    another lock in this file, so the nesting cannot deadlock.
static struct buf*
bget(uint dev, uint blockno)
{
  struct buf *b;
  uint index = hash(blockno);

  // Fast path: hit, or an unused buffer already in the home bucket.
  acquire(&bcache.locks[index]);
  b = bget_bucket_claim(index, dev, blockno);
  release(&bcache.locks[index]);
  if(b){
    acquiresleep(&b->lock);
    return b;
  }

  // Miss path: serialize the search for a buffer to recycle.
  acquire(&bcache.tablelock);
  acquire(&bcache.locks[index]);

  // Re-check: the home bucket may have changed while it was unlocked.
  b = bget_bucket_claim(index, dev, blockno);
  if(b){
    release(&bcache.locks[index]);
    release(&bcache.tablelock);
    acquiresleep(&b->lock);
    return b;
  }

  // Find the unused buffer with the smallest last_visit_time in the
  // other buckets.  The lock of the bucket that holds the current best
  // candidate stays held so the candidate cannot be claimed meanwhile.
  struct buf *victim = 0;
  uint held = TABLESIZE;          // TABLESIZE means "no bucket lock held"
  for(uint i = hash(index + 1); i != index; i = hash(i + 1)){
    int improved = 0;

    acquire(&bcache.locks[i]);
    for(b = bcache.buftable[i].next; b != &bcache.buftable[i]; b = b->next){
      if(b->refcnt == 0 &&
         (victim == 0 || b->last_visit_time < victim->last_visit_time)){
        victim = b;
        improved = 1;
      }
    }

    if(improved){
      // The new best lives in bucket i; the previous one is obsolete.
      if(held != TABLESIZE)
        release(&bcache.locks[held]);
      held = i;
    } else {
      release(&bcache.locks[i]);
    }
  }

  if(!victim){
    // no free block
    panic("bget: no buffers");
  }

  // Detach from the bucket it currently lives in (lock still held).
  victim->next->prev = victim->prev;
  victim->prev->next = victim->next;
  release(&bcache.locks[held]);

  // Link at the front of the home bucket (lock held since re-check).
  victim->next = bcache.buftable[index].next;
  victim->prev = &bcache.buftable[index];
  bcache.buftable[index].next->prev = victim;
  bcache.buftable[index].next = victim;

  victim->dev = dev;
  victim->blockno = blockno;
  victim->valid = 0;
  victim->refcnt = 1;
  victim->last_visit_time = ticks;

  release(&bcache.locks[index]);
  release(&bcache.tablelock);
  acquiresleep(&victim->lock);
  return victim;
}

结果

降低很多的锁争用情况，有一个不错的竞争表现。

xv6 kernel is booting

hart 2 starting
hart 1 starting
init: starting sh
$ bcachetest
start test0
test0 results:
--- lock kmem/bcache stats
lock: kmem: #test-and-set 0 #acquire() 32932
lock: kmem: #test-and-set 0 #acquire() 93
lock: kmem: #test-and-set 0 #acquire() 69
lock: bcache.hash: #test-and-set 0 #acquire() 4120
lock: bcache.hash: #test-and-set 0 #acquire() 4122
lock: bcache.hash: #test-and-set 0 #acquire() 2258
lock: bcache.hash: #test-and-set 0 #acquire() 4270
lock: bcache.hash: #test-and-set 0 #acquire() 2254
lock: bcache.hash: #test-and-set 0 #acquire() 4250
lock: bcache.hash: #test-and-set 0 #acquire() 4598
lock: bcache.hash: #test-and-set 0 #acquire() 7348
lock: bcache.hash: #test-and-set 0 #acquire() 6176
lock: bcache.hash: #test-and-set 0 #acquire() 6176
lock: bcache.hash: #test-and-set 0 #acquire() 6180
lock: bcache.hash: #test-and-set 0 #acquire() 6178
lock: bcache.hash: #test-and-set 0 #acquire() 6178
--- top 5 contended locks:
lock: virtio_disk: #test-and-set 699367 #acquire() 1205
lock: proc: #test-and-set 50962 #acquire() 114762
lock: proc: #test-and-set 49858 #acquire() 114782
lock: proc: #test-and-set 49843 #acquire() 114768
lock: proc: #test-and-set 38723 #acquire() 114799
tot= 0
test0: OK
start test1
test1 OK