MySQL Page读取和淘汰过程分析

目录

Page磁盘读取过程

申请空闲PAGE空间

刷脏页和LRU链表

Page内存读取和Page淘汰的互斥


Page磁盘读取过程

buf_page_get_gen
|    | ==> rw_lock_s_lock(hash_lock);
|    | ==> block = (buf_block_t*) buf_page_hash_get_low(buf_pool, space, offset, fold);
|    | ==> if (block == NULL) rw_lock_s_unlock(hash_lock); 
|    | ==> //(MM)从存储读取PAGE到BP
|    | ==> buf_read_page(space, zip_size, offset)  
|        | ==> count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
    	                                 zip_size, FALSE, tablespace_version, offset);
|            | ==> //从BP中申请空闲page空间
|            | ==> bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip, tablespace_version, offset);
|                | ==> block = NULL;
|                | ==> buf_pool_mutex_enter(buf_pool);
|                | ==> rw_lock_x_lock(hash_lock);
|                | ==> watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
|                | ==> //释放了hashlock,还持有BP lock
|                | ==> rw_lock_x_unlock(hash_lock);
|                | ==> //申请空闲内存块
|                | ==> data = buf_buddy_alloc(buf_pool, zip_size, &lru);
|                    | ==> buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru));
|                        | ==> //从Free List中获取内存块
|                        | ==> block = buf_LRU_get_free_only(buf_pool);
|                        | ==> //free list中没有,从LRU中获取,此时要释放BP锁,因为从LRU获取过程中可能会休眠
|                        | ==> buf_pool_mutex_exit(buf_pool);
|                        |
|                        |
|                        | ==> block = buf_LRU_get_free_block(buf_pool);
|                        | ==> //标识从LRU链表中获取
|                        | ==> *lru = TRUE;
|                        | ==> //获取成功后重新加锁
|                        | ==> buf_pool_mutex_enter(buf_pool);
|                | ==> rw_lock_x_lock(hash_lock);
|                | ==> //如果从LRU获取的话,需要重新CHECK hash表中是否有该Page,因为获取过程中会释放BP MUTEX
|                | ==> /* If buf_buddy_alloc() allocated storage from the LRU list, it released and reacquired buf_pool->mutex.  Thus, we must check the page_hash again, as it may have been modified. */
|                | ==> if (UNIV_UNLIKELY(lru)) {
|                    | ==> watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
|                | ==> if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) rw_lock_x_unlock(hash_lock); watch_page = NULL; buf_buddy_free(buf_pool, data, zip_size);
|                | ==> //(MM)插入到HASH表
|                | ==> HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
|                | ==> rw_lock_x_unlock(hash_lock);
|                | ==> //(MM)插入到LRU链表
|                | ==> buf_LRU_add_block(bpage, TRUE/* to old blocks */);
|                | ==> buf_pool_mutex_exit(buf_pool);
|    | ==> //增加引用计数
|    | ==> buf_block_fix(fix_block);
|        | ==> #ifdef PAGE_ATOMIC_REF_COUNT
|        | ==> 	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
|        | ==> #else
|        | ==> 	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
|        | ==> 	mutex_enter(block_mutex);
|        | ==> 	++block->page.buf_fix_count;
|        | ==> 	mutex_exit(block_mutex);
|    | ==> rw_lock_s_unlock(hash_lock);
|    | ==> buf_wait_for_read(fix_block);

过程是:

  1. 对Page_Hash加s锁,判断是否存在。
  2. 如果不存在,释放s锁,加bp的互斥锁、加x锁。再次判断是否存在。
  3. 如果不存在,释放x锁,申请空闲Block。
  4. 然后加x锁,判断是否存在,如果不存在,插入空闲Block。释放x锁。
  5. 把block加入lru链表。释放bp的互斥锁。

步骤3释放x锁的原因是:线程持有bp的互斥锁,而对LRU/free list的操作都依靠该互斥锁,因此可以通过bp的互斥锁阻塞其他线程申请空闲Block。但是在从LRU申请Block的过程中,可能由于休眠等待而需要释放互斥锁,导致其他线程进入,因此第4步根据空闲Block的来源判断是否需要重新Check。

可以看到,从磁盘读取过程中,对hash的互斥成本是比较高的,加了2次x锁。这个地方后续版本中有优化。

申请空闲PAGE空间

buf_LRU_get_free_block
|    | ==> buf_pool_mutex_enter(buf_pool);
|    | ==> //(MM)从free_list中获取Page空间
|    | ==> block = buf_LRU_get_free_only(buf_pool);
|        | ==> UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
|    | ==> //(MM)如果没有获取到,从LRU链表中获取非脏的PAGE空间
|    | ==> freed = buf_LRU_scan_and_free_block(buf_pool, n_iterations > 0);
|    | ==> //(MM)如果没有,从LRU刷一个脏页下去
|    | ==> buf_flush_single_page_from_LRU(buf_pool)



buf_LRU_scan_and_free_block(buf_pool, n_iterations > 0)
|    | ==> //尝试从解压缩页中获取空间
|    | ==> buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all)
|    | ==> //从LRU中获取非脏页面
|    | ==> buf_LRU_free_from_common_LRU_list(buf_pool, scan_all)
|        | ==> //从后向前遍历LRU链表
|        | ==> for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), scanned = 1, freed = FALSE;
					bpage != NULL && !freed && (scan_all || scanned < srv_LRU_scan_depth); ++scanned) {
|        | ==> accessed = buf_page_is_accessed(bpage);
|        | ==> //尝试从LRU链表中转移一个PAGE到FREE LIST
|        | ==> freed = buf_LRU_free_page(bpage, true);
|            | ==> //判断页面是否有引用或者在IO过程中
|            | ==> if (!buf_page_can_relocate(bpage)) exit
|            | ==> //判断页面是否有修改
|            | ==> if (bpage->oldest_modification) exit
|            | ==> //从LRU链表中删除一条PAGE空间,如果是BUF_BLOCK_FILE_PAGE类型的PAGE,要在外面调用函数加入FREELIST
|            | ==> buf_LRU_block_remove_hashed(bpage, zip)
|                | ==> buf_LRU_remove_block(bpage);
|                    | ==> UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
|                | ==> //从HASH表中删除
|                | ==> HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
|            | ==> //把Page加入到FREE LIST
|            | ==> buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
|                | ==> buf_LRU_block_free_non_file_page(block);
|                    | ==> memset
|                    | ==> UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
|        | ==> }



buf_flush_single_page_from_LRU(buf_pool)
|    | ==> for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), scanned = 1;
				bpage != NULL;
				bpage = UT_LIST_GET_PREV(LRU, bpage), ++scanned) {
|    | ==> //判断页面是否有修改且是否在IO中
|    | ==> if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)
|    | ==> //刷脏页(同步IO)
|    | ==> buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true);
|        | ==> buf_page_set_io_fix(bpage, BUF_IO_WRITE);
|        | ==> //如果是BUF_FLUSH_LIST,可能会走下面的函数(需同时满足三个条件)
|        | ==> buf_dblwr_flush_buffered_writes();
|        | ==> buf_flush_write_block_low(bpage, flush_type, sync);
|            | ==> if (flush_type == BUF_FLUSH_SINGLE_PAGE) buf_dblwr_write_single_page(bpage, sync);
|                | ==> //持久化dblwr
|                | ==> fil_flush(TRX_SYS_SPACE);
|                | ==> //持久化数据页
|                | ==> buf_dblwr_write_block_to_datafile(bpage, sync);
|                    | ==> //调IO接口
|                    | ==> fil_io(flags, sync, buf_block_get_space(block), 0,
									buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
									(void*) block->frame, (void*) block)
|                        | ==> //调AIO接口
|                        | ==> ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, offset, len, node, message);
|                            | ==> os_aio_func
|                                | ==> //如果同步IO,读调用os_file_read_func,写调用os_file_write_func
|                                | ==> //如果异步IO,走异步IO系统
|                                | ==> os_aio_simulated_wake_handler_thread
|                        | ==> //如果是同步IO,进行IO_WAIT
|                        | ==> fil_node_complete_io(node, fil_system, type);
|            | ==> //关闭DBLWR场景
|            | ==> fil_io
|            | ==> //其他场景,通过dblwr持久化数据页
|            | ==> buf_dblwr_add_to_batch(bpage);
|            | ==> //同步IO场景,执行IO完成后操作
|            | ==> if (sync) {
|            | ==> fil_flush(buf_page_get_space(bpage));
|            | ==> buf_page_io_complete(bpage);
|                | ==> buf_flush_write_complete(bpage)
|                    | ==> //从FLU链表中移出
|                    | ==> buf_flush_remove(bpage);
|            | ==> }
|    | ==> }
|    | ==> //因为刚刷完脏页,重新遍历LRU链表,尝试转移一个PAGE到FREE LIST
|    | ==> ready = buf_flush_ready_for_replace(bpage);
|    | ==> freed = buf_LRU_free_page(bpage, evict_zip);

 

刷脏页和LRU链表

buf_flush_page_cleaner_thread
|    | ==> //依次遍历每个Buffer pool instance,从LRU尾部开始扫描,直到第srv_LRU_scan_depth个page停止,
|    | ==> //按批次刷LRU,每次期望刷100个page(PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE), 每个Bp会进行srv_LRU_scan_depth/100次循环
|    | ==> buf_flush_LRU_tail
|        | ==> for (ulint i = 0; i < srv_buf_pool_instances; i++) {
|        | ==> scan_depth = UT_LIST_GET_LEN(buf_pool->LRU);
|        | ==> //LRU释放PAGE到FREE LIST
|        | ==> buf_flush_LRU(buf_pool, PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE, &n_flushed)
|            | ==> buf_flush_start(buf_pool, BUF_FLUSH_LRU)
|            | 
|            |
|            | ==> buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
|                | ==> case BUF_FLUSH_LRU: buf_do_LRU_batch(buf_pool, min_n);
|                    | ==> if (buf_LRU_evict_from_unzip_LRU(buf_pool)) buf_free_from_unzip_LRU_list_batch(buf_pool, max);
|                    | ==> //
|                    | ==> buf_flush_LRU_list_batch(buf_pool, max - count);
|                        | ==> //没有引用、没有IO、没有变更
|                        | ==> evict = buf_flush_ready_for_replace(bpage);
|                        | ==> //尝试从LRU链表中转移PAGE到FREE LIST
|                        | ==> if (evict) buf_LRU_free_page(bpage, true);
|                        | ==> //把这个PAGE持久化
|                        | ==> else buf_flush_page_and_try_neighbors(bpage, BUF_FLUSH_LRU, max, &count);
|                            | ==> //判断这个PAGE是否需要持久化
|                            | ==> buf_flush_ready_for_flush(bpage, flush_type)
|                            | ==> //持久化这个PAGE
|                            | ==> buf_flush_try_neighbors(space, offset, flush_type, *count, n_to_flush)
|                                | ==> buf_flush_page(buf_pool, bpage, flush_type, false)
|            | ==> buf_flush_end(buf_pool, BUF_FLUSH_LRU);
|        | ==> }
|    | ==> //判断是否需要刷脏页
|    | ==> page_cleaner_flush_pages_if_needed
|        | ==> //number of pages that we should attempt to flush, LSN up to which flushing must happen
|        | ==> page_cleaner_do_flush_batch(ulint n_to_flush, lsn_t lsn_limit)
|            | ==> buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
|                | ==> //依次为每个BP执行
|                | ==> buf_flush_batch(buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit)
|                    | ==> count = buf_do_flush_list_batch(buf_pool, min_n, lsn_limit);
|                        | ==> //从FLU链表的尾部开始遍历,跳出条件是刷脏的PAGE数量和LSN
|                        | ==> buf_flush_set_hp(buf_pool, prev);
|                        | ==> buf_flush_list_mutex_exit(buf_pool);
|                        | ==> buf_flush_page_and_try_neighbors(bpage, BUF_FLUSH_LIST, min_n, &count);
|                        | ==> buf_flush_list_mutex_enter(buf_pool);
|                        | ==> buf_flush_is_hp(buf_pool, prev)

 

Page内存读取和Page淘汰的互斥

buf_flush_batch
|    | ==> //加BP的互斥锁(LRU链表由它保护)
|    | ==> buf_pool_mutex_enter(buf_pool);
|    | ==> count = buf_do_LRU_batch(buf_pool, min_n);
|        | ==> buf_flush_LRU_list_batch(buf_pool, max - count);
|            | ==> bpage = UT_LIST_GET_LAST(buf_pool->LRU);
|            | ==> mutex_enter(block_mutex);
|            | ==> evict = buf_flush_ready_for_replace(bpage);
|                | ==> bpage->buf_fix_count == 0
|            | ==> mutex_exit(block_mutex);
|            | ==> if (evict) buf_LRU_free_page
|                | ==> rw_lock_x_lock(hash_lock);
|                | ==> mutex_enter(block_mutex);
|                | ==> //重新判断引用计数
|                | ==> buf_page_can_relocate
|                | ==> //从LRU和HASH表删除
|                | ==> buf_LRU_block_remove_hashed
|                    | ==> buf_LRU_remove_block(bpage);
|                    | ==> HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
|                    | ==> rw_lock_x_unlock(hash_lock);
|                | ==> buf_pool_mutex_exit(buf_pool);
|                | ==> buf_pool_mutex_enter(buf_pool);
|                | ==> buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
|                    | ==> buf_LRU_block_free_non_file_page(block);
|                        | ==> UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
|    | ==> buf_pool_mutex_exit(buf_pool);

Page淘汰过程如下:

  • 加BP的互斥锁
  • 从LRU链表的尾部获取最老的数据页
  • 确认Page是否可以被淘汰
  • 如果可以淘汰,加page_hash的x锁后重新判断:前述判断无法拦截其他线程对Page的访问,因此需要在持有page_hash的x锁的情况下再做一次准确的判断。
  • 如果确实可以淘汰,从Page_Hash中删除,释放hash的x锁。
  • 将Page加入空闲链表

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值