The main algorithm for memory allocation is the buddy algorithm; on top of it sit the slab, slub, and slob allocators.
1. Analyze the buddy algorithm in kernel versions after 5.0, and give a flowchart and your takeaways.
All of the analysis below is based on the 5.1 kernel source:
Zoned buddy allocator: defined in include/linux/mmzone.h
// Describes how free memory blocks of the same size are organized in the buddy system
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;// number of free blocks of this size currently in the buddy system;
// it decreases as memory is allocated and increases as memory is reclaimed
};
// Note that nr_free does not count individual free pages but free blocks. For order-0 blocks nr_free does equal the number of single pages, because an order-0 block is made up of one page; for order-1 blocks it counts groups of 2 pages, and in general, for order-n blocks, nr_free counts groups of 2^n pages.
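As a quick sanity check on this accounting, here is a minimal user-space sketch (illustrative values only, not kernel code) that derives the total number of free pages in a zone from the per-order nr_free counts:
#include <stdio.h>

#define MAX_ORDER 11

int main(void)
{
	/* hypothetical nr_free values, one per order, as free_area[order].nr_free would hold */
	unsigned long nr_free[MAX_ORDER] = { 100, 50, 25, 10, 4, 2, 1, 0, 0, 0, 1 };
	unsigned long total_pages = 0;

	for (int order = 0; order < MAX_ORDER; order++)
		total_pages += nr_free[order] << order;	/* each order-n block holds 2^n pages */

	printf("free pages in this zone: %lu\n", total_pages);
	return 0;
}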
(include/linux/mmzone.h)
struct zone {
/* Read-mostly fields */
/* zone watermarks, access with *_wmark_pages(zone) macros */
unsigned long _watermark[NR_WMARK];
unsigned long watermark_boost;
unsigned long nr_reserved_highatomic;
/*
* We don't know if the memory that we're going to allocate will be
* freeable or/and it will be released eventually, so to avoid totally
* wasting several GB of ram we must reserve some of the lower zone
* memory (otherwise we risk to run OOM on the lower zones despite
* there being tons of freeable ram on the higher zones). This array is
* recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
* changes.
*/
long lowmem_reserve[MAX_NR_ZONES];
#ifdef CONFIG_NUMA
int node;
#endif
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
* In SPARSEMEM, this map is stored in struct mem_section
*/
unsigned long *pageblock_flags;
#endif /* CONFIG_SPARSEMEM */
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
/*
* spanned_pages is the total pages spanned by the zone, including
* holes, which is calculated as:
* spanned_pages = zone_end_pfn - zone_start_pfn;
*
* present_pages is physical pages existing within the zone, which
* is calculated as:
* present_pages = spanned_pages - absent_pages(pages in holes);
*
* managed_pages is present pages managed by the buddy system, which
* is calculated as (reserved_pages includes pages allocated by the
* bootmem allocator):
* managed_pages = present_pages - reserved_pages;
*
* So present_pages may be used by memory hotplug or memory power
* management logic to figure out unmanaged pages by checking
* (present_pages - managed_pages). And managed_pages should be used
* by page allocator and vm scanner to calculate all kinds of watermarks
* and thresholds.
*
* Locking rules:
*
* zone_start_pfn and spanned_pages are protected by span_seqlock.
* It is a seqlock because it has to be read outside of zone->lock,
* and it is done in the main allocator path. But, it is written
* quite infrequently.
*
* The span_seq lock is declared along with zone->lock because it is
* frequently read in proximity to zone->lock. It's good to
* give them a chance of being in the same cacheline.
*
* Write access to present_pages at runtime should be protected by
* mem_hotplug_begin/end(). Any reader who can't tolerant drift of
* present_pages should get_online_mems() to get a stable value.
*/
// number of physical pages managed by the buddy system
atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
const char *name;
#ifdef CONFIG_MEMORY_ISOLATION
/*
* Number of isolated pageblock. It is used to solve incorrect
* freepage counting problem due to racy retrieving migratetype
* of pageblock. Protected by zone->lock.
*/
unsigned long nr_isolate_pageblock;
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/* see spanned/present_pages for more description */
seqlock_t span_seqlock;
#endif
int initialized;
/* Write-intensive fields used from the page allocator */
ZONE_PADDING(_pad1_)
/* free areas of different sizes */
// core data structure of the buddy system
struct free_area free_area[MAX_ORDER];
/* zone flags, see below */
unsigned long flags;
/* Primarily protects free_area */
spinlock_t lock;
/* Write-intensive fields used by compaction and vmstats. */
ZONE_PADDING(_pad2_)
/*
* When free pages are below this point, additional steps are taken
* when reading the number of free pages to avoid per-cpu counter
* drift allowing watermarks to be breached
*/
unsigned long percpu_drift_mark;
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/* pfn where compaction free scanner should start */
unsigned long compact_cached_free_pfn;
/* pfn where async and sync compaction migration scanner should start */
unsigned long compact_cached_migrate_pfn[2];
unsigned long compact_init_migrate_pfn;
unsigned long compact_init_free_pfn;
#endif
#ifdef CONFIG_COMPACTION
/*
* On compaction failure, 1<<compact_defer_shift compactions
* are skipped before trying again. The number attempted since
* last failure is tracked with compact_considered.
*/
unsigned int compact_considered;
unsigned int compact_defer_shift;
int compact_order_failed;
#endif
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/* Set to true when the PG_migrate_skip bits should be cleared */
bool compact_blockskip_flush;
#endif
bool contiguous;
ZONE_PADDING(_pad3_)
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
} ____cacheline_internodealigned_in_smp;
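To keep the three page counters straight, here is a small sketch (with made-up numbers) that simply restates the formulas documented in the struct zone comment above:
#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical values for one zone */
	unsigned long zone_start_pfn = 0x100000;
	unsigned long zone_end_pfn   = 0x140000;
	unsigned long absent_pages   = 0x2000;	/* pages in holes */
	unsigned long reserved_pages = 0x800;	/* e.g. taken by the boot-time allocator */

	unsigned long spanned_pages = zone_end_pfn - zone_start_pfn;
	unsigned long present_pages = spanned_pages - absent_pages;
	unsigned long managed_pages = present_pages - reserved_pages;

	assert(managed_pages <= present_pages && present_pages <= spanned_pages);
	/* pages within the zone that the buddy system does not manage */
	printf("unmanaged pages: %lu\n", present_pages - managed_pages);
	return 0;
}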
The buddy algorithm: the code analyzed below is from Linux 5.1, mm/page_alloc.c
static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
const struct alloc_context *ac)
{
struct zoneref *z;
struct zone *zone;
struct pglist_data *last_pgdat_dirty_limit = NULL;
bool no_fallback;
retry:
/*
* Scan zonelist, looking for a zone with enough free.
* See also __cpuset_node_allowed() comment in kernel/cpuset.c.
*/
no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
z = ac->preferred_zoneref;
for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
ac->nodemask) {
struct page *page;
unsigned long mark;
if (cpusets_enabled() &&
(alloc_flags & ALLOC_CPUSET) &&
!__cpuset_zone_allowed(zone, gfp_mask))
continue;
/*
* When allocating a page cache page for writing, we
* want to get it from a node that is within its dirty
* limit, such that no single node holds more than its
* proportional share of globally allowed dirty pages.
* The dirty limits take into account the node's
* lowmem reserves and high watermark so that kswapd
* should be able to balance it without having to
* write pages from its LRU list.
*
* XXX: For now, allow allocations to potentially
* exceed the per-node dirty limit in the slowpath
* (spread_dirty_pages unset) before going into reclaim,
* which is important when on a NUMA setup the allowed
* nodes are together not big enough to reach the
* global limit. The proper fix for these situations
* will require awareness of nodes in the
* dirty-throttling and the flusher threads.
*/
if (ac->spread_dirty_pages) {
if (last_pgdat_dirty_limit == zone->zone_pgdat)
continue;
if (!node_dirty_ok(zone->zone_pgdat)) {
last_pgdat_dirty_limit = zone->zone_pgdat;
continue;
}
}
if (no_fallback && nr_online_nodes > 1 &&
zone != ac->preferred_zoneref->zone) {
int local_nid;
/*
* If moving to a remote node, retry but allow
* fragmenting fallbacks. Locality is more important
* than fragmentation avoidance.
*/
local_nid = zone_to_nid(ac->preferred_zoneref->zone);
if (zone_to_nid(zone) != local_nid) {
alloc_flags &= ~ALLOC_NOFRAGMENT;
goto retry;
}
}
mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK);
if (!zone_watermark_fast(zone, order, mark,
ac_classzone_idx(ac), alloc_flags)) {
int ret;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
* Watermark failed for this zone, but see if we can
* grow this zone if it contains deferred pages.
*/
if (static_branch_unlikely(&deferred_pages)) {
if (_deferred_grow_zone(zone, order))
goto try_this_zone;
}
#endif
/* Checked here to keep the fast path fast */
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;
if (node_reclaim_mode == 0 ||
!zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
continue;
ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
switch (ret) {
case NODE_RECLAIM_NOSCAN:
/* did not scan */
continue;
case NODE_RECLAIM_FULL:
/* scanned but unreclaimable */
continue;
default:
/* did we reclaim enough */
if (zone_watermark_ok(zone, order, mark,
ac_classzone_idx(ac), alloc_flags))
goto try_this_zone;
continue;
}
}
try_this_zone:
page = rmqueue(ac->preferred_zoneref->zone, zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);
/*
* If this is a high-order atomic allocation then check
* if the pageblock should be reserved for the future
*/
if (unlikely(order && (alloc_flags & ALLOC_HARDER)))
reserve_highatomic_pageblock(page, zone, order);
return page;
} else {
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */
if (static_branch_unlikely(&deferred_pages)) {
if (_deferred_grow_zone(zone, order))
goto try_this_zone;
}
#endif
}
}
/*
* It's possible on a UMA machine to get through all zones that are
* fragmented. If avoiding fragmentation, reset and try again.
*/
if (no_fallback) {
alloc_flags &= ~ALLOC_NOFRAGMENT;
goto retry;
}
return NULL;
}
This function walks the zonelist and tries to allocate pages from each zone in turn; for_next_zone_zonelist_nodemask() performs the iteration. Before attempting an allocation in a zone, it checks whether the zone has allocatable memory, whether alloc_flags (and the current cpuset) allow allocating from this zone, and whether the zone passes the watermark check, i.e. whether it still has enough free memory.
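The core of that watermark check can be sketched as follows; this is a simplified user-space approximation of the idea behind zone_watermark_ok(), ignoring details such as the ALLOC_HARDER/ALLOC_HIGH adjustments and per-migratetype suitability:
#include <stdbool.h>

#define MAX_ORDER 11

struct fake_zone {
	unsigned long free_pages;		/* total free pages in the zone */
	unsigned long lowmem_reserve;		/* reserve kept for higher-zone fallbacks */
	unsigned long nr_free[MAX_ORDER];	/* free blocks per order */
};

static bool watermark_ok(struct fake_zone *z, unsigned int order, unsigned long mark)
{
	/* 1. After handing out 2^order pages, the zone must stay above mark + reserve. */
	long free_after = (long)z->free_pages - (1L << order);
	if (free_after <= (long)(mark + z->lowmem_reserve))
		return false;

	if (!order)
		return true;

	/* 2. A high-order request additionally needs at least one free block of
	 *    order >= the requested order, otherwise it cannot be satisfied anyway. */
	for (unsigned int o = order; o < MAX_ORDER; o++)
		if (z->nr_free[o])
			return true;

	return false;
}

int main(void)
{
	struct fake_zone z = { .free_pages = 1000, .lowmem_reserve = 64,
			       .nr_free = { [0] = 500, [3] = 10 } };
	return watermark_ok(&z, 3, 128) ? 0 : 1;
}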
rmqueue() ultimately calls __rmqueue(); its implementation is shown below:
static __always_inline struct page *
__rmqueue(struct zone *zone, unsigned int order, int migratetype,
unsigned int alloc_flags)
{
struct page *page;
retry:
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) {
if (migratetype == MIGRATE_MOVABLE)
page = __rmqueue_cma_fallback(zone, order);
if (!page && __rmqueue_fallback(zone, order, migratetype,
alloc_flags))
goto retry;
}
trace_mm_page_alloc_zone_locked(page, order, migratetype);
return page;
}
The goal of this function is to allocate a block of pages from the given zone. It first tries __rmqueue_smallest(); if that fails and the request is MIGRATE_MOVABLE, it tries the CMA fallback, then the general fallback path; if the fallback manages to steal pages, it retries from the beginning. A tracepoint records the allocation to help with debugging and performance analysis.
Two key functions inside it deserve a closer look:
1. __rmqueue_smallest()
2. __rmqueue_fallback()
Let us analyze __rmqueue_fallback() first.
Below is the __rmqueue_fallback() code:
static __always_inline bool
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
unsigned int alloc_flags)
{
struct free_area *area;
int current_order;
int min_order = order;
struct page *page;
int fallback_mt;
bool can_steal;
/*
* Do not steal pages from freelists belonging to other pageblocks
* i.e. orders < pageblock_order. If there are no local zones free,
* the zonelists will be reiterated without ALLOC_NOFRAGMENT.
*/
if (alloc_flags & ALLOC_NOFRAGMENT)
min_order = pageblock_order;
/*
* Find the largest available free page in the other list. This roughly
* approximates finding the pageblock with the most free pages, which
* would be too costly to do exactly.
*/
for (current_order = MAX_ORDER - 1; current_order >= min_order;
--current_order) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
start_migratetype, false, &can_steal);
if (fallback_mt == -1)
continue;
/*
* We cannot steal all free pages from the pageblock and the
* requested migratetype is movable. In that case it's better to
* steal and split the smallest available page instead of the
* largest available page, because even if the next movable
* allocation falls back into a different pageblock than this
* one, it won't cause permanent fragmentation.
*/
if (!can_steal && start_migratetype == MIGRATE_MOVABLE
&& current_order > order)
goto find_smallest;
goto do_steal;
}
return false;
find_smallest:
for (current_order = order; current_order < MAX_ORDER;
current_order++) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
start_migratetype, false, &can_steal);
if (fallback_mt != -1)
break;
}
/*
* This should not happen - we already found a suitable fallback
* when looking for the largest page.
*/
VM_BUG_ON(current_order == MAX_ORDER);
do_steal:
page = list_first_entry(&area->free_list[fallback_mt],
struct page, lru);
steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
can_steal);
trace_mm_page_alloc_extfrag(page, order, current_order,
start_migratetype, fallback_mt);
return true;
}
This function is used when the requested allocation cannot be satisfied from the free lists of its own migratetype: it looks for free blocks belonging to other migratetypes to fall back on.
It is an always-inline function named __rmqueue_fallback that takes four parameters: a pointer to struct zone, an int order, an int start_migratetype and an unsigned int alloc_flags, and it returns a bool.
Its execution flow is shown in the flowchart below:
The following code is the expand() function:
// expand() takes six parameters: a pointer to the zone, a pointer to the page block being split, the requested order (low), the order the block currently has (high), the free_area for that order, and the migratetype.
static inline void expand(struct zone *zone, struct page *page,
	int low, int high, struct free_area *area,
	int migratetype)
{
	// size of the block being split, in pages: 2^high
	unsigned long size = 1 << high;

	// keep splitting while the block is still larger than the requested order
	while (high > low) {
		// move to the free_area one order lower, since the split halves are smaller
		area--;
		high--;
		// each split halves the block size
		size >>= 1;
		// sanity check: the upper half must lie within the zone, otherwise BUG
		VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);

		// mark the upper half as a guard page; guarded halves are not put on a
		// free list, but can still be merged back into the allocator later when
		// their buddy is freed. If the page was turned into a guard page, skip
		// straight to the next split.
		if (set_page_guard(zone, &page[size], high, migratetype))
			continue;

		// otherwise, hang the upper half on the free_list one order below,
		// bump that order's free-block count and record its order in the page
		list_add(&page[size].lru, &area->free_list[migratetype]);
		area->nr_free++;
		set_page_order(&page[size], high);
	}
} // end of expand()
Despite its name, expand() implements the splitting half of the buddy allocator.
In short, its job is to break a larger block into smaller blocks and put the unused halves back on the zone's free_list for later use. For example, carving an order-1 block out of an order-3 block leaves one order-2 block and one order-1 block on the corresponding free lists. This is part of the buddy memory management path.
The following code is __rmqueue_smallest():
static __always_inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, int migratetype)
{
	// __always_inline: the function is always inlined to avoid call overhead.
	// current_order is the order currently being examined, area the corresponding
	// free_area of the zone, and page the block eventually handed out.
	unsigned int current_order;
	struct free_area *area;
	struct page *page;

	/* Find a page of the appropriate size in the preferred list */
	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
		// get the free_area for the current order
		area = &(zone->free_area[current_order]);
		// take the first block on this order's list for the requested
		// migratetype, or NULL if the list is empty
		page = list_first_entry_or_null(&area->free_list[migratetype],
							struct page, lru);
		if (!page)
			continue;	// nothing at this order, try the next higher one
		// unhook the block from the free list: it is about to be handed out
		list_del(&page->lru);
		// clear the order/buddy information stored in the page
		rmv_page_order(page);
		// one fewer free block at this order
		area->nr_free--;
		// split the block down to the requested order; the unused halves are
		// returned to the lower-order free lists by expand()
		expand(zone, page, order, current_order, area, migratetype);
		// record the migratetype so the page can be handled correctly later
		set_pcppage_migratetype(page, migratetype);
		return page;	// the allocated block
	}
	return NULL;	// no block large enough was found
}
This function implements the core of the allocation algorithm. The for() loop starts at the requested order in the buddy free lists: if the list at that order is non-empty, a free block is taken directly from it with list_del() to satisfy the request; if the list is empty, the search moves up to the next higher order until a non-empty list is found, and if even the highest order is empty the allocation fails. Otherwise, once a non-empty list is found, the free block is removed from it with list_del() and then split in half repeatedly by expand(), with each unused half hung on the free list one order below, until the block has been split down to exactly the requested order. The resulting block is returned, and at that point the pages have been allocated.
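To pull this together, here is a compact user-space toy model (one free list per order, migratetypes ignored, block counts only) of the "find the first non-empty order, then split back down" behaviour described above; it is an illustration, not kernel code:
#include <stdio.h>

#define MAX_ORDER 11

static unsigned long nr_free[MAX_ORDER];	/* free blocks per order */

static int alloc_order(unsigned int order)
{
	unsigned int current_order;

	/* __rmqueue_smallest(): walk upwards from the requested order */
	for (current_order = order; current_order < MAX_ORDER; current_order++) {
		if (!nr_free[current_order])
			continue;
		nr_free[current_order]--;	/* take the block off its list */

		/* expand(): split repeatedly, putting the unused half back one order below */
		while (current_order > order) {
			current_order--;
			nr_free[current_order]++;
			printf("  returned one half to the order-%u list\n", current_order);
		}
		return 0;	/* success */
	}
	return -1;		/* no block large enough */
}

int main(void)
{
	nr_free[3] = 1;			/* one free order-3 block (8 pages) */
	if (alloc_order(1) == 0)	/* request an order-1 block (2 pages) */
		printf("allocated order-1; leftovers: order-2 x%lu, order-1 x%lu\n",
		       nr_free[2], nr_free[1]);
	return 0;
}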
2. Following the method given in https://www.51cto.com/article/645546.html, carry out the practice and provide screenshots (5 points); give the relevant principles and code analysis, a summary, and takeaways (10 points, optional).
The article mainly deals with the problem of memory leaks:
"A program requests memory from the system but, once it no longer needs it, never frees it back to the system, so the allocated memory is wasted."
How do we track down these leaked address ranges?
The troubleshooting approach is:
1. Monitor the PSS consumed by each user process in the system (using the pmap tool: pmap pid).
PSS: proportionally attributed physical memory. For example, if process A uses 20 MB of physical memory, of which 5 MB is shared with process B, then A's PSS is (20-5) + 5/2 = 17.5 MB. (A small C sketch for reading a process's PSS follows this list.)
2. Monitor the output of /proc/meminfo, paying particular attention to Slab usage and the corresponding /proc/slabinfo information.
3. Using /proc/meminfo as a reference, compute changes in memory that is not otherwise accounted for; for example, memory that kernel driver code
takes straight from the buddy allocator via alloc_page() is not counted separately.
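As referenced in item 1, a process's PSS can also be read programmatically by summing the "Pss:" lines of /proc/<pid>/smaps; the sketch below assumes the standard smaps format and keeps error handling minimal:
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	char path[64], line[256];
	unsigned long kb, total = 0;

	snprintf(path, sizeof(path), "/proc/%s/smaps", argc > 1 ? argv[1] : "self");
	FILE *fp = fopen(path, "r");
	if (!fp) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), fp)) {
		if (sscanf(line, "Pss: %lu kB", &kb) == 1)
			total += kb;	/* each mapping contributes its proportional share */
	}
	fclose(fp);
	printf("PSS: %lu kB\n", total);
	return 0;
}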
Slab usage was also monitored with slabtop during the investigation.
Here, bpftrace is used to probe for the memory leak:
1. First, create a file mem_check.c with one allocation path that leaks memory and one that does not.
#include <stdio.h>
#include <stdlib.h>

int main()
{
	char *p1 = NULL;
	char *p2 = NULL;

	for (int i = 0; i < 5; i++) {
		p1 = malloc(16);
	}
	for (int i = 0; i < 5; i++) {
		p2 = malloc(32);
		free(p2);
	}
	getchar();
	return 0;
}
The code above allocates 16 bytes five times without ever freeing them, so it leaks; it also allocates 32 bytes five times and frees each one, so that path does not leak. How do we locate the leak with bpftrace?
We use bpftrace to dynamically account for mem_check's allocations and frees and pinpoint the leak. Two key interfaces need to be probed, malloc and free, both of which are implemented in libc.
2. Compile mem_check.c to produce the executable:
gcc -o mem_check mem_check.c
3. Probe the mem_check executable
Background: bpftrace can probe both kernel space and user space; the two kinds of probes are:
Kernel-space probes: kprobe/kretprobe
User-space probes: uprobe/uretprobe
mem_check is an application running in user space, so the user-space probes uprobe/uretprobe are what we need.
We first verify with a one-liner that uprobe can catch the malloc calls made by mem_check; the probe format is uprobe:binary_path:function_name.
Turning the one-liner above into a bpftrace script, bpf_test.bt:
bpf_test.bt:
BEGIN {
printf("start probe\n");
}
uprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("malloc size: %d\n", arg0);
}
END {
printf("end probe\n");
}
This probes the size requested by each malloc call in mem_check.
The figure below shows all of mem_check's allocations; the final malloc size 1024 is the stdio output buffer that mem_check creates automatically.
Probing the return value of malloc in mem_check:
The return value of malloc is an address, which must be captured with uretprobe; a function's return value is available through the built-in retval. The uretprobe filter is the same as for probing malloc's argument. The script becomes:
BEGIN {
printf("start probe\n");
}
uprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("malloc size: %d\n", arg0);
}
uretprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("addr = %p\n", retval);
}
END {
printf("end probe\n");
}
The output is:
Probing free in mem_check
We have already captured the size and address of each malloc in mem_check; by also probing free and matching frees against mallocs we can find the leak. The script becomes:
BEGIN {
printf("start probe\n");
}
uprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("malloc size: %d\n", arg0);
}
uretprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("addr = %p\n", retval);
}
uprobe:/lib/x86_64-linux-gnu/libc.so.6:free /comm == "mem_check"/{
printf("free addr = %p\n", arg0);
}
END {
printf("end probe\n");
}
The output:
Detecting the memory leak:
We now have mem_check's malloc and free activity. The malloc addresses that never show up in a free are the addresses of the leaked memory.
bpftrace uses eBPF maps as its underlying storage; the steps are:
1. Define a map variable @mem to record the addresses returned by malloc.
2. When a free call is observed, delete the corresponding address from @mem.
3. Whatever remains in @mem at the end is the set of leaked addresses.
The leak-detection script is as follows:
BEGIN {
printf("start probe\n");
}
uprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("malloc size: %d\n", arg0);
@size = arg0;
}
uretprobe:/lib/x86_64-linux-gnu/libc.so.6:malloc /comm == "mem_check"/{
printf("addr = %p\n", retval);
@mem[retval] = @size;
}
uprobe:/lib/x86_64-linux-gnu/libc.so.6:free /comm == "mem_check"/{
printf("free addr = %p\n", arg0);
delete(@mem[arg0]);
}
END {
printf("end probe\n");
}
The output:
As shown above, the entries listed after @mem are the addresses that were never freed, together with their sizes.
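For comparison, the same record-on-malloc / delete-on-free idea that the script implements with a BPF map can be mimicked inside a program in plain C; this is only a toy sketch with a fixed-size table, not a replacement for the bpftrace approach:
#include <stdio.h>
#include <stdlib.h>

#define SLOTS 128

static void  *addr_tab[SLOTS];	/* recorded allocation addresses */
static size_t size_tab[SLOTS];	/* and their sizes */

static void *leak_malloc(size_t size)
{
	void *p = malloc(size);
	for (int i = 0; p && i < SLOTS; i++)
		if (!addr_tab[i]) { addr_tab[i] = p; size_tab[i] = size; break; }
	return p;
}

static void leak_free(void *p)
{
	for (int i = 0; i < SLOTS; i++)
		if (addr_tab[i] == p) { addr_tab[i] = NULL; break; }
	free(p);
}

static void leak_report(void)
{
	for (int i = 0; i < SLOTS; i++)
		if (addr_tab[i])
			printf("leaked %zu bytes at %p\n", size_tab[i], addr_tab[i]);
}

int main(void)
{
	void *a = leak_malloc(16);	/* never freed -> reported */
	void *b = leak_malloc(32);
	leak_free(b);			/* freed -> not reported */
	(void)a;
	leak_report();
	return 0;
}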
Summary:
By writing a few simple bpftrace scripts we can watch an application's allocation and free events and catch the signs of a memory leak. This kind of direct, real-time monitoring gives developers feedback the moment a problem appears.