iOS底层原理类探索之cache分析

最新推荐文章于 2023-12-10 17:19:39 发布

WeaterMr

最新推荐文章于 2023-12-10 17:19:39 发布

阅读量144

点赞数

分类专栏： iOS底层探究文章标签：类

本文链接：https://blog.csdn.net/weater1/article/details/118214041

版权

iOS底层探究专栏收录该内容

17 篇文章 0 订阅

订阅专栏

iOS底层原理类探索之cache分析

一，cache的数据结构

struct objc_class : objc_object {
    // Class ISA;
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags

上一章我们探索了关于bits中的存储信息，这一章我们来探索cache中的秘密。

struct cache_t {
private:
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask; // 8
    union {
        struct {
            explicit_atomic<mask_t>    _maybeMask; // 4
#if __LP64__
            uint16_t                   _flags;  // 2
#endif
            uint16_t                   _occupied; // 2
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache; // 8
    };

顾名思义，cache 即方法缓存，我们先来看看 cache_t 的数据结构：
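_bucketsAndMaybeMask：uintptr_t 类型，存放 buckets 的首地址（从命名可以看出，在部分架构下还可能同时存放 mask）
_maybeMask：掩码，值为当前缓存容量 capacity - 1（见后文 setBucketsAndMask(newBuckets, newCapacity - 1)）
_flags：标志位，仅在 __LP64__ 下存在
_occupied：当前 cache 中已经缓存（已占用）的 bucket 数量，默认是 0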
_originalPreoptCache：初始时候的缓存（注意联合体互斥）
__LP64__ 表示 long 和指针均为 64 位的数据模型，64 位的 Unix 和类 Unix 系统（Linux 和 macOS）采用的就是这种模型

二，LLDB调试验证

(lldb) p/x (cache_t*)0x0000000100004510
(cache_t *) $0 = 0x0000000100004510
(lldb) p *$0
(cache_t) $1 = {
  _bucketsAndMaybeMask = {
    std::__1::atomic<unsigned long> = {
      Value = 4302349488
    }
  }
   = {
     = {
      _maybeMask = {
        std::__1::atomic<unsigned int> = {
          Value = 15
        }
      }
      _flags = 32808
      _occupied = 5
    }
    _originalPreoptCache = {
      std::__1::atomic<preopt_cache_t *> = {
        Value = 0x000580280000000f
      }
    }
  }
}
(lldb) p $1.buckets()
(bucket_t *) $2 = 0x000000010070a4b0
(lldb) p *$2
(bucket_t) $3 = {
  _sel = {
    std::__1::atomic<objc_selector *> = (null) {
      Value = (null)
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0
    }
  }
}
(lldb) p $1.buckets()[1]
(bucket_t) $4 = {
  _sel = {
    std::__1::atomic<objc_selector *> = (null) {
      Value = (null)
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0
    }
  }
}

……（中间省略若干 bucket 的打印）
(lldb) p $1.buckets()[17]
(bucket_t) $19 = {
  _sel = {
    std::__1::atomic<objc_selector *> = (null) {
      Value = (null)
    }
  }
  _imp = {
    std::__1::atomic<unsigned long> = {
      Value = 0
    }
  }
}
(lldb) p $7.sel()
(SEL) $20 = "isKindOfClass:"
(lldb) p $10.sel()
(SEL) $21 = "isNSObject__"
(lldb) p $11.sel()
(SEL) $22 = "description"
(lldb) p $14.sel()
(SEL) $23 = "isProxy"
(lldb) p $15.sel()
(SEL) $24 = "class"

通过上面的打印测试发现一个问题：当我们的方法中调用了 NSLog，并且打印的内容涉及对象本身时，会额外触发方法调用并写入缓存，从而影响 _occupied 和 _maybeMask 的值。如：打印当前的对象，将会调用对应的 description 方法和一系列的关联方法。
上面的方式是我们通过lldb调试进行探索。

三，通过模拟代码方式探索(非源码环境)

#import <objc/runtime.h>
typedef uint32_t mask_t;

/// Mirror of a single method-cache entry: a selector paired with the
/// implementation pointer cached for it.
/// NOTE(review): the field order (_sel first, then _imp) is assumed to match
/// the runtime's bucket layout on the platform this demo targets — confirm
/// before reusing on another architecture.
struct gd_bucket_t {
    SEL _sel;  // selector stored in this slot (NULL when the slot is empty)
    IMP _imp;  // implementation pointer stored in this slot
};
/// Mirror of the class data-bits word: one pointer-sized integer.
/// It exists here only so that gd_objc_class has the same field layout as
/// the class structure it imitates; this demo never reads it.
struct gd_class_data_bits_t {
    uintptr_t bits;  // raw bits word; its encoding is not interpreted here
};

/// Flattened mirror of the method cache header, with the bucket pointer and
/// the mask/flags/occupied fields laid out as plain members.
/// Sizes in the trailing comments are the ones annotated in the original
/// snippet (8 + 4 + 2 + 2 bytes).
struct gd_cache_t {
    struct gd_bucket_t *_bukets;  // 8 — address of the first bucket
    mask_t _maybeMask;            // 4 — cache mask
    uint16_t _flags;              // 2
    uint16_t _occupied;           // 2 — number of occupied buckets
};
/// Mirror of the class object layout used to peek at a class's method cache:
/// isa, superclass, the cache header, then the data-bits word.
/// A Class pointer is bridged to this struct so the cache fields can be read
/// directly.
struct gd_objc_class {
    Class ISA;                         // isa pointer (declared explicitly in this mirror)
    Class superclass;                  // superclass pointer
    struct gd_cache_t cache;           // formerly cache pointer and vtable
    struct gd_class_data_bits_t bits;  // class_rw_t * plus custom rr/alloc flags
};
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // Send a few messages to a GoodOne instance so the class's method
        // cache gets populated, then read the cache back through the gd_*
        // mirror structs.
        // LLDB equivalents used while exploring:
        //   p/x (cache_t*)0x0000000100004510   and   p *$2.buckets()
        GoodOne *instance = [GoodOne alloc];
        Class goodOneClass = instance.class;

        [instance goodOne];
        [instance goodOne];
        [instance good2];
        [instance good3];
        [instance good4];

        // Reinterpret the Class pointer as the mirrored layout.
        struct gd_objc_class *layout = (__bridge struct gd_objc_class *)(goodOneClass);
        struct gd_cache_t *cache = &layout->cache;
        NSLog(@"%hu - %u", cache->_occupied, cache->_maybeMask);

        // Walk slots [0, _maybeMask) and print each selector/IMP pair.
        // NOTE(review): the format is "%pf", so a literal 'f' is printed
        // right after every pointer (likely unintended); it is kept as-is
        // here so the output matches the logs shown alongside this code.
        mask_t slot = 0;
        while (slot < cache->_maybeMask) {
            struct gd_bucket_t entry = cache->_bukets[slot];
            NSLog(@"%@ - %pf", NSStringFromSelector(entry._sel), entry._imp);
            slot++;
        }
        NSLog(@"Hello, World!");
    }

    return 0;
}

在编写模拟代码的过程中，注意引入头文件 #import <objc/runtime.h>。

[GoodOne goodOne]
[GoodOne goodOne]
 2 - 3
 (null) - 0x0f
 class - 0x33d698f
 goodOne - 0x7e88f
 Hello, World!

当同一个方法多次执行时只会缓存一次

[GoodOne goodOne]
[GoodOne goodOne]
[GoodOne good2]
[GoodOne good3]
[GoodOne good4]
 3 - 7
 good3 - 0x79c0f
 (null) - 0x0f
 good2 - 0x7990f
 (null) - 0x0f
 (null) - 0x0f
 (null) - 0x0f
 good4 - 0x79f0f

1. 当新增一个方法后的占用数（occupied + 1，再加上 CACHE_END_MARKER）超过容量的 3/4 时，即使缓存还没有存满也会自动扩容（本例中容量由 4 扩为 8，maybeMask 由 3 变为 7）。
2. 扩容后会清空扩容前缓存的所有内容。
3. 方法在缓存中的存放是无序的（位置由哈希决定，与调用顺序无关）。
上面只是现象，下面我们将会探索产生这些现象的原因。
缓存肯定牵扯到数据的读写，下面我们来看看对应的数据插入。

四，源码流程分析

// Inserts the (sel, imp) pair into this cache, first allocating or growing
// the bucket array when needed.
//   sel      - selector being cached
//   imp      - implementation to store for sel
//   receiver - object passed to bad_cache() if no free slot is found
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    // Use the cache as-is until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1; // occupancy after this insert, e.g. 1 + 1
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) {
        // Cache is read-only. Replace it.
        // This branch is taken when the cache is still empty.
        // INIT_CACHE_SIZE is 4 according to the accompanying notes.
        if (!capacity) capacity = INIT_CACHE_SIZE;
        reallocate(oldCapacity, capacity, /* freeOld */false);
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is.
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
    }
#endif
    else {
        // Double the capacity (e.g. 4 * 2 = 8), capped at MAX_CACHE_SIZE.
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        // freeOld == true: the old bucket array is handed to collect_free().
        reallocate(oldCapacity, capacity, true);
    }

    bucket_t *b = buckets();
    // m is the mask, capacity - 1 (e.g. 16 - 1 = 15); this is the same value
    // that reallocate() stores into _maybeMask.
    mask_t m = capacity - 1;
    mask_t begin = cache_hash(sel, m);
    mask_t i = begin;

    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) {
            incrementOccupied();
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());
            return;
        }
        if (b[i].sel() == sel) {
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));

    // Every slot was probed without finding a free one or a match.
    bad_cache(receiver, (SEL)sel);
}

if (!capacity) capacity = INIT_CACHE_SIZE; 初始化缓存大小 4 。
下面我们将看看void cache_t::reallocate方法。

// Replaces the cache's bucket array with a freshly allocated one of
// newCapacity entries and installs the matching mask (newCapacity - 1).
// When freeOld is true, the previous array is handed to collect_free()
// together with oldCapacity.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    // Grab the current array before it is replaced below.
    bucket_t *previous = buckets();
    bucket_t *replacement = allocateBuckets(newCapacity);

    // Nothing from the old array is copied into the new one.
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(replacement, newCapacity - 1);

    if (!freeOld) {
        return;
    }
    collect_free(previous, oldCapacity);
}

创建桶子

// Installs a new bucket array and mask into this cache and resets the
// occupied count to 0.
//   newBuckets - address stored into _bucketsAndMaybeMask
//   newMask    - value stored into _maybeMask
// The buckets pointer is always written before the mask. The __arm__ branch
// uses relaxed stores separated by mega_barrier() calls; the x86 branch uses
// release stores. Any other architecture is rejected at compile time.
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    // objc_msgSend uses mask and buckets with no locks.
    // It is safe for objc_msgSend to see new buckets but old mask.
    // (It will get a cache miss but not overrun the buckets' bounds).
    // It is unsafe for objc_msgSend to see old buckets and new mask.
    // Therefore we write new buckets, wait a lot, then write new mask.
    // objc_msgSend reads mask first, then buckets.

#ifdef __arm__
    // ensure other threads see buckets contents before buckets pointer
    mega_barrier();

    _bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_relaxed);

    // ensure other threads see new buckets before new mask
    mega_barrier();

    _maybeMask.store(newMask, memory_order_relaxed);
    _occupied = 0;
#elif __x86_64__ || i386
    // ensure other threads see buckets contents before buckets pointer
    _bucketsAndMaybeMask.store((uintptr_t)newBuckets, memory_order_release);

    // ensure other threads see new buckets before new mask
    _maybeMask.store(newMask, memory_order_release);
    _occupied = 0;
#else
#error Don't know how to do setBucketsAndMask on this architecture.
#endif
}

_bucketsAndMaybeMask.store 这里将对我们的cache_t 第一个成员变量进行存值，这里存储的是对应的newBuckets 的首地址，不可能把所有的数据存储到这里。
do {} while (fastpath((i = cache_next(i, m)) != begin)); 这里我们将看到一个 do-while 循环，用来查找空位并插入 bucket：

// cache_next: returns the index to probe after slot i when scanning the
// bucket array. mask is the cache mask (capacity - 1, as passed by
// cache_t::insert). The probe direction depends on the build configuration.
#if CACHE_END_MARKER
// Probe upward; the bitwise AND with mask wraps the index back to 0.
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}
#elif __arm64__
// Probe downward; after slot 0, wrap around to the last slot (mask).
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}
#endif

这里我们可以看到，如果是真机（arm64）是向前插入。好比电影院有 10 个位置，但是你并不知道哪个位置上有人，所以会先通过哈希算出一个位置，好比 3 号位；向前插入顾名思义，如果 3 号位有人，就看 2 号位，如果还有人，再看 1 号位、0 号位；如果一直到 0 号位都有人，那么将会取 m（就是 capacity - 1，即 _maybeMask），也就是从最后一位开始继续往前找，这样可以保证所有位置都能遍历到。那为什么真机要往前插入？这可能和手机的运行机制有关，往前查找应该更有利于减少冲突、提高查找效率（此处为推测）。
这里面还有一个机制：当缓存占用达到容量的 3/4 时，就会自动扩容（容量翻倍）：capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;

哈希函数补充

什么是 Hash

Hash（哈希），又称“散列”。
在某种程度上，散列是与排序相反的一种操作，排序是将集合中的元素按照某种方式比如字典顺序排列在一起，而散列通过计算哈希值，打破元素之间原有的关系，使集合中的元素按照散列函数的分类进行排列。

在介绍一些集合时，我们总强调需要重写某个类的 equals() 方法和 hashCode() 方法，确保唯一性。这里的 mask_t begin = cache_hash(sel, m); 表示根据 sel 和掩码 m 计算出该方法在 buckets 中的起始下标 begin。计算 cache_hash 的过程就称作哈希。

为什么要有 Hash

我们通常使用数组或者链表来存储元素，一旦存储的内容数量特别多，需要占用很大的空间，而且在查找某个元素是否存在的过程中，数组和链表都需要挨个循环比较，而通过哈希计算，可以大大减少比较次数。

哈希的应用

哈希表（hash table）是哈希函数最主要的应用。

哈希表是实现关联数组（associative array）的一种数据结构，广泛应用于实现数据的快速查找。
用哈希函数计算关键字的哈希值（hash value），通过哈希值这个索引就可以找到关键字的存储位置，即桶（bucket）。哈希表不同于二叉树、栈、序列等数据结构，一般情况下，在哈希表上的插入、查找、删除等操作的时间复杂度是 O(1)。

查找过程中，关键字的比较次数，取决于产生冲突的多少，产生的冲突少，查找效率就高，产生的冲突多，查找效率就低。因此，影响产生冲突多少的因素，也就是影响查找效率的因素。
影响产生冲突多少有以下三个因素：

哈希函数是否均匀；
处理冲突的方法；
哈希表的加载因子。

哈希表的加载因子和容量决定了在什么时候桶数（存储位置）不够，需要重新哈希。

加载因子太大的话冲突增多，查找效率变低；太小的话会频繁 rehash 且浪费空间，导致性能降低。所以加载因子的大小需要结合时间和空间效率考虑。

在 Java 的 HashMap 中，默认的加载因子为 0.75，即四分之三。
未完待续。。。

WeaterMr

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
iOS底层原理类探索之cache分析

iOS底层原理类探索之cache分析struct objc_class : objc_object { // Class ISA; Class superclass; cache_t cache; // formerly cache pointer and vtable class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags上一章我们探索了关于bits中的存储信息
复制链接

扫一扫

专栏目录