常见内核函数

我要暴富

已于 2023-08-17 17:07:48 修改

阅读量2.8k

点赞数 2

分类专栏： Linux内核文章标签： linux

于 2022-01-20 12:02:20 首次发布

本文链接：https://blog.csdn.net/weixin_48006170/article/details/122594700

版权

Linux内核专栏收录该内容

2 篇文章 0 订阅

订阅专栏

fls 函数

fls 函数可以查找到一个数中最高的（most-significant）1位，返回值是从1 开始计数的位号，具体参考注释中的note说明。

/**
 * fls - find last (most-significant) bit set
 * @x: the word to search
 *
 * This is defined the same way as ffs.
 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
 */

static __always_inline int fls(unsigned int x)

local_irq_disable 和local_irq_enable

local_irq_disable() 禁用当前cpu 中断
local_irq_enable() 使能当前cpu 中断

preempt_disable 和preempt_enable

void preempt_disable(void) 禁用内核抢占（禁用抢占后cpu 将不会调度到其他线程）
void preempt_enable(void) 使能内核抢占

try_module_get和module_put

bool try_module_get(struct module *module) 增加模块的引用计数（module->refcnt++），返回1 表示成功。
void module_put(struct module *module) 减少模块引用计数（module->refcnt--）

kthread_run

内核线程创建宏，该宏调用kthread_create 实现线程创建。

/**
 * kthread_run - create a kernel thread and wake it right away.
 * @threadfn: function passed to kthread_create() as the thread body.
 * @data: opaque pointer passed to kthread_create() for @threadfn.
 * @namefmt: printf-style format for the thread name; any further
 *           arguments are forwarded after it.
 *
 * Expands to a statement expression whose value is whatever
 * kthread_create() returned.  wake_up_process() is invoked on that value
 * only when IS_ERR() says it is not an error pointer, so callers must
 * still check the result with IS_ERR().
 */
#define kthread_run(threadfn, data, namefmt, ...)                          \
({                                                                         \
        struct task_struct *__k;                                           \
                                                                           \
        __k = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__);     \
        if (!IS_ERR(__k))                                                  \
                wake_up_process(__k);                                      \
        __k;                                                               \
})

threadfn： 创建线程后需要执行的函数。
data： 传给threadfn 的参数，可以为NULL。
namefmt： 线程名字。
返回： struct task_struct 用来描述一个线程。
使用例子：pThread = kthread_run(LED_Thread, NULL, "ledThread");
创建完成后ps 命令可以查看到该线程。
在这里插入图片描述

proc_create

很多时候我们需要查看内核中的一些属性/数据，就可以在 “/proc” 创建虚拟文件，例如cmdline。
在这里插入图片描述
proc_create 函数可以在"/proc" 下创建一个虚拟文件。

struct proc_dir_entry *proc_create(
	const char *name, umode_t mode, struct proc_dir_entry *parent,
	const struct file_operations *proc_fops);
//proc_dir_entry 可以描述一个文件条目
//当返回值为 NULL 时创建失败

//删除一个/proc 虚拟文件，proc_dir_entry 创建时返回的指针
void proc_remove(struct proc_dir_entry *);

cmdline 参考代码（fs/proc/cmdline.c）
最重要的参数就是proc_fops （struct file_operations），open、read、write 等等函数可以根据需要自由发挥（甚至可以不写open）
在这里插入图片描述

原子位操作

原子操作中的位操作部分函数如下：
void set_bit(int nr, void *addr) 原子设置addr所指的第nr位
void clear_bit(int nr, void *addr) 原子的清空所指对象的第nr位
void change_bit(int nr, void *addr) 原子的翻转addr所指对象的第nr位
int test_bit(int nr, void *addr) 返回addr所指对象的第nr位的值
int test_and_set_bit(int nr, void *addr) 原子设置addr所指对象的第nr位，并返回原先的值
int test_and_clear_bit(int nr, void *addr) 原子清空addr所指对象的第nr位，并返回原先的值
int test_and_change_bit(int nr, void *addr) 原子翻转addr所指对象的第nr位，并返回原先的值

unsigned long word = 0;

set_bit(0, &word); /*第0位被设置*/

set_bit(1, &word); /*第1位被设置*/

clear_bit(1, &word); /*第1位被清空*/

change_bit(0, &word); /*翻转第0位*/

Linux内核内存申请的三种方式(kmalloc，kzalloc，vmalloc)

Linux 大小端读写函数

内核 ARM64 大小端读写函数定义在：linux-5.4.47\linux-5.4.47\arch\arm64\include\asm\io.h
小端读写：

/*
 * Relaxed reads: each expands to the matching __raw_read*() call; the
 * 16/32/64-bit variants pass the raw value through le*_to_cpu(), i.e. the
 * device value is treated as little-endian.  No __iormb() call is made.
 */
#define readb_relaxed(c)	({ u8  __r = __raw_readb(c); __r; })
#define readw_relaxed(c)	({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
#define readl_relaxed(c)	({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
#define readq_relaxed(c)	({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })

/*
 * Relaxed writes: the value is converted with cpu_to_le*() (except for the
 * byte variant) and handed to __raw_write*().  No __iowmb() call is made.
 */
#define writeb_relaxed(v,c)	((void)__raw_writeb((v),(c)))
#define writew_relaxed(v,c)	((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
#define writel_relaxed(v,c)	((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
#define writeq_relaxed(v,c)	((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))

/*
 * Ordered reads: the relaxed read followed by __iormb(__v) on the value
 * just read (presumably an I/O read barrier; confirm against its
 * definition, which is not shown here).
 */
#define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(__v); __v; })
#define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
#define readq(c)		({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })

/*
 * Ordered writes: __iowmb() is issued first, then the relaxed write
 * (presumably an I/O write barrier; confirm against its definition).
 */
#define writeb(v,c)		({ __iowmb(); writeb_relaxed((v),(c)); })
#define writew(v,c)		({ __iowmb(); writew_relaxed((v),(c)); })
#define writel(v,c)		({ __iowmb(); writel_relaxed((v),(c)); })
#define writeq(v,c)		({ __iowmb(); writeq_relaxed((v),(c)); })

大端读写：

/*
 * Big-endian reads: __raw_read*() result converted with be*_to_cpu(),
 * followed by __iormb(__v) on the value just read.
 */
#define ioread16be(p)		({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
#define ioread32be(p)		({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
#define ioread64be(p)		({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })

/*
 * Big-endian writes: __iowmb() first, then the value converted with
 * cpu_to_be*() is handed to __raw_write*().
 */
#define iowrite16be(v,p)	({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
#define iowrite32be(v,p)	({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
#define iowrite64be(v,p)	({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); })

__read_mostly

kmemdup

list_splice_init

将list 链表插入head 链表中

/**
 * list_splice_init - splice @list into @head and reset @list.
 * @list: list whose entries are handed over.
 * @head: list that receives them.
 *
 * An empty @list is left alone.  Otherwise the entries are passed to
 * __list_splice() with @head and @head->next as the neighbouring nodes,
 * and @list is then re-initialised with INIT_LIST_HEAD().
 */
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	if (list_empty(list))
		return;

	__list_splice(list, head, head->next);
	INIT_LIST_HEAD(list);
}

of_count_phandle_with_args

获取设备树节点某个属性中 phandle 条目（phandle 及其参数）的个数。

init_completion

初始化完成量 completion，用于一个执行单元等待另一个执行单元执行完某事。例如spi_sync 同步传输函数中，需要等待spi 传输完成才会返回。（completion 的实现是依赖休眠、唤醒机制）

spin_lock_irqsave

获取自旋锁的同时保存当前中断状态并关闭本地CPU 中断，用于被保护的数据同时会在中断上下文中访问的场景；需要与spin_unlock_irqrestore 配对使用。

PTR_TO_UINT

PTR_TO_UINT：指针类型转化为 uint类型

for_each_child_of_node

遍历设备树节点parent 下所有子节点。

/*
 * for_each_child_of_node - loop over the children of a device-tree node.
 * @parent: node whose children are visited.
 * @child:  iterator variable; it receives each value returned by
 *          of_get_next_child() and is NULL once the loop has run out.
 */
#define for_each_child_of_node(parent, child) \
	for (child = of_get_next_child(parent, NULL); child; \
	     child = of_get_next_child(parent, child))

for_each_child_of_node(fm_node, dev_node) {		//遍历fman 下所有子节点
	......
}

of_clk_get

获取时钟。

clocks = <&clockgen 3 0>;

struct clk *of_clk_get(struct device_node *np, int index)
unsigned long clk_get_rate(struct clk *clk)	//获取时钟速率

of_irq_to_resource

of_irq_to_resource 从设备树获取irq。
dev：设备树节点。
index：下标，指定要获取第几个irq。
resource ：指向存放获取到资源地址的指针，此处为NULL。
返回值：成功返回irq 号；irq 映射失败返回0，其他错误返回负的错误码（例如-EPROBE_DEFER）。

//设备树
interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>,
             <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
//原型
int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
//实例
err_irq = of_irq_to_resource(fm_node, 1, NULL);

of_address_to_resource

of_address_to_resource 获取外设寄存器的物理地址。
dev：设备树节点。
index：下标，指定要获取reg 属性中的第几组地址范围。
resource ：指向存放获取到资源地址的指针，这里是reg的物理地址范围。
返回值：返回<0 时为失败。

//设备树
reg = <0x0 0x1a00000 0x0 0xfe000>;
//原型
int of_address_to_resource(struct device_node *dev, int index,
			   struct resource *r)
//实例
_errno = of_address_to_resource(fm_node, 0, &res);	//获取fman 物理地址
if (unlikely(_errno < 0)) {
    ......
}

大小端转换函数 be32_to_cpu

在kernel里面经常能看见把十六进制数用下面几个函数转换一下,比如be32_to_cpu, cpu_to_be32, cpu_to_le16,cpu_to_le32等.其实很简单.

名词解释
le叫做little endian, be叫做big endian,这是两种字节序,分别称为小端和大端.
le表示地址低位存储值的低位,地址高位存储值的高位.
be表示地址低位存储值的高位,地址高位存储值的低位.

不同cpu使用了不同的字节序,比如PowerPC系列cpu就用了大端模式,而ARM和x86就用的小端模式.
因此,对于不同的cpu,上面几个函数的执行结果也是不一样的.
但是,凡是xx_to_cpu就说明结果是给cpu使用的.反之,cpu_to_xx就说明从cpu的字节序转换成目标字节序.
如果cpu本身就是小端模式,那么cpu_to_le32这类函数就会do nothing.

per_cpu

在Linux操作系统中，特别是针对SMP或者NUMA架构的多CPU系统的时候，描述每个CPU的私有数据的时候，Linux操作系统提供了per_cpu机制。per_cpu机制就是让每个CPU都有自己的私有数据段，便于保护与访问。

per_cpu：获取指定cpu私有数据变量。通过per_cpu(var, cpu)宏来获取，cpu是int型，代表CPU index；var代表将要访问的CPU的那一份数据（变量类型）。

struct softnet_data *sd = &per_cpu(softnet_data, i);

例如上面代码中使用per_cpu 获取指定cpu 的网络软中断数据softnet_data。

INIT_LIST_HEAD

offsetof

offsetof 是一个宏，它可以计算出member 成员在type 结构体类型的偏移地址。

/*
 * offsetof - byte offset of MEMBER inside TYPE.
 * Casts 0 to a TYPE pointer, takes the address of MEMBER through it and
 * converts that address to size_t; since the base is 0 the address equals
 * the member's offset.
 */
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)

(TYPE *)0 把0强转成指针，它的值是0x0, 0x0->MEMBER 就是 0x0 + member 偏移地址；这点在汇编中也是这么计算的可以使用gdb来查看内核的反汇编代码和结构体的偏移。
例如 offsetof(typeof(netdev) ,irq) 就可以得到irq 的偏移地址64。

(gdb) ptype /o struct net_device
/* offset    |  size */  type = struct net_device {
/*    0      |    16 */    char name[16];
/*   16      |    16 */    struct hlist_node {
/*   16      |     8 */        struct hlist_node *next;
/*   24      |     8 */        struct hlist_node **pprev;

                               /* total size (bytes):   16 */
                           } name_hlist;

/*   32      |     8 */    struct dev_ifalias *ifalias;
/*   40      |     8 */    unsigned long mem_end;
/*   48      |     8 */    unsigned long mem_start;
/*   56      |     8 */    unsigned long base_addr;
/*   64      |     4 */    int irq;

**typeof：** 使用typeof() 可以得到其参数的类型；这里传入的参数是结构体变量，而不是结构体指针，因此得到的是该结构体的结构体类型。

container_of简介

/*
 * container_of - recover a pointer to the enclosing structure from a
 * pointer to one of its members.
 * @ptr:    address of the member inside the structure.
 * @type:   type of the enclosing structure.
 * @member: name of the member that @ptr points to.
 *
 * The member's byte offset is subtracted from @ptr and the result is cast
 * to a pointer to @type.  The whole expansion is parenthesised so that it
 * can be followed directly by a postfix operator, e.g.
 * container_of(p, struct foo, bar)->baz; without the outer parentheses the
 * "->" would bind to the inner expression before the cast is applied.
 */
#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - __builtin_offsetof(type, member)))

container_of(to_mdio_device(d), struct phy_device, mdio) 可以得到phy_device 结构体的地址。
该函数的意义是得到type 类型的结构体首地址（即得到该结构体）。

原理很简单：已知结构体type的成员member的地址ptr，求解结构体type的起始地址。
type的起始地址 = ptr(member地址) - offset (member偏移量)。 offset 不用计较它如何得知（即上面offsetof 的计算方式），我们只要知道该函数会返回member成员所在结构体的首地址就好了。

linux内核中likely与unlikely

likely和unlikely
参考 include/linux/compiler.h：

/*
 * likely(x) / unlikely(x): normalise x to 0 or 1 with !! and pass it to
 * __builtin_expect() with an expected value of 1 or 0 respectively.  The
 * value of the expression is still !!(x); only the hint differs.
 */
# define likely(x)  __builtin_expect(!!(x), 1)
# define unlikely(x)    __builtin_expect(!!(x), 0)

上述源码中采用了内建函数__builtin_expect来进行定义，即 built in function。
　　__builtin_expect的函数原型为long __builtin_expect (long exp, long c)，返回值为完整表达式exp的值，它的作用是期望表达式exp的值等于c（如果exp == c条件成立的机会占绝大多数，那么性能将会得到提升，否则性能反而会下降）。注意， __builtin_expect (exp, c)的返回值仍是exp值本身，并不会改变exp的值。
　　__builtin_expect函数用来引导gcc进行条件分支预测。在一条指令执行时，由于流水线的作用，CPU可以同时完成下一条指令的取指，这样可以提高CPU的利用率。在执行条件分支指令时，CPU也会预取下一条执行，但是如果条件分支的结果为跳转到了其他指令，那CPU预取的下一条指令就没用了，这样就降低了流水线的效率。
　　另外，跳转指令相对于顺序执行的指令会多消耗CPU时间，如果可以尽可能不执行跳转，也可以提高CPU性能。
　　简单从表面上看if(likely(value)) == if(value)，if(unlikely(value)) == if(value)。
也就是likely和unlikely是一样的，但是实际上执行是不同的，加likely的意思是value的值为真的可能性更大一些，那么执行if的机会大，而unlikely表示value的值为假的可能性大一些，执行else机会大一些。
　　加上这种修饰，编译成二进制代码时likely使得if后面的执行语句紧跟着前面的程序，unlikely使得else后面的语句紧跟着前面的程序，这样就会被cache预读取，增加程序的执行速度。
　　那么上述定义中为什么要使用！！符号呢？
　　两次 !! 翻转把非逻辑值（比如1、2、3等数值，或某个指针等等）转化为逻辑值0或1，方便比较。
　　计算机中bool逻辑只有0和1，非0即是1，当likely(x)中参数不是逻辑值时，就可以使用！！符号转化为逻辑值1或0 。比如：！！（3）=！（！（3））=！0=1，这样就把参数3转化为逻辑1了。
　　那么简单理解就是：
　　likely(x)代表x是逻辑真（1）的可能性比较大；
　　unlikely(x)代表x是逻辑假（0）的可能性比较大。

preempt_disable 和preempt_enable

preempt_disable 在内核进程上锁前，为了防止抢占发生死锁，需要关闭内核抢占。
preempt_enable 解锁后使能抢占
上锁前需要关闭内核抢占的原因

ARRAY_SIZE

ARRAY_SIZE ：求一个数组中元素的个数。sizeof(arr)为整个数组的大小，sizeof((arr)[0]) 为其中一个元素的大小，相除即得到数组元素的个数。

/*
 * ARRAY_SIZE - number of elements in arr: the size of the whole array
 * divided by the size of its first element.
 * NOTE(review): arr must be a real array; for a pointer this expression
 * yields sizeof(pointer) / sizeof(element) instead.
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

ALIGN

先说ALIGN的用法，ALIGN(x,a) 是为了使x以a为边界对齐，实现原理是给x加上一个最小的数，使x以a为边界对齐。举个例子，a = 8, x=0, ALIGN(x,a) 运算结果为0； a = 8, x = 3，运算结果为8； a = 8, x = 11, 运算结果为16。

/*
 * ALIGN - round x up to the next multiple of a.
 * @x: value to align.
 * @a: alignment; must be a power of two for the mask trick to work.
 *
 * a is converted to the type of x before the mask is built.  Without the
 * conversion a narrower unsigned alignment (e.g. 8U against a 64-bit x)
 * produces a zero-extended mask that clears the upper bits of the result.
 * The result has the type of x.
 */
#define ALIGN(x, a)	(((x) + ((__typeof__(x))(a) - 1)) & ~((__typeof__(x))(a) - 1))

PTR_ALIGN

地址p 以a 为字节对齐，并返回p 类型的地址。
typeof(p) 获取p的数据类型。

/*
 * PTR_ALIGN - align pointer p to a: p is converted to unsigned long,
 * passed through ALIGN() with a, and the result is cast back to the
 * type of p.
 */
#define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))

__setup

uboot bootargs 向内核传递参数，内核通过__setup 设置。

/*
 * __setup_param - emit one boot-parameter table entry.
 * @str:       parameter string stored in a static const char array
 *             (__setup_str_<unique_id>, marked __initconst, 1-byte aligned).
 * @unique_id: suffix used to build the two generated identifiers.
 * @fn:        second initialiser of the obs_kernel_param entry.
 * @early:     third initialiser of the obs_kernel_param entry.
 *
 * The struct obs_kernel_param object is placed in the .init.setup section
 * and aligned to sizeof(long); __used keeps it even though nothing
 * references it by name.
 */
#define __setup_param(str, unique_id, fn, early)			\
	static const char __setup_str_##unique_id[] __initconst		\
		__aligned(1) = str; 					\
	static struct obs_kernel_param __setup_##unique_id		\
		__used __section(.init.setup)				\
		__attribute__((aligned((sizeof(long)))))		\
		= { __setup_str_##unique_id, fn, early }

/*
 * __setup - shorthand for __setup_param() that reuses fn as the unique id
 * and passes 0 for early.
 */
#define __setup(str, fn)						\
	__setup_param(str, fn, fn, 0)

下面是在bootargs 中关闭pcie 电源管理
bootargs=console=ttyS0,115200 earlycon=uart8250,mmio,0x21c0500 root=/dev/mmcblk0p2 cma=64M rw rootwait pcie_port_pm=off

/*
 * pcie_port_pm_setup - handler for the "pcie_port_pm=" boot parameter.
 * @str: text that follows "pcie_port_pm=" on the command line.
 *
 * "off" sets pci_bridge_d3_disable, "force" sets pci_bridge_d3_force, and
 * any other value changes nothing.  Always returns 1.
 */
static int __init pcie_port_pm_setup(char *str)
{
	if (strcmp(str, "off") == 0) {
		pci_bridge_d3_disable = true;
		return 1;
	}

	if (strcmp(str, "force") == 0)
		pci_bridge_d3_force = true;

	return 1;
}
__setup("pcie_port_pm=", pcie_port_pm_setup);

net_ratelimit

net_ratelimit 限制内核打印频率，当返回true时表示可以打印。
net_ratelimit()用于保护内核网络调试信息的打印, 当它返回(TRUE)时则可以打印调试信息,返回零则禁止信息打印. 它的特性为当"极快地"调用net_ratelimit()时,它最多只允许连续打印前10条信息, 后继信息每隔5秒允许打印一次.
net/core/utils.c:

int net_msg_cost = 5*HZ; /* 在拥塞时, 每条网络消息记录所间隔的时间 */
int net_msg_burst = 10*5*HZ; /* 连续记录网络突发消息的间隔(最多连续记录10条消息) */

of_phy_find_device

内核的rcu 机制

RCU锁机制原理解析
 深入理解 Linux 的 RCU 机制

我要暴富

关注

2
点赞
踩
12

收藏

觉得还不错? 一键收藏
2
评论
常见内核函数

container of 函数简介linux内核中likely与unlikelylikely和unlikely参考/include/linux/compiler.h */# define likely(x) __builtin_expect(!!(x), 1)# define unlikely(x) __builtin_expect(!!(x), 0)两次 !! 翻转非逻辑值（比如1、2、3等数值，或某个指针等等）转化位逻辑值0或1，方便比较。if分支跳转语句会降低cpu效率，lik
复制链接

扫一扫