[内核源码] list 链表

2021-07-09

链表(双向链表/环形链表)是 Linux 内核的一个基础数据结构(内核源码目录 ./include/linux/list.h),看看它是如何实现和应用的。

走读的源码版本是 5.0.1。


1. 链表接口

链表节点关系,主要是当前节点,前一个节点 (prev),后一个节点(next),三个节点的关系。


1.1. 链表结构

1
2
3
4
/* ./include/linux/types.h */
struct list_head {
    struct list_head *next, *prev;
};

1.2. 初始化

链表初始化链表时,把节点的 prev, next 指针都指向自己。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
static inline void INIT_LIST_HEAD(struct list_head *list) {
    /* 为了避免编译器优化,从内存写数据。 */
    WRITE_ONCE(list->next, list);
    list->prev = list;
}

/* ./include/linux/compile.h */
#define WRITE_ONCE(x, val) \
({                            \
    union { typeof(x) __val; char __c[1]; } __u =    \
        { .__val = (__force typeof(x)) (val) }; \
    __write_once_size(&(x), __u.__c, sizeof(x));    \
    __u.__val;                    \
})

/* ./include/linux/compiler.h */
static __always_inline void __write_once_size(volatile void *p, void *res, int size) {
    switch (size) {
    case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
    case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
    case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
    case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
    default:
        barrier();
        __builtin_memcpy((void *)p, (const void *)res, size);
        barrier();
    }
}

1.3. 检查链表是否为空

当它的 next 指针还是指向自己就认为是空。

这个链表的设计,难道 head 节点是不参与真实数据处理的(O_O)?。

1
2
3
4
5
6
7
/* ./include/linux/list.h
 * list_empty - tests whether a list is empty
 * @head: the list to test.
 */
static inline int list_empty(const struct list_head *head) {
    return READ_ONCE(head->next) == head;
}

1.4. 添加

添加节点,函数设计三个参数的思路:新节点(new),往两个节点(prev, next)中间插入。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* ./include/linux/list.h
 * Insert a new entry between two known consecutive entries.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_add(struct list_head *new,
                  struct list_head *prev,
                  struct list_head *next) {
    if (!__list_add_valid(new, prev, next))
        return;

    next->prev = new;
    new->next = next;
    new->prev = prev;
    WRITE_ONCE(prev->next, new);
}
  • list_add,将新节点添加到 head 节点后面。
1
2
3
4
5
6
7
8
9
10
11
/** ./include/linux/list.h
 * list_add - add a new entry
 * @new: new entry to be added
 * @head: list head to add it after
 *
 * Insert a new entry after the specified head.
 * This is good for implementing stacks.
 */
static inline void list_add(struct list_head *new, struct list_head *head) {
    __list_add(new, head, head->next);
}
  • list_add_tail,将新节点添加到 head 节点前面。(这是环形链表吧~)
1
2
3
4
5
6
7
8
9
10
11
/** ./include/linux/list.h
 * list_add_tail - add a new entry
 * @new: new entry to be added
 * @head: list head to add it before
 *
 * Insert a new entry before the specified head.
 * This is useful for implementing queues.
 */
static inline void list_add_tail(struct list_head *new, struct list_head *head) {
    __list_add(new, head->prev, head);
}

1.5. 删除

1
2
3
4
5
6
7
8
9
10
11
/* ./include/linux/list.h
 * Delete a list entry by making the prev/next entries
 * point to each other.
 *
 * This is only for internal list manipulation where we know
 * the prev/next entries already!
 */
static inline void __list_del(struct list_head * prev, struct list_head * next) {
    next->prev = prev;
    WRITE_ONCE(prev->next, next);
}

从链表中删除节点,被删除的节点 prev,next 指针指向自己。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/**
 * list_del_init - deletes entry from list and reinitialize it.
 * @entry: the element to delete from the list.
 */
static inline void list_del_init(struct list_head *entry) {
    __list_del_entry(entry);
    INIT_LIST_HEAD(entry);
}

/* ./include/linux/list.h
 * list_del - deletes entry from list.
 * @entry: the element to delete from the list.
 * Note: list_empty() on entry does not return true after this, the entry is
 * in an undefined state.
 */
static inline void __list_del_entry(struct list_head *entry) {
    if (!__list_del_entry_valid(entry))
        return;

    __list_del(entry->prev, entry->next);
}

1.6. 获取

list_head 数据结构,在应用过程中,作为某个数据结构的成员出现,例如下面的 rdllink 是一个 struct list_head 结构。

使用过程中,通过 rdllink 这个成员将 epitem 关联进链表,但是链表里保存的是 rdllink 指针,它不是 struct epitem 的第一个成员,那么如何从 rdllink 位置,找到 epitem 的指针呢?

我们可以知道 rdllink 内存指针,也可以知道 rdllink 在 epitem 的相对偏移位置。这样:

理解一下 struct 数据结构数据在内存的布局。

epitem 指针 = rdllink 指针 - rdllink 在 epitem 的相对偏移位置。

  • epoll 管理的 fd 节点 epitem。
1
2
3
4
5
6
7
8
9
10
11
12
/* ./fs/eventpoll.c */
struct epitem {
    union {
        /* RB tree node links this structure to the eventpoll RB tree */
        struct rb_node rbn;
        /* Used to free the struct epitem */
        struct rcu_head rcu;
    };
    ...
    struct list_head rdllink;
    ...
};
  • list 节点内容。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* ./include/linux/list.h
 * list_entry - get the struct for this entry
 * @ptr:     the &struct list_head pointer.
 * @type:    the type of the struct this is embedded in.
 * @member:  the name of the list_head within the struct.
 */
#define list_entry(ptr, type, member) \
    container_of(ptr, type, member)

/* ./include/linux/list.h
 * Returns a pointer to the container of this list element.
 *
 * Example:
 * struct foo* f;
 * f = container_of(&foo->entry, struct foo, entry);
 * assert(f == foo);
 *
 * @param ptr Pointer to the struct list_head.
 * @param type Data type of the list element.
 * @param member Member name of the struct list_head field in the list element.
 * @return A pointer to the data struct containing the list head.
 */
/* 获得数据结构成员的相对位置,那么往前便宜相对位置,就是数据结构指针的首地址。 */
#ifndef container_of
#define container_of(ptr, type, member) \
    (type *)((char *)(ptr) - (char *) &((type *)0)->member)
#endif

1.7. 遍历

list_first_entry 从 head 节点的下一个节点开始…

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/**
 * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
 * @pos:    the type * to use as a loop cursor.
 * @n:      another type * to use as temporary storage
 * @head:   the head for your list.
 * @member: the name of the list_head within the struct.
 */
#define list_for_each_entry_safe(pos, n, head, member)            \
    for (pos = list_first_entry(head, typeof(*pos), member),    \
        n = list_next_entry(pos, member);            \
         &pos->member != (head);                     \
         pos = n, n = list_next_entry(n, member))

/**
 * list_first_entry - get the first element from a list
 * @ptr:    the list head to take the element from.
 * @type:   the type of the struct this is embedded in.
 * @member: the name of the list_head within the struct.
 *
 * Note, that list is expected to be not empty.
 */
#define list_first_entry(ptr, type, member) \
    list_entry((ptr)->next, type, member)

2. epoll 应用

epoll 的就绪队列,结合上面的代码,理解一下这个队列的应用。


2.1. 初始化

epoll 数据结构对象 eventpoll 在创建时,会初始化就绪链表,就是 ep->rdllist (list_head) 指针 prev 和 next,指向自己。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* ./fs/eventpoll.c */

struct eventpoll {
    ...
    /* List of ready file descriptors */
    struct list_head rdllist;
    ...
}

/* 初始化链表。 */
static int ep_alloc(struct eventpoll **pep) {
    ...
    struct eventpoll *ep;
    ep = kzalloc(sizeof(*ep), GFP_KERNEL);
    INIT_LIST_HEAD(&ep->rdllist);
    ...
}

2.2. 添加就绪事件

当用户关注的就绪事件发生时,软中断,将 fd 节点(epi),添加进就绪队列。

1
2
3
4
5
6
7
8
9
10
11
12
13
/* ./fs/eventpoll.c */
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) {
    ...
    struct epitem *epi = ep_item_from_wait(wait);
    struct eventpoll *ep = epi->ep;
    ...
    /* If this file is already in the ready list we exit soon */
    if (!ep_is_linked(epi)) {
        list_add_tail(&epi->rdllink, &ep->rdllist);
        ep_pm_stay_awake_rcu(epi);
    }
    ...
}

2.3. 删除就绪事件

内核遍历就绪链表,向用户态拷贝数据,先将就绪事件删除(list_del_init),如果是 LT 模式再就 epi 添加回就绪链表,等待下次处理。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* ./fs/eventpoll.c */
static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
                   void *priv) {
    ...
    /* 感觉 list_for_each_entry_safe 这个遍历有点奇葩,取的是 head 节点的下一个,
     * 内核的封装会把人绕晕。 */
    list_for_each_entry_safe(epi, tmp, head, rdllink) {
        ...
        list_del_init(&epi->rdllink);
        ...
        revents = ep_item_poll(epi, &pt, 1);
        if (!revents)
            continue;
        ...
        if (__put_user(revents, &uevent->events) ||
            __put_user(epi->event.data, &uevent->data)) {
            /* 拷贝数据失败,将 epi 节点,添加到 head 节点后面,退出遍历。 */
            list_add(&epi->rdllink, head);
            ep_pm_stay_awake(epi);
            if (!esed->res)
                esed->res = -EFAULT;
            return 0;
        }
        ...
        else if (!(epi->event.events & EPOLLET)) {
            ...
            /* LT 模式,将 epi 节点,重新添加回就绪队列。*/
            list_add_tail(&epi->rdllink, &ep->rdllist);
            ...
        }
    }

    return 0;
}

3. 参考