nginx 地址对齐(ngx_align_ptr)

2020-02-08

内存池需要在大块连续内存上分配小块内存。指向小内存块的地址是否对齐,对系统性能有一定影响:CPU 从主存读取数据很慢,合理的地址对齐可以减少访存次数,提高访问效率。

1. 对齐操作

看看 nginx 的内存池地址对齐操作:

1
2
3
// Round pointer p up to the next multiple of a.
//   p: pointer (or integer address) to align
//   a: alignment in bytes; must be a power of two, otherwise the mask
//      ~(a - 1) does not clear exactly the low bits
// Both arguments are parenthesized so that expression arguments such as
// `1 << 3` or `x + y` keep their intended precedence after expansion.
// Note: a is evaluated twice, so it must not have side effects.
#define ngx_align_ptr(p, a)                                                   \
    (u_char *) (((uintptr_t) (p) + ((uintptr_t) (a) - 1))                     \
                & ~((uintptr_t) (a) - 1))

该宏的原理详细证明,请参考 《高效算法的奥秘》(第二版)第三章 2 的幂边界


  • 当 $a = 2^n$ 时,`~((uintptr_t) a - 1)` 的 64 位二进制数最右边 $n$ 位是 0,其余位都是 1。所以对任意 x,`x & ~((uintptr_t) a - 1)` 都能被 $2^n$ 整除;宏中取 x = p + a - 1,得到的就是不小于 p 的最小的 $2^n$ 的倍数。
| a(对齐字节数) | 2 的幂 | 64 位二进制 |
| --- | --- | --- |
| 1 | $2^0$ | 1111111111111111111111111111111111111111111111111111111111111111 |
| 2 | $2^1$ | 1111111111111111111111111111111111111111111111111111111111111110 |
| 4 | $2^2$ | 1111111111111111111111111111111111111111111111111111111111111100 |
| 8 | $2^3$ | 1111111111111111111111111111111111111111111111111111111111111000 |
| 16 | $2^4$ | 1111111111111111111111111111111111111111111111111111111111110000 |
| 32 | $2^5$ | 1111111111111111111111111111111111111111111111111111111111100000 |
| 64 | $2^6$ | 1111111111111111111111111111111111111111111111111111111111000000 |

2. 测试

测试源码


2.1. 测试 ~((uintptr_t)a - 1)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
// Print the alignment mask ~((uintptr_t)a - 1) in binary for each
// power-of-two alignment from 1 to 64.
void test_a() {
    int aligns[] = {1, 2, 4, 8, 16, 32, 64};
    int total = sizeof(aligns) / sizeof(aligns[0]);
    char buf[128];
    int idx = 0;

    while (idx < total) {
        // Low log2(a) bits cleared, every other bit set.
        uintptr_t mask = ~((uintptr_t)aligns[idx] - 1);
        // i2bin renders the value as a binary string using buf as storage.
        char* bits = i2bin(mask, buf, 128);

        printf("a: %2d,  d: %s\n", aligns[idx], bits);
        idx++;
    }
}

结果:

1
2
3
4
5
6
7
a:  1,  d: 1111111111111111111111111111111111111111111111111111111111111111
a:  2,  d: 1111111111111111111111111111111111111111111111111111111111111110
a:  4,  d: 1111111111111111111111111111111111111111111111111111111111111100
a:  8,  d: 1111111111111111111111111111111111111111111111111111111111111000
a: 16,  d: 1111111111111111111111111111111111111111111111111111111111110000
a: 32,  d: 1111111111111111111111111111111111111111111111111111111111100000
a: 64,  d: 1111111111111111111111111111111111111111111111111111111111000000

2.2. 地址添加随机数,测试不同的对齐方式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Check that ngx_align_ptr really yields aligned addresses: for each
// power-of-two alignment, take a randomly offset address inside a heap
// buffer, align it, and print the result together with (address % alignment),
// which is expected to be 0.
void test_align_mod() {
    char bin[128];
    u_char *p, *a, *r;
    int i, len, alignment;
    int aligns[] = {1, 2, 4, 8, 16, 32, 64};

    len = sizeof(aligns) / sizeof(aligns[0]);
    srand(time(NULL));

    // 1024 bytes leaves room for the largest offset (63) plus the largest
    // alignment padding (63).
    p = malloc(1024 * sizeof(u_char));
    if (p == NULL) {
        fprintf(stderr, "malloc failed\n");
        return;
    }
    // %p requires a void* argument.
    printf("p: %p\n", (void*)p);

    for (i = 0; i < len; i++) {
        alignment = aligns[i];
        r = p + rand() % 64;  // random, usually unaligned, start address
        a = ngx_align_ptr(r, alignment);
        // The modulo is cast to unsigned long so it matches %lu on every
        // platform, whatever the underlying type of uintptr_t is.
        printf("a: %2d, r: %p, align: %p, abin: %s, mod: %lu\n", alignment,
               (void*)r, (void*)a,
               i2bin((unsigned long long)(uintptr_t)a, bin, 128),
               (unsigned long)((uintptr_t)a % (uintptr_t)alignment));
    }
    free(p);
}

结果:

1
2
3
4
5
6
7
8
p: 0x7fd035800600
a:  1, r: 0x7fd03580062f, align: 0x7fd03580062f, abin: 11111111101000000110101100000000000011000101111, mod: 0
a:  2, r: 0x7fd03580061a, align: 0x7fd03580061a, abin: 11111111101000000110101100000000000011000011010, mod: 0
a:  4, r: 0x7fd035800635, align: 0x7fd035800638, abin: 11111111101000000110101100000000000011000111000, mod: 0
a:  8, r: 0x7fd035800613, align: 0x7fd035800618, abin: 11111111101000000110101100000000000011000011000, mod: 0
a: 16, r: 0x7fd035800633, align: 0x7fd035800640, abin: 11111111101000000110101100000000000011001000000, mod: 0
a: 32, r: 0x7fd035800602, align: 0x7fd035800620, abin: 11111111101000000110101100000000000011000100000, mod: 0
a: 64, r: 0x7fd03580061b, align: 0x7fd035800640, abin: 11111111101000000110101100000000000011001000000, mod: 0

2.3. 测试对齐效率

申请两块内存,一块内存是对齐处理,另外一块不对齐查看效率(测试代码)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Flags passed to test_align(): whether the start address is aligned, and
// whether the pass writes or reads.
#define ALIGN 1
#define UN_ALIGN 0
#define READ 0
#define WRITE 1
// Number of type_t elements in each benchmark buffer (64 Mi elements).
#define ALIGN_COUNT (1024 * 1024 * 64)
#define UN_ALIGN_COUNT ALIGN_COUNT

// Element type accessed by the benchmark loop.
typedef int type_t;

// Round pointer p up to the next multiple of a (a must be a power of two).
// Both arguments are parenthesized so that expression arguments such as
// `1 << 3` keep their intended precedence after expansion.
// Note: a is evaluated twice, so it must not have side effects.
#define ngx_align_ptr(p, a) \
    (u_char*)(((uintptr_t)(p) + ((uintptr_t)(a) - 1)) & ~((uintptr_t)(a) - 1))

// Time a sequential pass of type_t-sized accesses over [p, p + size).
//   p         start of the buffer
//   size      buffer length in bytes
//   alignment power-of-two alignment applied to the start address
//   is_align  non-zero: access at aligned addresses;
//             zero: shift the start by one byte to force misaligned accesses
//   is_write  non-zero: store to every slot; zero: load from every slot
// Prints the access count, total elapsed time and average time per access.
//
// The timed loop contains only the memory access itself: calling rand() per
// iteration would dominate the measurement, and a "read" pass that never
// dereferences p would not measure memory access at all.
//
// NOTE: the misaligned case intentionally dereferences a type_t pointer that
// is not suitably aligned. That is undefined behavior in ISO C and may fault
// on strict-alignment CPUs; it is done here only because it is the very thing
// being measured.
// NOTE: a read pass loads whatever the buffer holds, so run a write pass over
// the same buffer first.
void test_align(u_char* p, int size, int alignment, int is_align,
                int is_write) {
    u_char* end;
    long long start, stop;
    volatile type_t* slot;  // volatile: the compiler must not drop accesses
    type_t value;
    int count;

    count = 0;
    value = 0;

    end = p + size;
    p = (u_char*)ngx_align_ptr(p, alignment);
    p += is_align ? 0 : 1;  // create a misaligned address

    start = mstime();
    // "<=" so the last slot that fits entirely in the buffer is included;
    // compared as integers so an aligned start beyond `end` is handled.
    while ((uintptr_t)p + sizeof(type_t) <= (uintptr_t)end) {
        slot = (volatile type_t*)p;
        if (is_write) {
            *slot = (type_t)count;
        } else {
            value = *slot;
        }
        p += sizeof(type_t);

        count++;
    }
    stop = mstime();
    (void)value;

    printf(
        "is_align: %d, is_write: %d, alignment: %d, count: %d, cost: %lld ms,"
        " avg: %lf ms\n",
        is_align, is_write, alignment, count, stop - start,
        count > 0 ? (double)(stop - start) / count : 0.0);
}

// Allocate the benchmark buffer and run a write pass followed by a read pass
// over it, using either aligned or deliberately misaligned addresses.
//   argc, argv  unused; kept so existing callers keep working
//   alignment   power-of-two alignment handed to test_align()
//   is_align    non-zero: aligned run; zero: misaligned run
// Only the buffer needed for the selected mode is allocated, and the
// allocation is checked before use.
void test_alloc_mem(int argc, char** argv, int alignment, int is_align) {
    u_char* buf;
    int len, mode;

    (void)argc;
    (void)argv;

    if (is_align) {
        len = (int)(ALIGN_COUNT * sizeof(type_t));
        mode = ALIGN;
    } else {
        len = (int)(UN_ALIGN_COUNT * sizeof(type_t));
        mode = UN_ALIGN;
    }

    buf = malloc(len);
    if (buf == NULL) {
        fprintf(stderr, "malloc of %d bytes failed\n", len);
        return;
    }

    // Write first so the read pass loads initialized memory.
    test_align(buf, len, alignment, mode, WRITE);
    test_align(buf, len, alignment, mode, READ);

    free(buf);
}

// Usage: prog [alignment [is_align]]
//   alignment  power-of-two byte alignment (default 4)
//   is_align   "1" runs the aligned benchmark; anything else (or absent)
//              runs the misaligned one
// Returns 0 on success, 1 if the alignment argument is invalid.
int main(int argc, char* argv[]) {
    int alignment, is_align;

    alignment = (argc >= 2) ? atoi(argv[1]) : 4;
    // ngx_align_ptr only works for powers of two; 0 would turn the mask into
    // 0 and produce a NULL pointer.
    if (alignment <= 0 || (alignment & (alignment - 1)) != 0) {
        fprintf(stderr, "alignment must be a positive power of two\n");
        return 1;
    }

    is_align = (argc == 3 && !strcasecmp(argv[2], "1")) ? 1 : 0;
    test_alloc_mem(argc, argv, alignment, is_align);
    return 0;
}

结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# ./test_align.sh

is_align: 1, is_write: 1, alignment: 16, count: 67108862, cost: 1016 ms, avg: 0.000015 ms
is_align: 1, is_write: 0, alignment: 16, count: 67108862, cost: 214 ms, avg: 0.000003 ms

real    0m1.244s
user    0m1.177s
sys     0m0.066s
-------
is_align: 0, is_write: 1, alignment: 16, count: 67108862, cost: 919 ms, avg: 0.000014 ms
is_align: 0, is_write: 0, alignment: 16, count: 67108862, cost: 223 ms, avg: 0.000003 ms

real    0m1.159s
user    0m1.084s
sys     0m0.075s

3. 总结

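从测试例子中,对齐和不对齐效率没有明显差距(cost 耗费时间),反而对齐的地址有时候花的时间还多,实践和理论对不上啊!——不知道问题出在哪里,能力有限,欢迎指正。

补充说明:测量这类差异时需要注意两点。第一,计时循环内不应包含 rand() 这类开销远大于一次访存的调用,并且"读"测试必须真正从内存中读取数据,否则测到的主要是函数调用的耗时,而不是访存耗时。第二,现代 x86-64 处理器在硬件上支持非对齐访问,只要一次访问没有跨越缓存行(通常 64 字节)或页边界,几乎没有额外开销;对 4 字节的 int 偏移 1 字节后,只有少部分访问会跨缓存行,所以差距很小。在要求严格对齐的体系结构上,非对齐访问可能直接触发异常,差别才会非常明显。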


4. 参考

[nginx 源码走读] 内存池

NGINX 内存池 — 对齐

Nginx - CPU Cacheline 深思

C语言字节对齐问题详解

谈谈内存对齐一