使用 mbind,可以为给定的已映射内存段设置内存策略。
问:我如何让 mbind 将一个内存段交错分布到所有节点上?
如果在分配之后、首次使用之前调用,那么 MPOL_INTERLEAVE 会按照我们的预期在所有节点上生效——内存将均匀地分配到所有节点上。
但是,如果该段已经被写入并分配在例如节点 0 中,则无法告诉内核在所有 NUMA 节点上统一交错。
该操作实际上变成了空操作,因为内核将其解释为“请将此段放在这组节点上”。由于我们传入的是所有 NUMA 节点的集合,因此不存在分配在该集合之外、需要被移动的内存。
最小、完整和可验证的示例
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <sys/syscall.h>
#include <numaif.h>
#include <numa.h>
/* Number of int elements in the test array: 2^29 bytes divided by sizeof(int). */
#define N ((1<<29) / sizeof(int))
/* Page size queried at run time; note this expands to a sysconf() call on every use. */
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Mask that clears the in-page offset bits, i.e. rounds a value down to a page boundary. */
#define PAGE_MASK (~(PAGE_SIZE - 1))
/*
 * Run a command through popen() and copy everything it writes to its
 * standard output onto our own stdout.
 *
 * cmd: command line passed unchanged to popen() in read mode.
 *
 * The process is terminated with exit(-1) when the command cannot be
 * started, when pclose() itself fails, or when the command finishes with a
 * non-zero wait status.
 */
void print_command(const char *cmd) {
    FILE *fp = popen(cmd, "r");
    if (fp == NULL) {
        perror("popen");
        exit(-1);
    }

    char buf[1024];
    while (fgets(buf, sizeof(buf), fp) != NULL) {
        fputs(buf, stdout);
    }

    int status = pclose(fp);
    if (status == -1) {
        /* pclose() itself failed, so errno describes the problem. */
        perror("pclose");
        exit(-1);
    }
    if (status != 0) {
        /*
         * The command ran but reported failure. errno is unrelated in this
         * case, so perror() would print a misleading message.
         */
        fprintf(stderr, "command failed (wait status %d): %s\n", status, cmd);
        exit(-1);
    }
}
/*
 * Show the per-node memory usage of the current process by running
 * `numastat -c <pid>`; the report is wrapped in ANSI escape sequences so it
 * is printed in green.
 */
void print_node_allocations() {
    char numastat_cmd[1024];

    snprintf(numastat_cmd, sizeof(numastat_cmd), "numastat -c %d", getpid());

    fputs("\x1B[32m", stdout);  /* switch foreground colour to green */
    print_command(numastat_cmd);
    fputs("\x1B[0m", stdout);   /* restore default terminal attributes */
}
int main(int argc, char **argv) {
int *a = numa_alloc_local(N * sizeof(int));
size_t len = (N * sizeof(int)) & PAGE_MASK;
unsigned long mymask = *numa_get_mems_allowed()->maskp;
unsigned long maxnode = numa_get_mems_allowed()->size;
// pin thread to core zero
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);
if (sched_setaffinity(syscall(SYS_gettid), sizeof(mask), &mask) < 0) {
perror("sched_setaffinity");
exit(-1);
}
// initialize array
printf("\n\n(1) array allocated on local node\n");
a[0] = 997;
for(size_t i=1; i < N; i++) {
a[i] = a[i-1] * a[i-1] % 1000000000;
}
print_node_allocations();
// attempt to get it to be uniformly interleaved on all nodes
printf("\n\n(2) array interleaved on all nodes\n");
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
// what if we interleave on all but the local node?
printf("\n\n(3) array interleaved on all nodes (except local node)\n");
mymask -= 0x01;
if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
perror("mbind failed");
exit(-1);
}
print_node_allocations();
return 0;
}
使用以下命令编译并运行:
gcc -o interleave_all interleave_all.c -lnuma && sudo ./interleave_all
输出如下:
(1) array allocated on local node
Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
Node 0 Node 1 Node 2 Node 3 Total
------ ------ ------ ------ -----
Huge 0 0 0 0 0
Heap 0 0 0 0 0
Stack 0 0 0 0 0
Private 514 0 0 0 514
------- ------ ------ ------ ------ -----
Total 514 0 0 0 514
(2) array interleaved on all nodes
Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
Node 0 Node 1 Node 2 Node 3 Total
------ ------ ------ ------ -----
Huge 0 0 0 0 0
Heap 0 0 0 0 0
Stack 0 0 0 0 0
Private 514 0 0 0 514
------- ------ ------ ------ ------ -----
Total 514 0 0 0 514
(3) array interleaved on all nodes (except local node)
Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
Node 0 Node 1 Node 2 Node 3 Total
------ ------ ------ ------ -----
Huge 0 0 0 0 0
Heap 0 0 0 0 0
Stack 0 0 0 0 0
Private 2 171 171 171 514
------- ------ ------ ------ ------ -----
Total 2 171 171 171 514