6

使用mbind,可以为给定的映射内存段设置内存策略。

问:我如何让 mbind 把一个内存段交错分布到所有节点上?

如果在分配之后、首次使用之前调用 mbind,那么 MPOL_INTERLEAVE 会如我们预期的那样生效——内存将均匀地分布在所有节点上。

但是,如果该段已经被写入并分配在例如节点 0 中,则无法告诉内核在所有 NUMA 节点上统一交错。

该操作实际上变成了空操作,因为内核将其解释为“请将此段放在这组节点上”。由于我们传入的是所有 NUMA 节点的集合,因此不存在分配在该集合之外、需要被移动的内存。

最小、完整和可验证的示例

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <sys/syscall.h>
#include <numaif.h>
#include <numa.h>

/* Number of int elements in the test array: 2^29 bytes (512 MiB) worth of ints. */
#define N ((1<<29) / sizeof(int))

/* System page size in bytes; note that sysconf() is called on every expansion. */
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
/* Mask that clears the in-page offset bits, so `x & PAGE_MASK` rounds x down
 * to a multiple of the page size (assumes the page size is a power of two). */
#define PAGE_MASK (~(PAGE_SIZE - 1))

/*
 * Run a shell command and copy everything it writes to its standard output
 * onto our own standard output.
 *
 * cmd: command line handed to popen(), i.e. interpreted by the shell.
 *
 * Terminates the process with exit(-1) if the command cannot be started,
 * if pclose() itself fails, or if the command finishes with a non-zero
 * wait status.
 */
void print_command(const char *cmd) {
  FILE *fp = popen(cmd, "r");
  if (fp == NULL) {
    perror("popen");
    exit(-1);
  }

  char buf[1024];
  while (fgets(buf, sizeof(buf), fp) != NULL) {
    fputs(buf, stdout);
  }

  /*
   * pclose() returns -1 only when closing/waiting failed (errno is set then).
   * Any other non-zero value is the command's wait status, for which errno is
   * meaningless, so report it separately instead of through perror().
   */
  int status = pclose(fp);
  if (status == -1) {
    perror("pclose");
    exit(-1);
  }
  if (status != 0) {
    fprintf(stderr, "command '%s' failed (wait status %d)\n", cmd, status);
    exit(-1);
  }
}

/*
 * Print the per-node memory usage of the current process as reported by
 * `numastat -c <pid>`, wrapped in ANSI escape sequences so the report is
 * shown in green.
 */
void print_node_allocations() {
  char numastat_cmd[1024];

  snprintf(numastat_cmd, sizeof(numastat_cmd), "numastat -c %d", getpid());

  fputs("\x1B[32m", stdout);   /* switch the terminal color to green */
  print_command(numastat_cmd);
  fputs("\x1B[0m", stdout);    /* reset terminal attributes */
}

int main(int argc, char **argv) {
  int *a = numa_alloc_local(N * sizeof(int));
  size_t len = (N * sizeof(int)) & PAGE_MASK;
  unsigned long mymask = *numa_get_mems_allowed()->maskp;
  unsigned long maxnode = numa_get_mems_allowed()->size;

  // pin thread to core zero
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(0, &mask);
  if (sched_setaffinity(syscall(SYS_gettid), sizeof(mask), &mask) < 0) {
    perror("sched_setaffinity");
    exit(-1);
  }

  // initialize array
  printf("\n\n(1) array allocated on local node\n");
  a[0] = 997;
  for(size_t i=1; i < N; i++) {
    a[i] = a[i-1] * a[i-1] % 1000000000;
  }
  print_node_allocations();

  // attempt to get it to be uniformly interleaved on all nodes
  printf("\n\n(2) array interleaved on all nodes\n");
  if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
    perror("mbind failed");
    exit(-1);
  }
  print_node_allocations();

  // what if we interleave on all but the local node?
  printf("\n\n(3) array interleaved on all nodes (except local node)\n");
  mymask -= 0x01;
  if (mbind(a, len, MPOL_INTERLEAVE, &mymask, maxnode, MPOL_MF_MOVE_ALL | MPOL_MF_STRICT) == -1) {
    perror("mbind failed");
    exit(-1);
  }
  print_node_allocations();

  return 0;
}

使用 `gcc -o interleave_all interleave_all.c -lnuma && sudo ./interleave_all` 编译并运行,输出如下:

(1) array allocated on local node

Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
         Node 0 Node 1 Node 2 Node 3 Total
         ------ ------ ------ ------ -----
Huge          0      0      0      0     0
Heap          0      0      0      0     0
Stack         0      0      0      0     0
Private     514      0      0      0   514
-------  ------ ------ ------ ------ -----
Total       514      0      0      0   514


(2) array interleaved on all nodes

Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
         Node 0 Node 1 Node 2 Node 3 Total
         ------ ------ ------ ------ -----
Huge          0      0      0      0     0
Heap          0      0      0      0     0
Stack         0      0      0      0     0
Private     514      0      0      0   514
-------  ------ ------ ------ ------ -----
Total       514      0      0      0   514


(3) array interleaved on all nodes (except local node)

Per-node process memory usage (in MBs) for PID 20636 (interleave_all)
         Node 0 Node 1 Node 2 Node 3 Total
         ------ ------ ------ ------ -----
Huge          0      0      0      0     0
Heap          0      0      0      0     0
Stack         0      0      0      0     0
Private       2    171    171    171   514
-------  ------ ------ ------ ------ -----
Total         2    171    171    171   514
4

0 回答 0