c - 自修改代码中可能的指令缓存同步问题？

Question

很多相关问题< x86指令缓存如何同步？> 提到 x86 应该在自修改代码中正确处理 i-cache 同步。我编写了以下代码，它可以在与执行交错的不同线程中打开和关闭函数调用。我使用比较和交换操作作为额外的保护，以便修改是原子的。但是我遇到了间歇性崩溃（SIGSEGV、SIGILL）并且分析核心转储让我怀疑处理器是否正在尝试执行部分更新的指令。下面给出代码和分析。可能是我在这里遗漏了一些东西。让我知道是否是这种情况。

切换.c

#include <stdio.h>
#include <inttypes.h>
#include <time.h>
#include <pthread.h>
#include <sys/mman.h>
#include <errno.h>
#include <unistd.h>

int active = 1; // Whether the function is toggled on or off
uint8_t* funcAddr = 0; // Address where function call happens which we need to toggle on/off
uint64_t activeSequence = 0; // Byte sequence for toggling on the function CALL
uint64_t deactiveSequence = 0; // NOP byte sequence for toggling off the function CALL

inline int modify_page_permissions(uint8_t* addr) {

  long page_size = sysconf(_SC_PAGESIZE);
  int code = mprotect((void*)(addr - (((uint64_t)addr)%page_size)), page_size,
    PROT_READ | PROT_WRITE | PROT_EXEC);

  if (code) {
    fprintf(stderr, "mprotect was not successfull! code %d\n", code);
    fprintf(stderr, "errno value is : %d\n", errno);
    return 0;
  }

  // If the 8 bytes we need to modify straddles a page boundary make the next page writable too
  if (page_size - ((uint64_t)addr)%page_size < 8) {
    code = mprotect((void*)(addr-((uint64_t)addr)%page_size+ page_size) , page_size,
      PROT_READ | PROT_WRITE | PROT_EXEC);
    if (code) {
      fprintf(stderr, "mprotect was not successfull! code %d\n", code);
      fprintf(stderr, "errno value is : %d\n", errno);
      return 0;;
    }
  }

  return 1;
}

void* add_call(void* param) {

  struct timespec ts;
  ts.tv_sec = 0;
  ts.tv_nsec = 50000;

  while (1) {
    if (!active) {
      if (activeSequence != 0) {
        int status = modify_page_permissions(funcAddr);
        if (!status) {
          return 0;
        }

        uint8_t* start_addr = funcAddr - 8;

        fprintf(stderr, "Activating foo..\n");
        uint64_t res = __sync_val_compare_and_swap((uint64_t*) start_addr,
                                    *((uint64_t*)start_addr), activeSequence);
        active = 1;
      } else {
        fprintf(stderr, "Active sequence not initialized..\n");
      }
    }

    nanosleep(&ts, NULL);
  }

}

int remove_call(uint8_t* addr) {

  if (active) {
    // Remove gets called first before add so we initialize active and deactive state byte sequences during the first call the remove
    if (deactiveSequence == 0) {
      uint64_t sequence =  *((uint64_t*)(addr-8));
      uint64_t mask = 0x0000000000FFFFFF;
      uint64_t deactive = (uint64_t) (sequence & mask);
      mask = 0x9090909090000000; // We NOP 5 bytes of CALL instruction and leave rest of the 3 bytes as it is

      activeSequence = sequence;
      deactiveSequence = deactive |  mask;
      funcAddr = addr;
    }

    int status = modify_page_permissions(addr);
    if (!status) {
      return -1;
    }

    uint8_t* start_addr = addr - 8;

    fprintf(stderr, "Deactivating foo..\n");
    uint64_t res = __sync_val_compare_and_swap((uint64_t*)start_addr,
                                  *((uint64_t*)start_addr), deactiveSequence);
    active = 0;
    // fprintf(stderr, "Result : %p\n", res);
  }
}

int counter = 0;

void foo(int i) {

  // Use the return address to determine where we need to patch foo CALL instruction (5 bytes)
  uint64_t* addr = (uint64_t*)__builtin_extract_return_addr(__builtin_return_address(0));

  fprintf(stderr, "Foo counter : %d\n", counter++);
  remove_call((uint8_t*)addr);
}

// This thread periodically checks if the method is inactive and if so reactivates it
void spawn_add_call_thread() {
  pthread_t tid;
  pthread_create(&tid, NULL, add_call, (void*)NULL);
}

int main() {

  spawn_add_call_thread();

  int i=0;
  for (i=0; i<1000000; i++) {
    // fprintf(stderr, "i : %d..\n", i);
   foo(i);
  }

  fprintf(stderr, "Final count : %d..\n\n\n", counter);
}

核心转储分析

Program terminated with signal 4, Illegal instruction.
#0  0x0000000000400a28 in main () at toggle.c:123
(gdb) info frame
 Stack level 0, frame at 0x7fff7c8ee360:
   rip = 0x400a28 in main (toggle.c:123); saved rip 0x310521ed5d
 source language c.
 Arglist at 0x7fff7c8ee350, args:
 Locals at 0x7fff7c8ee350, Previous frame's sp is 0x7fff7c8ee360
 Saved registers:
 rbp at 0x7fff7c8ee350, rip at 0x7fff7c8ee358
(gdb) disas /r 0x400a28,+30
 Dump of assembler code from 0x400a28 to 0x400a46:
  => 0x0000000000400a28 <main+64>:   ff (bad)
     0x0000000000400a29 <main+65>:   ff (bad)
     0x0000000000400a2a <main+66>:   ff eb  ljmpq  *<internal disassembler error>
     0x0000000000400a2c <main+68>:   e7 48  out    %eax,$0x48
 (gdb) disas /r main
  Dump of assembler code for function main:
     0x00000000004009e8 <+0>:    55 push   %rbp
     ...
     0x0000000000400a24 <+60>:   89 c7  mov    %eax,%edi
     0x0000000000400a26 <+62>:   e8 11 ff ff ff callq  0x40093c <foo>
     0x0000000000400a2b <+67>:   eb e7  jmp    0x400a14 <main+44>

因此可以看出，指令指针似乎位于 CALL 指令内的地址内，处理器显然正试图执行该未对齐的指令，从而导致非法指令错误。

score 3 · Accepted Answer

在 80x86 上，大多数调用使用相对位移，而不是绝对地址。本质上它是“在此处调用代码+ <位移>”而不是“在<地址>处调用代码”。

对于 64 位代码，位移可能是 8 位或 32 位。它永远不是 64 位的。

例如，对于一个 2 字节的“使用 8 位位移调用”指令，您将在调用指令、call操作码本身和指令的操作数（位移）之前丢弃 6 个字节。

再举一个例子，对于一个 5 字节的“32 位位移调用”指令，您将在调用指令、call操作码本身和指令的操作数（位移）之前丢弃 3 个字节。

然而...

这些不是唯一的调用方式。例如，您可以使用函数指针进行调用，其中被调用代码的地址根本不在指令中（但可能在寄存器中或内存中的变量）。还有一个称为“尾调用优化”的优化，其中 acall后跟 aret被替换为 a jmp（可能有一些额外的堆栈欺骗来传递参数，清理调用者的局部变量等）。

本质上; 您的代码严重损坏，您无法涵盖所有可能的极端情况，您不应该一开始就这样做，并且您可能应该使用函数指针而不是自我修改代码（这会更快、更容易并且也便携）。

score 3 · Accepted Answer

我认为您的问题是您用 5 个 1 字节 NOP 替换了 5 字节 CALL 指令。考虑一下当您的线程执行了 3 个 NOP 时会发生什么，然后您的主线程决定将 CALL 指令换回。您的线程的 PC 将在 CALL 指令中间占据三个字节，因此将执行意外且可能是非法的操作说明。

您需要做的是将 5 字节 CALL 指令与 5 字节 NOP 交换。您只需要找到一个不执行任何操作的多字节指令（例如对自身执行或操作），如果您需要一些额外的字节，请在前面添加一些前缀字节，例如 gs 覆盖前缀和地址大小覆盖前缀（两者这不会做任何事情）。通过使用 5 字节 NOP，您的线程将被保证在 CALL 指令处或在 CALL 指令之后，但绝不会在其中。

c - 自修改代码中可能的指令缓存同步问题？

2 回答 2

Related

Reference