2

我正在尝试用汇编编写一个函数,它会调用 C 中的一个函数:如果 char 数组中的当前字符满足某个条件,就用预定义的字符替换字符串中的这个字符。

我的 c 文件:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

//display *((char *) $edi)
// These functions will be implemented in assembly:
//

int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;


/*
 * isvowel - report whether c is an ASCII vowel.
 *
 * Returns 1 when c is one of a, e, i, o, u in either letter case,
 * and 0 for every other value.
 */
int isvowel (int c) {
   switch (c) {
      case 'a': case 'e': case 'i': case 'o': case 'u':
      case 'A': case 'E': case 'I': case 'O': case 'U':
         return 1 ;
      default:
         return 0 ;
   }
}

// Test driver: makes a heap copy of a sample string, passes it to strrepl
// together with '#' and isdigit, then prints the string and the value that
// strrepl returned.
int main(){
    char *str1;   // heap copy produced by strdup(); released with free() below
    int r;        // value returned by strrepl (printed as the replacement count)
// Debugging note from the author: in the debugger, the character that ecx
// points at is listed as "A" (correct) right before "add ecx, 1" executes;
// after that instruction it shows 0, although "B" was expected.

    // NOTE(review): strdup() may return NULL; the result is not checked here.
    str1 = strdup("ABC 123 779 Hello World") ;
    r = strrepl(str1, '#', &isdigit) ;
    printf("str1 = \"%s\"\n", str1) ;
    printf("%d chararcters were replaced\n", r) ;
    free(str1) ;
    return 0;
}

还有我的 .asm 文件:

; File: strrepl.asm
; Implements a C function with the prototype:
;
;   int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
; 
; Result: chars in string are replaced with the replacement character and string is returned.

    SECTION .text
    global  strrepl


; strrepl -- work-in-progress body for
;     int strrepl(char *str, int c, int (*isinsubset)(int c));
; 32-bit x86, NASM syntax. Arguments are read from the stack at [ebp + 8],
; [ebp + 12] and [ebp + 16]. Only `strrepl` is declared global; `_strrepl`
; is a plain label whose nop falls through into it.
;
; What the code does as written:
;   * returns with eax = 0 when str is a NULL pointer;
;   * otherwise advances ecx by one, loads a dword from [ecx], and either
;     jumps to `end` (dword == 0) or calls isinsubset once with that dword;
;   * there is no jump back to firstLoop, so after that single call
;     execution falls through to `end` and eax still holds the callback's
;     return value. Nothing is written to the string.
_strrepl:   nop
strrepl:
    push    ebp         ; set up stack frame
    mov ebp, esp

    push    esi         ; save registers
    push    ebx
    xor eax, eax        ; eax = 0, the value returned on the early exit below
    mov ecx, [ebp + 8]      ;load string (char array) into ecx
    jecxz   end         ;jump if [ecx] is zero -- NOTE(review): jecxz tests ecx itself (a NULL str), not the byte it points to
    mov esi, [ebp + 12]     ;move the replacement character into esi
    mov edx, [ebp + 16]     ;move function pointer into edx
    xor bl, bl          ;bl will be our counter


firstLoop:
    add bl, 1           ;inc bl would work too
    add ecx, 1          ; NOTE(review): runs before the first load, so the byte at str[0] is never examined
    ; NOTE(review): the destination is a 32-bit register, so the next line
    ; loads four bytes from the string rather than a single character.
    mov eax, [ecx]  
    cmp eax, 0
    jz  end
    push    eax         ; parameter for (*isinsubset)
    ;BREAK
    call    edx         ; execute (*isinsubset)

    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value -- NOTE(review): this overwrites the counter kept in bl




end:
    pop ebx         ; restore registers
    pop esi
    mov esp, ebp        ; take down stack frame
    pop ebp
    ret

在 gdb 中运行它并在 ;BREAK 处设置断点后,只要我单步执行 call 指令,程序就会发生段错误,并报告以下错误:

Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6

isdigit 属于标准 C 库,我的 C 文件已经包含了相应的头文件,所以我不知道该如何理解这个错误。

编辑:我已经编辑了我的 firstLoop 并包含了一个 secondLoop,它应该用“#”替换任何数字,但它似乎替换了整个数组。

; Revised scan loop. Each pass: stop if the dword at [ecx] is zero; otherwise
; call the function in edx with the zero-extended byte at [ecx]. A non-zero
; result branches to secondLoop, which stores esi at [ecx]. Both paths then
; pop the argument, copy the result to ebx, advance ecx by one and repeat.
; Presumably ecx = current string position, esi = replacement character and
; edx = isinsubset on entry -- TODO confirm against the function prologue.
firstLoop:

    xor eax, eax

    mov edi, [ecx]      ; NOTE(review): dword load, so the end-of-string test looks at four bytes
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    mov ebp, edx        ; save function pointer -- NOTE(review): this overwrites ebp, which an epilogue doing `mov esp, ebp` depends on
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    ;cmp    eax, 0
    ;jne    end

    mov ecx, edi        ; restore array (relies on the callee leaving edi unchanged)
    cmp eax, 0
    jne secondLoop  
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

; Replacement path: taken when the callback returned non-zero.
secondLoop:
    mov [ecx], esi      ; NOTE(review): dword store, writes four bytes starting at [ecx]
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; pop the callback argument
    mov ebx, eax        ; copy the callback result into ebx
    add ecx, 1          ; advance to the next byte
    jmp     firstLoop

使用 gdb 调试时,当代码进入 secondLoop 时一切都是正确的:ecx 显示为“1”,这是从 .c 文件传入的字符串中的第一个数字;esi 也如预期显示为“#”。但是,在我执行 mov [ecx], esi 之后,情况似乎就乱了。此时 ecx 显示为“#”,但是一旦我把它加 1 以到达数组中的下一个字符,用 display 查看时它就显示为“/000”。在 1 被替换为“#”之后,其后的每个字符用 display 查看时都显示为“/000”。在让 secondLoop 尝试用“#”替换字符之前,我只让 firstLoop 自己循环,看看它能否在不崩溃的情况下遍历整个数组。它确实可以,并且每次递增之后 ecx 都显示为正确的字符。

4

2 回答 2

1

在您的 firstLoop: 中,您使用以下指令从字符串中加载字符:

mov eax, [ecx]

这会一次加载 4 个字节,而不是单个字节。因此,您传递给 isdigit() 的 int 很可能远远超出了它能处理的范围(它很可能使用简单的查表实现)。

您可以使用以下 Intel asm 语法加载单个字节(下面是 MASM 的写法;在 NASM 中要省略 ptr,写作 movzx eax, byte [ecx]):

movzx eax, byte ptr [ecx]

其他几件事:

  • 一次加载 4 个字节还会带来另一个后果:字符串的结尾可能无法被正确检测到,因为空终止符后面不一定还跟着另外三个零字节。
  • 我不确定为什么在处理字符串的第一个字符之前就递增了 ecx
  • 您发布的汇编代码似乎实际上并没有在字符串上循环
于 2013-05-07T08:35:23.617 回答
0

我在您的代码中添加了一些注释:-

  ; Annotated copy of the question's strrepl body; review notes precede the
  ; instructions they refer to.
  ;
  ; this is OK: setting up the stack frame and saving important registers
  ; under the 32-bit cdecl convention the callee must preserve ebx, esi, edi
  ; and ebp (this holds on Win32 and on i386 System V / Linux alike);
  ; eax, ecx and edx can be used without needing to preserve them
  push    ebp
  mov ebp, esp
  push    esi
  push    ebx

  xor eax, eax
  mov ecx, [ebp + 8]

  ; you said that this checked [ecx] for zero, but I think you've just written
  ; that wrong: this checks the value of ecx for zero, whereas the [reg] form
  ; usually indicates the value at the address held in reg
  ; so this is effectively doing a null pointer check (which is good)
  jecxz   end

  mov esi, [ebp + 12]
  mov edx, [ebp + 16]
  xor bl, bl

firstLoop:
  add bl, 1
  ; you increment ecx before loading the first character, this means
  ; that the function ignores the first character of the string
  ; and will therefore produce an incorrect result if the string
  ; starts with a character that needs replacing
  add ecx, 1
  ; characters are 8 bit, not 32 bit (mentioned in comments elsewhere);
  ; this instruction loads four bytes
  mov eax, [ecx]  
  cmp eax, 0
  jz  end
  push    eax
  ; possibly segfaults due to character out of range
  ; also, as mentioned elsewhere, the function you call here must conform to
  ; the standard calling convention of the system (i.e. it preserves esi, edi
  ; and ebx), but eax, ecx and edx can change across the call, so the next
  ; time you `call edx` it might jump to a random address, and ecx may no
  ; longer point into the string
  ; either save edx on the stack (push before pushing parameters, pop after add esp)
  ; or just load edx with [ebp+16] here instead of at the start
  call    edx

  add esp, 4
  ; this overwrites the counter that was being kept in bl
  mov ebx, eax

  ; more functionality required here!



end:
  ; restore important values, etc
  pop ebx
  pop esi
  mov esp, ebp
  pop ebp
  ; the result of the function should be in eax, but that's not set up properly yet
  ret

对您的内部循环的评论:-

; Annotated copy of the revised scan loop from the question; review notes
; precede the instructions they refer to.
firstLoop:

    xor eax, eax

    ; you're loading a 32 bit value and checking it for zero, but
    ; strings are terminated with a null character, which is an 8 bit value,
    ; not a 32 bit value; the load also reads past the end of the string,
    ; so this is unlikely to test for the end of the string correctly

    mov edi, [ecx]
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    ; you need to keep ebp! its value must be preserved (at the end,
    ; you do a mov esp,ebp)
    mov ebp, edx        ; save function pointer
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    mov ecx, edi        ; restore array
    cmp eax, 0
    jne secondLoop  
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

secondLoop:
    ; again, you're accessing the string using a 32 bit value, not an 8 bit
    ; value, so you're overwriting the matched character and the next three
    ; characters with the contents of esi
    ; the upper 24 bits of esi are presumably zero (it holds a character
    ; value), so the three following bytes become NUL and the string, when
    ; printed, ends right after the replaced character
    ; also, the function is supposed to return a count of characters replaced,
    ; but you're not recording the fact that a character has been replaced
    mov [ecx], esi
    mov edx, ebp
    add esp, 4
    mov ebx, eax
    add ecx, 1
    jmp     firstLoop

您似乎确实对内存的工作方式存在一些误解:您混淆了 8 位和 32 位的内存访问。

于 2013-05-07T09:55:50.000 回答