我有一个问题,即在 clone() 系统调用之后的某个时间 realloc() 死锁。
我的代码是:
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/types.h>
#define CHILD_STACK_SIZE 4096*4
#define gettid() syscall(SYS_gettid)
#define log(str) fprintf(stderr, "[pid:%d tid:%d] "str, getpid(),gettid())
int clone_func(void *arg){
int *ptr=(int*)malloc(10);
int i;
for (i=1; i<200000; i++)
ptr = realloc(ptr, sizeof(int)*i);
free(ptr);
return 0;
}
int main(){
int flags = 0;
flags = CLONE_VM;
log("Program started.\n");
int *ptr=NULL;
ptr = malloc(16);
void *child_stack_start = malloc(CHILD_STACK_SIZE);
int ret = clone(clone_func, child_stack_start +CHILD_STACK_SIZE, flags, NULL, NULL, NULL, NULL);
int i;
for (i=1; i<200000; i++)
ptr = realloc(ptr, sizeof(int)*i);
free(ptr);
return 0;
}
gdb 中的调用栈是:
[pid:13268 tid:13268] Program started.
^Z[New LWP 13269]
Program received signal SIGTSTP, Stopped (user).
0x000000000040ba0e in __lll_lock_wait_private ()
(gdb) bt
#0 0x000000000040ba0e in __lll_lock_wait_private ()
#1 0x0000000000408630 in _L_lock_11249 ()
#2 0x000000000040797f in realloc ()
#3 0x0000000000400515 in main () at test-realloc.c:36
(gdb) i thr
2 LWP 13269 0x000000000040ba0e in __lll_lock_wait_private ()
* 1 LWP 13268 0x000000000040ba0e in __lll_lock_wait_private ()
(gdb) thr 2
[Switching to thread 2 (LWP 13269)]#0 0x000000000040ba0e in __lll_lock_wait_private ()
(gdb) bt
#0 0x000000000040ba0e in __lll_lock_wait_private ()
#1 0x0000000000408630 in _L_lock_11249 ()
#2 0x000000000040797f in realloc ()
#3 0x0000000000400413 in clone_func (arg=0x7fffffffe53c) at test-realloc.c:20
#4 0x000000000040b889 in clone ()
#5 0x0000000000000000 in ?? ()
我的操作系统是 debian linux-2.6.32-5-amd64,带有 GNU C 库 (Debian EGLIBC 2.11.3-4) 稳定版本 2.11.3。我深深怀疑 eglibc 是这个 bug 的罪魁祸首。在 clone() 系统调用上,在使用 realloc() 之前还不够吗?