我有一个程序,它从一个域名列表文件中读取。它执行异步 DNS,然后使用异步 epoll 循环下载每个域的登录页面。
该程序在数千次迭代中运行良好,然后出现*** buffer overflow detected ***: terminated
错误。这是回溯:
Program received signal SIGABRT, Aborted.
__pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
44 pthread_kill.c: No such file or directory.
(gdb) bt
#0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
#1 __pthread_kill_internal (signo=6, threadid=140737351415616) at pthread_kill.c:80
#2 __GI___pthread_kill (threadid=140737351415616, signo=signo@entry=6) at pthread_kill.c:91
#3 0x00007ffff7db0476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4 0x00007ffff7d967b7 in __GI_abort () at abort.c:79
#5 0x00007ffff7df75e6 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f48ef4 "*** %s ***: terminated\n") at ../sysdeps/posix/libc_fatal.c:155
#6 0x00007ffff7ea322a in __GI___fortify_fail (msg=msg@entry=0x7ffff7f48e9a "buffer overflow detected") at fortify_fail.c:26
#7 0x00007ffff7ea1b46 in __GI___chk_fail () at chk_fail.c:28
#8 0x00007ffff7ea316b in __fdelt_chk (d=<optimised out>) at fdelt_chk.c:25
#9 0x00007ffff7f97362 in ares_fds () from /lib/x86_64-linux-gnu/libcares.so.2
#10 0x000055555555682d in wait_ares (channel=0x555556bb32a0) at epoll_recv_with_async_dns.c:80
#11 0x000055555555773c in main (argc=2, argv=0x7fffffffe0a8) at epoll_recv_with_async_dns.c:303
如您所见,回溯指向对ares_fds
. 有问题的代码行是:
nfds = ares_fds(channel, &read_fds, &write_fds);
我看不到那行代码中如何存在缓冲区溢出。我可以做些什么来进一步调试并找到并解决问题的任何想法。对于那些感兴趣的人来说,最小的复制器如下:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <resolv.h>
#include <sys/epoll.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <time.h>
#include <ares.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define DNSTIMEOUT 3000 /* Max. number of ms for first try */
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
#define MAXDOMAINS 8192
#define MAX_CONNECTIONS 8192
#define TIMEOUT 10000
int epfd;
int sockfd[MAX_CONNECTIONS];
struct epoll_event event[MAX_CONNECTIONS];
struct sockaddr_in dest[MAX_CONNECTIONS];
char resolved[MAXDOMAINS][254];
char ips[MAXDOMAINS][128];
int current = 0, active = 0, next = 0;
char servers[MAX_CONNECTIONS][128];
char domains[MAX_CONNECTIONS][254];
int i, num_ready, connections = 0, done = 0, total_bytes = 0, total_domains = 0, iterations = 0, count = 0;
static int nwaiting;
static void state_cb(void *data, int s, int read, int write)
{
//printf("Change state fd %d read:%d write:%d\n", s, read, write);
}
static void callback(void *arg, int status, int timeouts, struct hostent *host)
{
nwaiting--;
if(!host || status != ARES_SUCCESS){
//fprintf(stderr, "Failed to lookup %s\n", ares_strerror(status));
return;
}
char ip[INET6_ADDRSTRLEN];
if (host->h_addr_list[0] != NULL){
inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip));
strcpy(resolved[current], host->h_name);
strcpy(ips[current], ip);
if (current < MAXDOMAINS - 1) current++; else current = 0;
active++;
printf("active %d\r", active);
}
}
static void wait_ares(ares_channel channel)
{
struct timeval *tvp, tv;
fd_set read_fds, write_fds;
int nfds = 0;
FD_ZERO(&read_fds);
FD_ZERO(&write_fds);
nfds = ares_fds(channel, &read_fds, &write_fds);
if (nfds > 0) {
tvp = ares_timeout(channel, NULL, &tv);
select(nfds, &read_fds, &write_fds, NULL, tvp);
ares_process(channel, &read_fds, &write_fds);
}
}
int main(int argc, char *argv[]) {
sigaction(SIGPIPE, &(struct sigaction){SIG_IGN}, NULL);
FILE * fp;
char domain[128];
size_t len = 0;
ssize_t read;
ares_channel channel;
int status, dns_done = 0;
int optmask;
status = ares_library_init(ARES_LIB_INIT_ALL);
if (status != ARES_SUCCESS) {
printf("ares_library_init: %s\n", ares_strerror(status));
return 1;
}
struct ares_options options = {
.timeout = DNSTIMEOUT, /* set first query timeout */
.tries = MAXTRIES /* set max. number of tries */
};
optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;
status = ares_init_options(&channel, &options, optmask);
if (status != ARES_SUCCESS) {
printf("ares_init_options: %s\n", ares_strerror(status));
return 1;
}
status = ares_set_servers_csv(channel, SERVERS);
if (status != ARES_SUCCESS) {
printf("ares_set_servers_csv: %s\n", ares_strerror(status));
return 1;
}
fp = fopen(argv[1], "r");
if (!fp)
exit(EXIT_FAILURE);
do{
if (nwaiting >= MAXWAITING || dns_done) {
do {
wait_ares(channel);
} while (nwaiting > MAXWAITING);
}
if (!dns_done) {
if (fscanf(fp, "%128s", domain) == 1) {
ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
nwaiting++;
} else {
dns_done = 1;
}
}
} while (active < MAX_CONNECTIONS);
/*---Open sockets for streaming---*/
for (i = 0; i < MAX_CONNECTIONS; i++)
{
if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
perror("Socket");
exit(errno);
}
count++;
}
while (1)
{
/*---Do async DNS---*/
while (/*active < MAXDOMAINS &&*/ nwaiting > 0) {
//printf("active = %d MAXDOMAINS = %d nwaiting = %d MAXWAITING = %d\n", active, MAXDOMAINS, nwaiting, MAXWAITING);
if (nwaiting >= MAXWAITING || dns_done) {
do {
wait_ares(channel);
} while (nwaiting > MAXWAITING);
}
if (!dns_done) {
if (fscanf(fp, "%127s", domain) == 1) {
ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
nwaiting++;
} else {
dns_done = 1;
}
}
} //while (active < MAXDOMAINS);
if (done && count == 0) break;
}
ares_destroy(channel);
ares_library_cleanup();
fclose(fp);
printf("\nFinished without errors\n");
return 0;
}
如果我注释掉创建套接字的部分,则不会发生中止:
/*---Open sockets for streaming---*/
for (i = 0; i < MAX_CONNECTIONS; i++)
{
if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
perror("Socket");
exit(errno);
}
count++;
}
所以无论问题是什么,它都与我有许多套接字文件描述符这一事实有关。有任何想法吗?
进一步编辑:
进一步调试似乎表明问题与打开的套接字数量有关。如果我将创建的套接字数量减少到 1017,则不再发生中止。如果我创建 1018 个套接字,程序将中止。