0

该程序应该将网页打印到终端

用法:./prog www.page.com 80 /folder/index.html

程序总是按照它应该做的,但总是在最后出现段错误

#define _XOPEN_SOURCE 600
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <termios.h>

#define HOST "localhost"
#define PAGE "/"
#define PORT "80"
#define USERAGENT "HTML 1.1"

char *build_get_query(char *host,char *page){
    char *query;
    char *getpage = page;
    char *tpl = "GET /%s HTTP/1.1\r\nHost: %s\r\nUser-Agent: %s\r\nConnection: close\r\n\r\n";

if(getpage[0]=='/'){
        getpage=getpage + 1; /*Removing leading / */
    }
    /* -5 is to consider the %s %s %s in tpl and the ending \0 */
    query=malloc(strlen(host)+strlen(getpage)+strlen(USERAGENT)+strlen(tpl)-5);
    sprintf(query, tpl, getpage, host, USERAGENT);

    return query;
}

int main(int argc, char **argv){
    int sockfd;

    struct addrinfo hints, *servinfo, *p;
    int s;  

    char *page;
    char *stringA;
    int tmpres;
    char *sstr;

    if(argc<3){
        fprintf(stderr,"\n\tUSAGE:\n\t%s hostname/ip PORTnr /PAGE/index.shtml \n\n",argv[0]);
        fprintf(stderr,"\texample:\n\t%s www.anypage.com 80 /folder/index.shtml \n\n",argv[0]);
        exit(1);
    }

    memset(&hints,0,sizeof(struct addrinfo));
    hints.ai_family=AF_INET;    /* Allow IPv4 or IPv6 */
    hints.ai_socktype=SOCK_STREAM; /* stream socket */
    hints.ai_flags=0;
    hints.ai_protocol=0;          /* Any protocol */
    if((s=getaddrinfo(argv[1],PORT,&hints,&servinfo))!=0){
        fprintf(stderr,"\n\tgetaddrinfo: %s\n",gai_strerror(s));
        exit(EXIT_FAILURE);
    }
    for(p=servinfo; p!=NULL; p=p->ai_next){
        if((sockfd=socket(p->ai_family,p->ai_socktype,p->ai_protocol))==-1){
            fprintf(stderr,"\n\tSOCKET FAIL\n\n");
            continue;
        }

        if((connect(sockfd,p->ai_addr,p->ai_addrlen))<0){
            close(sockfd);
            fprintf(stderr,"\n\tCONECTION FAIL\n\n");
            continue;
        }
        break;
    }
    if(p==NULL){
        fprintf(stderr,"\n\tCONECTION FAIL\n\n");
        exit(2);
    }
    freeaddrinfo(servinfo);


    if(argc > 3){
        page=argv[3];
    }else{
        page=PAGE;
    }

    if((stringA=malloc(BUFSIZ))==NULL){
        printf("\n\tMEMORY ERROR\n\n");
        exit(1);
    }
    stringA=build_get_query(argv[1],page);
    if((write(sockfd,stringA,strlen(stringA)))<0){
        printf("\n\tERROR ON SEND\n\n");
    }
    memset(stringA, 0, sizeof(stringA));

    while((tmpres=read(sockfd,stringA,BUFSIZ))>0){

        if((sstr=strstr(stringA,"<!DOCTYPE "))!=NULL){
            stringA=sstr;
        }
        if((sstr=strstr(stringA,"</html>"))!=NULL){
            strncpy(sstr+7,"\n",(BUFSIZ-strlen(stringA))+5);
        }
        fprintf(stdout,"%s",stringA);

    }
    if(tmpres<0){
        perror("READ ERROR");
    }

    close(sockfd);
    return 0;
}

gdb 信息

Program received signal SIGSEGV, Segmentation fault.
_dl_fini () at dl-fini.c:183
183 dl-fini.c: No such file or directory.
    in dl-fini.c
(gdb) bt
#0  _dl_fini () at dl-fini.c:183
#1  0x0015d1ef in __run_exit_handlers (status=0, listp=0x289324, run_list_atexit=true) at exit.c:78
#2  0x0015d25f in *__GI_exit (status=0) at exit.c:100
#3  0x00144bde in __libc_start_main (main=0x804892d <main>, argc=4, ubp_av=0xbffff454, init=0x8048d00 <__libc_csu_init>, 
    fini=0x8048cf0 <__libc_csu_fini>, rtld_fini=0x11e080 <_dl_fini>, stack_end=0xbffff44c) at libc-start.c:258
#4  0x08048811 in _start ()
(gdb) 

我对 gdb 信息很感兴趣,因为我sudo apt-get install build-essential 大约一年前安装了,直到今天才出现问题。

4

2 回答 2

2

我看到的是您将BUFSIZ字节分配给缓冲区,并且您将最多BUFSIZ字节读入缓冲区,但是您试图在缓冲区上使用 astrstr()和 a strlen()......如果您实际上已将BUFSIZ字节读入缓冲区可能也不是以空结尾的字符串。

我建议您分配BUFSIZ+1memset( stringA, 0, BUFSIZ+1 )防止随机遍历内存,strlen()这会在 while 循环中产生一些非常未定义的行为,并且还可能导致printf()循环内的行为异常,或者至少printf( "%.*s", BUFSIZ, stringA );用于输出。

char readBuffer[BUFSIZ+1] = { 0 };

stringA=build_get_query(argv[1],page);

if((write(sockfd,stringA,strlen(stringA)))<0){
    printf("\n\tERROR ON SEND\n\n");
}

while((tmpres=read(sockfd,readBuffer,BUFSIZ))>0){

    if((sstr=strstr(readBuffer,"<!DOCTYPE "))!=NULL){
        strcpy( readBuffer, sstr );
    }
    if((sstr=strstr(readBuffer,"</html>"))!=NULL){
        strncpy(sstr+7,"\n",(BUFSIZ-strlen(readBuffer))+5);
    }
    fprintf(stdout,"%s",readBuffer);

    memset( readBuffer, 0 , BUFSIZ+1 );
}

PS:实际上,在阅读了其他回复之后,我意识到您犯了一个更大的错误,这取决于您将 stringA 重新分配给其他地址(从构建查询返回)后指向 BUFSIZ 数组的指针。

于 2013-05-06T16:47:30.283 回答
2

并且分配给 stringA 的缓冲区大小不是 BUFSIZ。毕竟它接收到在 build_get_query 中分配的缓冲区的地址,它可能比 BUFSIZ 还要小。我不知道您为什么使用 stringA=malloc(BUFSIZ) 并在下一条指令中将 stringA 指针更改为另一个缓冲区。那里有很多内存泄漏。

于 2013-05-06T17:09:19.220 回答