0

我的 C 程序没有将它正在显示的相同数据写入文件。如何将输出到屏幕的内容保存在文件中。我正在尝试保存网页,文件名由第三个选项定义,[站点] [页面] [路径]

#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <netdb.h>
#include <string.h>

int create_tcp_socket();
char *get_ip(char *host);
char *build_get_query(char *host, char *page);
void usage();

#define HOST "coding.debuntu.org"
#define PAGE "/"
#define PORT 80
#define USERAGENT "HTMLGET 1.0"
#define OS "mac osx"

int main(int argc, char **argv)
{
  struct sockaddr_in *remote;
  int sock;
  int tmpres;
  char *ip;
  char *get;
  char buf[BUFSIZ+1];
  char *host;
  char *page;
  char *HTMLfile;

  if(argc == 1){
    usage();
    exit(2);
  }  
  host = argv[1];
  if(argc > 2){
    page = argv[2];
  }else{
    page = PAGE;
  }
  HTMLfile = argv[3];

  sock = create_tcp_socket();
  ip = get_ip(host);
  fprintf(stderr, "<!--\nIP is %s\n", ip);
  remote = (struct sockaddr_in *)malloc(sizeof(struct sockaddr_in *));
  remote->sin_family = AF_INET;
  tmpres = inet_pton(AF_INET, ip, (void *)(&(remote->sin_addr.s_addr)));
  if( tmpres < 0)  
  {
    perror("Can't set remote->sin_addr.s_addr");
    exit(1);
  }else if(tmpres == 0)
  {
    fprintf(stderr, "%s is not a valid IP address\n", ip);
    exit(1);
  }
  remote->sin_port = htons(PORT);

  if(connect(sock, (struct sockaddr *)remote, sizeof(struct sockaddr)) < 0){
    perror("Could not connect");
    exit(1);
  }
  get = build_get_query(host, page);
  fprintf(stderr, "nQuery is:\n<<START>>\n%s<<END>>\n-->\n", get);

  //Send the query to the server
  int sent = 0;
  while(sent < strlen(get))
  {
    tmpres = send(sock, get+sent, strlen(get)-sent, 0);
    if(tmpres == -1){
      perror("Can't send query");
      exit(1);
    }
    sent += tmpres;
  }
  //now it is time to receive the page
  memset(buf, 0, sizeof(buf));
  int htmlstart = 0;
  char * htmlcontent;
  while((tmpres = recv(sock, buf, BUFSIZ, 0)) > 0){
    if(htmlstart == 0)
    {
      /* Under certain conditions this will not work.
      * If the \r\n\r\n part is splitted into two messages
      * it will fail to detect the beginning of HTML content
      */
      htmlcontent = strstr(buf, "\r\n\r\n");
      if(htmlcontent != NULL){
        htmlstart = 1;
        htmlcontent += 4;
      }
    }else{
      htmlcontent = buf;
    }
    if(htmlstart){
      fprintf(stdout, "%s", htmlcontent);
    }

    FILE *f;
    f = fopen(HTMLfile, "w");
    fprintf(f, "%s", htmlcontent); //stderr, "%s"
    fclose(f);

    memset(buf, 0, tmpres);
  }
  if(tmpres < 0)
  {
    perror("Error receiving data");
  }
  free(get);
  free(remote);
  free(ip);
  close(sock);
  return 0;
}

void usage()
{
  fprintf(stderr, "USAGE: htmlget host [page]\n\
\thost: the website hostname. ex: coding.debuntu.org\n\
\tpage: the page to retrieve. ex: index.html, default: /\n");
}


int create_tcp_socket()
{
  int sock;
  if((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0){
    perror("Can't create TCP socket");
    exit(1);
  }
  return sock;
}


char *get_ip(char *host)
{
  struct hostent *hent;
  int iplen = 15; //XXX.XXX.XXX.XXX
  char *ip = (char *)malloc(iplen+1);
  memset(ip, 0, iplen+1);
  if((hent = gethostbyname(host)) == NULL)
  {
    herror("Can't get IP");
    exit(1);
  }
  if(inet_ntop(AF_INET, (void *)hent->h_addr_list[0], ip, iplen) == NULL)
  {
    perror("Can't resolve host");
    exit(1);
  }
  return ip;
}

char *build_get_query(char *host, char *page)
{
  char *query;
  char *getpage = page;
  char *tpl = "GET /%s HTTP/1.0\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n";
  if(getpage[0] == '/'){
    getpage = getpage + 1;
    fprintf(stderr,"Removing leading \"/\", converting %s to %s\n", page, getpage);
  }
  // -5 is to consider the %s %s %s in tpl and the ending \0
  query = (char *)malloc(strlen(host)+strlen(getpage)+strlen(USERAGENT)+strlen(OS)+strlen(tpl)-5);
  sprintf(query, tpl, getpage, host, USERAGENT);
  return query;
//  FILE *f;
//  f = fopen(HTMLfile, "w");
//  fprintf(f, htmlcontent);
//  fclose(f);

}

这是我保存文件的位置:

FILE *f;
f = fopen(HTMLfile, "w");
fprintf(f, "%s", htmlcontent); //stderr, "%s"
fclose(f);

这是它为我的问题保存的结果(此页面)

(['_setCustomVar', 1, 'tags', '|c|file|networking|']); 
_gaq.push(['_trackPageview']);
    var _qevents = _qevents || [];
    (function () {
        var ssl='https:'==document.location.protocol,
            s=document.getElementsByTagName('script')[0],
            ga=document.createElement('script');
        ga.type='text/javascript';
        ga.async=true;
        ga.src=(ssl?'https://ssl':'http://www')+'.google-analytics.com/ga.js';
        s.parentNode.insertBefore(ga,s);
        var sc=document.createElement('script');
        sc.type='text/javascript';
        sc.async=true;
        sc.src=(ssl?'https://secure':'http://edge')+'.quantserve.com/quant.js';
        s.parentNode.insertBefore(sc,s);
    })();
    _qevents.push({ qacct: "p-c1rF4kxgLUzNc" });
    </script>        

</body>
</html>
4

2 回答 2

1
FILE *f;
f = fopen(HTMLfile, "w");
fprintf(f, "%s", htmlcontent); //stderr, "%s"
fclose(f);

而不是每次通过循环打开和关闭文件,您应该在循环之前打开它一次,在写入时保持打开,然后在循环之后关闭它。

于 2013-08-28T22:10:52.350 回答
0

不要使用“w”打开文件,而是使用“a”。更好的是,只打开一次文件。

f = fopen(HTMLfile, "a");

w" write:创建一个空文件,用于输出操作。如果已经存在同名文件,则丢弃其内容,将该文件视为新的空文件。

“a” 附加:在文件末尾打开文件以进行输出。输出操作总是在文件末尾写入数据,扩展它。重新定位操作(fseek、fsetpos、rewind)被忽略。如果文件不存在,则创建该文件。

于 2013-08-28T22:12:35.230 回答