1

编辑:修复了代码中的重复错误。我试图创建一个从标准输入读取的词频分析程序。我有两个问题。

  1. 目前我正在使用 '\n' 来指示我的程序何时应该停止读取输入,但我需要它一直读取到用户完成输入为止。使用 EOF 或空终止符 '\0' 会更好吗?
  2. 这可能是一个愚蠢的问题,但我无法弄清楚我的输出有什么问题,它每次都会将字母加倍。

示例输入:“This is a test test of the program for frequency is a this for for”

输出:

thhiiss 1
iiss 2
aa 2
tteesstt 2
ooff 1
tthhee 1
pprrooggrraamm 1
ffoorr 3
ffrreeqquueennccyy 1
tthhiiss 1

如您所见,每个单词的计数接近正确,但无法弄清楚字母重复的原因。这是我使用的代码:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "list.h"

#define MAXWORD 100

//===========================================================================
/*
 * Node of the singly linked word-frequency list.
 *
 * In the code visible in this file only `next`, `word` and `freq` are read
 * or written. NOTE(review): the other members are never referenced here;
 * confirm whether "list.h" or other code relies on them before removing.
 */
struct lnode {
    struct lnode *next;    /* following node; NULL on the last node */
    struct lnode *counter; /* not referenced in the visible code */
    struct lnode *pLast;   /* not referenced in the visible code */
    struct lnode *prev;    /* not referenced in the visible code */
    struct lnode *head;    /* not referenced in the visible code */
    char *word;            /* heap-allocated, NUL-terminated copy of the word */
    int line;              /* not referenced in the visible code */
    int count;             /* not referenced in the visible code */
    int freq;              /* occurrences seen so far; starts at 1 */
};

/* First node of the global word list; NULL while the list is empty. */
struct lnode *start = NULL;

//===========================================================================
/*
 * Allocate a new list node for `str`.
 *
 * The node owns a private heap copy of the word, starts with a frequency
 * of 1 and is not linked to anything (`next` is NULL). All other members
 * are zero-filled instead of being left indeterminate.
 *
 * Returns the new node, or NULL if `str` is NULL or memory is exhausted.
 * The caller owns the result and must free both `word` and the node.
 */
struct lnode *createWordCounter(char *str)
{
    if (str == NULL) {
        return NULL;
    }

    /* calloc zero-fills the members this function does not set explicitly. */
    struct lnode *node = calloc(1, sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    size_t len = strlen(str) + 1; /* include the terminating NUL */
    node->word = malloc(len);
    if (node->word == NULL) {
        free(node); /* do not leak the half-built node */
        return NULL;
    }
    memcpy(node->word, str, len);

    node->freq = 1;
    node->next = NULL;
    return node;
}
//===========================================================================
/*
 * Record one occurrence of `str` in the global word list.
 *
 * If a node with the same word already exists its frequency is incremented;
 * otherwise a new node is appended at the end, so words keep the order in
 * which they were first seen. A NULL `str` is ignored.
 */
void addWord(char *str)
{
    if (str == NULL) {
        return;
    }

    /*
     * Walk with a pointer to the link being examined rather than to the
     * node itself: when the loop ends, `link` addresses either `start`
     * (empty list) or the last node's `next`, so the empty list needs no
     * special case.
     */
    struct lnode **link = &start;
    while (*link != NULL) {
        if (strcmp(str, (*link)->word) == 0) {
            (*link)->freq++;
            return;
        }
        link = &(*link)->next;
    }

    /* Word not found: append it. Should createWordCounter ever return
     * NULL, the link stays NULL and the list is left intact. */
    *link = createWordCounter(str);
}
//===========================================================================
/*
 * Read the next word from standard input into `buf`, converted to lowercase.
 *
 * A word is a maximal run of letters; every other character is a separator.
 * At most `bufsize - 1` characters are stored and the result is always
 * NUL-terminated; the excess letters of an over-long word are consumed and
 * discarded.
 *
 * Returns 1 when a word was stored, 0 when a newline or end of input is
 * reached before any letter (or when `buf` is NULL / `bufsize` < 1).
 */
int getNextWord(char *buf, int bufsize)
{
    if (buf == NULL || bufsize < 1) {
        return 0; /* no room even for the terminator */
    }

    /* getchar() returns int; a plain char cannot represent EOF reliably,
     * which would make the loops below spin forever at end of input. */
    int ch;

    /* Skip separators until the first letter of the next word. */
    do {
        ch = getchar();
        if (ch == EOF || ch == '\n') {
            return 0;
        }
    } while (!isalpha(ch));

    char *p = buf;
    do {
        if (p - buf < bufsize - 1) {
            *p++ = (char)tolower(ch);
        }
        ch = getchar();
    } while (ch != EOF && isalpha(ch));
    *p = '\0';

    /* Give the delimiter back so that a newline directly following a word
     * is still seen by the next call instead of being swallowed here. */
    if (ch != EOF) {
        ungetc(ch, stdin);
    }
    return 1;
}
//===========================================================================
/*
 * Print one list entry to standard output as "<word> <frequency>" followed
 * by a newline.
 */
void show(struct lnode *pWord)
{
    const char *text = pWord->word;
    int occurrences = pWord->freq;

    printf("%s %i\n", text, occurrences);
}
//===========================================================================

int main(){
    struct lnode *counter = NULL;
    int size = 1000;
    char buf[MAXWORD];
    while(getNextWord(buf, size) != 0 ){
        addWord(buf);
    }

    counter = start;

    while(counter != NULL)
    {
        show(counter);
        counter = counter->next;
    }

    counter = start;

    while(counter != NULL)
    {
    free(counter->word);
    start = counter;
    counter = counter->next;
    free(start);
    }

return 0;
}

这是我第一次发帖,如果我做错了什么请告诉我。任何帮助表示赞赏。

谢谢。

4

2 回答 2

1

仔细看这段代码,它把 ch 赋给 *p 赋了两次:

     if( ch >= 97 && ch <= 122)//making the ch lowercase if needed
           *p++ = ch;
     else{ch += 32;}
       *p++ = ch;

我认为 else 语句上的尾随“}”放错了位置。

     if( ch >= 97 && ch <= 122) { //making the ch lowercase if needed
           *p++ = ch;
     } else {
       ch += 32;
       *p++ = ch;
     }

此外,如果您了解函数 isalpha、islower、isupper、tolower、toupper,您的代码的可读性会大大提高。运行 man ctype 可获取相关信息。

于 2013-10-08T21:17:15.703 回答
0
Would it be better to use EOF or the null terminator '\0' ?

使用 EOF,因为当您按下 Ctrl+D 时,getchar() 会返回 EOF。您也可以用逻辑或(||)同时检查 '\n' 和 EOF。

what is wrong with my output it doubles the letters up every time ?

在函数中查看下面的代码 getNextWord()

   if( ch >= 97 && ch <= 122)//making the ch lowercase if needed
           *p++ = ch;   
      //this checks if input character is lowercase character, then store it into buffer
     else{ch += 32;} // if input character is lowercase character, won't execute else part
          *p++ = ch;   
          // now again you are copying same input character into buffer next location 

修改以上部分。

于 2013-10-08T21:25:33.977 回答