我正在尝试编写一个交叉引用器,它打印一个单词列表和它们出现的行号,忽略诸如 a、an、the 等“噪音”单词。(Kernighan 和 Ritchie,ANSI 版 p 143 问题6-3)。以下是代码:
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"getch.h"
#include"search_tools.h" /*this contains the binsearch function*/
#include"getword.h"
#define MAXWORD 100
char* noise[] = {"a","and","if","is","the"}; /*noise words that we need to exclude*/
int linecount = 1;
struct tnode {
int line_number;
char* word;
struct tnode* left; /*for all those words lexicographically less than word*/
struct tnode* middle; /*for all those words lexicographically equal to word*/
struct tnode* right; /*for all those words lexicographically greater than word*/
};
/*this function tells you where you should put your new word*/
struct tnode* addtree (struct tnode* p, char* w) {
int cond;
if (p == NULL) {
p = (struct tnode*)malloc(sizeof(struct tnode));
p -> word = strdup(w);
p -> line_number = linecount;
p -> left = p -> right = p -> middle = NULL;
}
else if ((cond = strcmp(w,p->word)) == 0) {
p -> middle = addtree(p -> middle,w);
}
else if (cond > 0) {
p -> right = addtree(p -> right,w);
}
else if (cond < 0) {
p -> left = addtree(p -> left, w);
}
else {
;
}
return p;
}
void treeprint (struct tnode* p) {
struct tnode* q;
if (p != NULL) {
treeprint(p->left);
printf("%s occurs in the following lines:\n",p -> word);
for (q = p; q != NULL; q = q->middle)
printf("%4d ",q -> line_number);
printf("\n\n");
treeprint(p->right);
}
}
int main (int argc, char* argv[]) {
struct tnode* root;
char word[MAXWORD];
root = NULL;
int c;
while ((c = getword(word,MAXWORD)) != EOF) {
if (isalpha(word[0]) && binsearch(word,noise,5) == -1)
root = addtree(root,word);
else if (c == '\n')
linecount++;
}
treeprint(root);
return 0;
}
这是我使用的 getword 函数:
int getword (char* word, int lim) {
int c;
char* w;
w = word;
while (isspace(c = getch())) /*skip white spaces*/
;
if (c != EOF)
*w++ = c;
if (!isalpha(c)) { /*the first character in a word can be a #, as in #define or #include*/
*w = '\0';
return c;
}
while(isalnum(c = getch()) && --lim > 0)
*w++ = c;
ungetch(c);
*w = '\0';
return word[0]; /*signal that a word has been collected*/
}
以下是用于搜索 char 指针的 binsearch:
int binsearch (char * word, struct key tab[], int n) {
int cond;
int low, high, mid;
low = 0;
high = n -1;
while (low <= high) {
mid = (low+high)/2;
if ((cond = strcmp(word,tab[mid].word)) < 0)
high = mid - 1;
else if (cond > 0)
low = mid + 1;
else /*found it*/
return mid;
}
return -1;
}
如果 getword 遇到非字母(单词的第一个字符)或非数字字符,它应该返回它。因此,我将 lincecount 设置为在 getword 返回“\n”时递增。但这似乎不会发生。