c - 赛格。哈希表 ADT 中的故障 - C

Question

编辑：

Hash.c 已根据评论的修订进行了更新，但我仍然遇到 Seg 错误。我一定在这里遗漏了你们所说的一些东西

我使用 C 创建了一个哈希表 ADT，但是当我尝试在 ADT 中调用函数 (find_hash) 时遇到分段错误。

我已经发布了我创建的 parse.c、hash.c 和 hash.h 的所有 3 个文件，因此您可以看到所有变量。我们正在从文件 gettysburg.txt 中读取，该文件也附在文件中

当我调用 find_hash 时，parse.c 中发生了段错误。我一生都无法弄清楚这里发生了什么。如果您需要更多信息，我当然可以提供。

很抱歉，我刚刚在这个问题上完全难倒了一个星期的大量代码。提前致谢

我运行程序的方式是先：gcc -o parse parse.c hash.c 然后：cat gettysburg.txt | 解析

解析.c

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000

#define TRUE 1
#define FALSE 0


void lower_case_word(char *);
void dump_dictionary(Phash_table ); 

/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);

typedef struct user_data_ {   
    char word[WORD_SIZE];
    int freq_counter;
} user_data, *Puser_data;

int main(void)
{
   char c, word1[WORD_SIZE];
   int char_index = 0, dictionary_size = 0, num_words = 0, i;
   int total=0, largest=0;
   float average = 0.0;

   Phash_table t;                  //Pointer to main hash_table
   int (*Phash_func)(char *)=NULL;         //Function Pointers
   int (*Pcmp_func)(void *, void *)=NULL;
   Puser_data data_node;                   //pointer to hash table above
   user_data * find;


   printf("Parsing input ...\n");

   Phash_func = hash_func;   //Assigning Function pointers
   Pcmp_func = cmp_func;
   t = new_hash(1000,Phash_func,Pcmp_func);

  // Read in characters until end is reached 
  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
        (c == ':') || (c == '\n')) {
          // End of a word 
      if (char_index) {
          // Word is not empty 
        word1[char_index] = '\0';
        lower_case_word(word1);

        data_node = (Puser_data)malloc(sizeof(user_data));  
        strcpy(data_node->word,word1);
        printf("%s\n", data_node->word);


    //!!!!!!SEG FAULT HERE!!!!!!

        if (!((user_data *)find_hash(t, data_node->word))){   //SEG FAULT!!!!
         insert_hash(t,word1,(void *)data_node); 
        }

        char_index = 0;
        num_words++;
      }
    } else {
      // Continue assembling word 
      word1[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,
     dictionary_size);
  dump_dictionary(t);  //???

  }

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}

void dump_dictionary(Phash_table t){  //???
  int i;
  user_data *cur, *cur2;

  stat_hash(t, &(t->total), &(t->largest), &(t->average));   //Call to stat hash
    printf("Number of unique words:  %d\n", t->total);
    printf("Largest Bucket:  %d\n", t->largest);
    printf("Average Bucket:  %f\n", t->average);  

  cur = start_hash_walk(t);
  printf("%s:  %d\n", cur->word, cur->freq_counter);

  for (i = 0; i < t->total; i++)
     cur2 = next_hash_walk(t);
     printf("%s:  %d\n", cur2->word, cur2->freq_counter);
}

int hash_func(char *string){
    int i, sum=0, temp, index;

    for(i=0; i < strlen(string);i++){
        sum += (int)string[i];  
    }
    index = sum % 1000;
return (index); 
}


/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){

user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;

    if(cur1->freq_counter < cur2->freq_counter){
        return(-1);}
        else{ if(cur1->freq_counter > cur2->freq_counter){
                return(1);}
                else return(0);}
}

哈希.c

#include "hash.h"

Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
    int i;
    Phash_table t;

    t = (Phash_table)malloc(sizeof(hash_table));   //creates the main hash table
    t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size);  //creates the hash table of "size" buckets
    t->size = size;   //Holds the number of buckets
    t->hash_func = hash_func;   //assigning the pointer to the function in the user's program
    t->cmp_func = cmp_func;     // "  "  
    t->total=0;
    t->largest=0;
    t->average=0;
    t->sorted_array = NULL;
    t->index=0;
    t->sort_num=0;

    for(i=0;i<size;i++){   //Sets all buckets in hash table to NULL
        t->buckets[i] = NULL;}

    return(t);
}

void free_hash(Phash_table table){
    int i;
    hash_entry *cur;

    for(i = 0; i<(table->size);i++){
        if(table->buckets[i] != NULL){
            for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
                free(cur->key);  //Freeing memory for key and data
                free(cur->data);
            }
      free(table->buckets[i]);    //free the whole bucket
    }}
    free(table->sorted_array);
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
    Phash_entry new_node;   //pointer to a new node of type hash_entry
    int index;

    new_node = (Phash_entry)malloc(sizeof(hash_entry));
    new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
    new_node->data = data;       //stores the user's data into the node
    strcpy(new_node->key,key);   //copies the key into the node

                                //calling the hash function in the user's program
    index = table->hash_func(key);    //index will hold the hash table value for where the new node will be placed
    table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
    table->total++;   //increment the total (total # of buckets)
}

void *find_hash(Phash_table table, char *key){
    int i;
    hash_entry *cur;
   printf("Inside find_hash\n"); //REMOVE

    for(i = 0;i<table->size;i++){
        if(table->buckets[i]!=NULL){            
            for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
                if(strcmp(table->buckets[i]->key, key) == 0)
                return((table->buckets[i]->data));}  //returns the data to the user if the key values match
        }    //otherwise return NULL, if no match was found.
    }   
    return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){

    int node_num[table->size];  //creates an array, same size as table->size(# of buckets)
    int i,j, count = 0;
    int largest_buck = 0;
    hash_entry *cur;

    for(i = 0; i < table->size; i ++){
        if(table->buckets[i] != NULL){
            for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
                count ++;}
            node_num[i] = count;
            count = 0;}
        }

    for(j = 0; j < table->size; j ++){      
        if(node_num[j] > largest_buck)
            largest_buck = node_num[j];}

    *total = table->total;
    *largest = largest_buck;
    *average = (table->total) / (table->size);
}

void *start_hash_walk(Phash_table table){
    Phash_table temp = table;
    int i, j, k;
    hash_entry *cur;  //CHANGE IF NEEDED to HASH_TABLE *

    if(table->sorted_array != NULL) free(table->sorted_array);

    table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));

    for(i = 0; i < table->total; i++){
        if(table->buckets[i]!=NULL){
            for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
                table->sorted_array[i] = table->buckets[i]->data;
        }}
    }

    for(j = (table->total) - 1; j > 0; j --)    {
        for(k = 1; k <= j; k ++){
            if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
                temp -> buckets[0]-> data = table->sorted_array[k-1];
                table->sorted_array[k-1] = table->sorted_array[k];
                table->sorted_array[k] = temp->buckets[0] -> data;
            }
        }
    }
    return table->sorted_array[table->sort_num];
}

void *next_hash_walk(Phash_table table){ 

    table->sort_num ++;
    return table->sorted_array[table->sort_num];
}

哈希.h

#include <stdio.h> 
#include <stdlib.h>
#include <string.h>

typedef struct hash_entry_ {    //Linked List
    void *data;                 //Generic pointer
    char *key;                  //String-based key value
    struct hash_entry_ *next;   //Self-Referencing pointer
} hash_entry, *Phash_entry;

typedef struct hash_table_ {
    hash_entry **buckets;           //Pointer to a pointer to a Linked List of type hash_entry
    int (*hash_func)(char *);
    int (*cmp_func)(void *, void *);
    int size;
    void **sorted_array;         //Array used to sort each hash entry
    int index;
    int total;
    int largest;
    float average;  
    int sort_num;
} hash_table, *Phash_table;


Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);

葛底斯堡.txt

Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.

Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.

We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.

But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.

It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.

score 2 · Accepted Answer

此代码的几个问题之一可能是循环，例如：

for(table->buckets[i]; 
    table->buckets[i]->next != NULL; 
    table->buckets[i] = table->buckets[i]->next)
  ...

for 循环 ( ) 的初始化部分table->buckets[i]无效。如果i是 0 和table->buckets[0] == NULL，则此循环 ( table->buckets[i]->next != NULL) 上的条件将取消引用空指针并崩溃。

至少，这就是您的代码似乎在我的盒子上崩溃的地方。当我将您的几个循环更改为：

if (table->buckets[i] != NULL) {
  for(; 
      table->buckets[i]->next != NULL; 
      table->buckets[i] = table->buckets[i]->next)
    ...
}

...它一直在崩溃，但在不同的地方。也许这会帮助你摆脱困境？

编辑：另一个潜在的问题是那些 for 循环是破坏性的。当您调用find_hash时，您真的希望修改所有这些存储桶吗？

我建议使用类似的东西：

hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
  for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
    // ...
  }
}

当我这样做并注释掉你的dump_dictionary函数时，你的代码会运行而不会崩溃。

score 0 · Accepted Answer

唔，

这是哈希.c

#include "hash.h"

Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
    int i;
    Phash_table t;

    t = (Phash_table)calloc(1, sizeof(hash_table));   //creates the main hash table
    t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *));  //creates the hash table of "size" buckets
    t->size = size;   //Holds the number of buckets
    t->hash_func = hash_func;   //assigning the pointer to the function in the user's program
    t->cmp_func = cmp_func;     // "  "  
    t->total=0;
    t->largest=0;
    t->average=0;

    for(i=0;t->buckets[i] != NULL;i++){   //Sets all buckets in hash table to NULL
        t->buckets[i] = NULL;}

    return(t);
}

void free_hash(Phash_table table){
    int i;

    for(i = 0; i<(table->size);i++){
        if(table->buckets[i]!=NULL)
            for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                free(table->buckets[i]->key);  //Freeing memory for key and data
                free(table->buckets[i]->data);
            }
      free(table->buckets[i]);    //free the whole bucket
    }
    free(table->sorted_array);
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
    Phash_entry new_node;   //pointer to a new node of type hash_entry
    int index;

    new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
    new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
    new_node->data = data;       //stores the user's data into the node
    strcpy(new_node->key,key);   //copies the key into the node

                                //calling the hash function in the user's program
    index = table->hash_func(key);    //index will hold the hash table value for where the new node will be placed
    table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
    table->total++;   //increment the total (total # of buckets)
}

void *find_hash(Phash_table table, char *key){
    int i;
hash_entry *cur;

   printf("Inside find_hash\n"); //REMOVE

    for(i = 0;i<table->size;i++){
        if(table->buckets[i]!=NULL){
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                if(strcmp(cur->key, key) == 0)
                   return((cur->data));}  //returns the data to the user if the key values match
        }    //otherwise return NULL, if no match was found.
    }   
    return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){

    int node_num[table->size];    
    int i,j, count = 0;
    int largest_buck = 0;
    hash_entry *cur;

    for(i = 0; i < table->size; i ++)
    {
        if(table->buckets[i]!=NULL)            
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                count ++;}

        node_num[i] = count;
        count = 0;
    }

    for(j = 0; j < table->size; j ++){
        if(node_num[j] > largest_buck)
            largest_buck = node_num[j];}

    *total = table->total;
    *largest = largest_buck;
    *average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}

void *start_hash_walk(Phash_table table){
    void* temp = 0; //oook: this was another way of overwriting your input table 
    int i, j, k; 
    int l=0; //oook: new counter for elements in your sorted_array
    hash_entry *cur;

    if(table->sorted_array !=NULL) free(table->sorted_array);

    table->sorted_array = (void**)calloc((table->total), sizeof(void*));

    for(i = 0; i < table->size; i ++){
    //for(i = 0; i < table->total; i++){  //oook: i don't think you meant total ;)
        if(table->buckets[i]!=NULL)
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                table->sorted_array[l++] = cur->data;
            }
    }

    //oook: sanity check/assert on expected values
    if (l != table->total)
    {
        printf("oook: l[%d] != table->total[%d]\n",l,table->total);
    }

    for(j = (l) - 1; j > 0; j --)    {
        for(k = 1; k <= j; k ++){
            if (table->sorted_array[k-1] && table->sorted_array[k])
            {
                if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
                    temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
                    table->sorted_array[k-1] = table->sorted_array[k];
                    table->sorted_array[k] = temp;
                }
            }
            else
                printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
        }
    }
    return table->sorted_array[table->sort_num];
}

void *next_hash_walk(Phash_table table){ 

    /*oook: this was blowing up since you were incrementing past the size of sorted_array..
    NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
    //table->sort_num++
    return table->sorted_array[table->sort_num++];
}

这是 parse.c

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>  //oook: added so you can assert ;)
#include "hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000

#define TRUE 1
#define FALSE 0


void lower_case_word(char *);
void dump_dictionary(Phash_table ); 

/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);

typedef struct user_data_ {   
    char word[WORD_SIZE];
    int freq_counter;
} user_data, *Puser_data;

int main(void)
{
   char c, word1[WORD_SIZE];
   int char_index = 0, dictionary_size = 0, num_words = 0, i;
   int total=0, largest=0;
   float average = 0.0;

   Phash_table t;                  //Pointer to main hash_table
   int (*Phash_func)(char *)=NULL;         //Function Pointers
   int (*Pcmp_func)(void *, void *)=NULL;
   Puser_data data_node;                   //pointer to hash table above
   user_data * find;


   printf("Parsing input ...\n");

   Phash_func = hash_func;   //Assigning Function pointers
   Pcmp_func = cmp_func;
   t = new_hash(1000,Phash_func,Pcmp_func);

  // Read in characters until end is reached 
  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
        (c == ':') || (c == '\n')) {
          // End of a word 
      if (char_index) {
          // Word is not empty 
        word1[char_index] = '\0';
        lower_case_word(word1);

        data_node = (Puser_data)calloc(1,sizeof(user_data));  
        strcpy(data_node->word,word1);
        printf("%s\n", data_node->word);


    //!!!!!!SEG FAULT HERE!!!!!!

        if (!((user_data *)find_hash(t, data_node->word))){   //SEG FAULT!!!!
        dictionary_size++;
         insert_hash(t,word1,(void *)data_node); 
        }

        char_index = 0;
        num_words++;
      }
    } else {
      // Continue assembling word 
      word1[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,
     dictionary_size);
  dump_dictionary(t);  //???

  }

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}

void dump_dictionary(Phash_table t){  //???
  int i;
  user_data *cur, *cur2;

  stat_hash(t, &(t->total), &(t->largest), &(t->average));   //Call to stat hash
    printf("Number of unique words:  %d\n", t->total);
    printf("Largest Bucket:  %d\n", t->largest);
    printf("Average Bucket:  %f\n", t->average);  

  cur = start_hash_walk(t);
  if (!cur) //ook: do test or assert for null values
  {
    printf("oook: null== (cur = start_hash_walk)\n");
    exit(-1);
  }
  printf("%s:  %d\n", cur->word, cur->freq_counter);
  for (i = 0; i < t->total; i++)
  {//oook: i think you needed these braces
      cur2 = next_hash_walk(t);
      if (!cur2) //ook: do test or assert for null values
      {
        printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
      }
      else
        printf("%s:  %d\n", cur2->word, cur2->freq_counter);
  }//oook: i think you needed these braces
}

int hash_func(char *string){
    int i, sum=0, temp, index;

    for(i=0; i < strlen(string);i++){
        sum += (int)string[i];  
    }
    index = sum % 1000;
return (index); 
}


/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){

user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;

    /* ooook: do assert on programmatic errors. 
    this function *requires non-null inputs.  */
    assert(cur1 && cur2);  
    if(cur1->freq_counter < cur2->freq_counter){
        return(-1);}
        else{ if(cur1->freq_counter > cur2->freq_counter){
                return(1);}
                else return(0);}
}

跟着//ooks

说明：

有一两个地方会爆炸。
快速修复和回答您的问题在parse.c，大约L100：

  cur = start_hash_walk(t);
  printf("%s:  %d\n", cur->word, cur->freq_counter);

..在调用之前检查cur是否可以修复您的直接段错误。nullprintf

但为什么会cur呢null？〜因为这个坏男孩：
void *start_hash_walk(Phash_table table)

你hash_func(char *string)可以（&确实）返回非唯一值。这当然没问题，只是你还没有实现你的链表链。因此，您最终会table->sorted_array包含少于table->total元素〜或者如果您遍历所有table->size存储桶，您会这样做；）

还有一两个其他问题。现在我进一步黑了 Nate Kohl for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next)，for(cur=table->buckets[i]; cur != NULL; cur=cur->next)因为你没有锁链。但这是*您的 TODO，对此已经说得够多了。

最后。请注意，next_hash_walk(Phash_table table)您有：

table->sort_num++
return table->sorted_array[table->sort_num];

哎哟! 请检查那些数组边界！

注释

1）如果您的功能不是为了更改输入而设计的，那么请输入const。这样，编译器很可能会在您无意中丢弃某些东西时告诉您。

2) 对数组索引进行边界检查。

3) 在尝试使用 Null 指针之前对其进行测试/断言。

4）对您的每个功能进行单元测试；在编译和测试之前不要写太多代码。

5) 使用最少的测试数据；精心制作它，以便对您的代码进行限制测试并尝试以狡猾的方式破坏它。

6）初始化你的数据结构！

7)切勿使用埃及牙套！{
只是开玩笑 ;)
}

PS 到目前为止做得很好〜>指针是棘手的小东西！& 一个包含所有必要细节的好问题，所以 +1 和 gl ;)

(//oook: 也许添加一个作业标签)

c - 赛格。哈希表 ADT 中的故障 - C

编辑：

解析.c

哈希.c

哈希.h

葛底斯堡.txt

2 回答 2

Related

Reference