-1

您好,有人可以帮我找出导致问题的原因吗?出于某种原因, find_hash 函数给我带来了问题。它应该失败if(table -> buckets_array[i] != NULL){if(table -> buckets_array[i] != '\0'){但事实并非如此,它会进行下一次检查,这给了我一个分段错误。自从我最初将它设置为 table -> buckets_array[i] = NULL 以来,是什么导致前 2 个 if 语句通过?

编辑:感谢wildplasser我为我的 find_hash 函数提出了一个更好的解决方案。

if(table->buckets_array[table->hash_func(key)] == NULL){
    return NULL;
  }else{
    return table -> buckets_array[table->hash_func(key)] -> data;
  }

谢谢大家的帮助。

/*hash.h*/
#include<stdio.h>
#include<stdlib.h>

typedef struct data_{
  char *key;
  void *data;
  struct data_ *next;
}data_el;

typedef struct hash_table_ {
  void **order;
  int *number_next_calls;
  int *number_buckets;
  int *buckets_size;
  int *worst;
  int *total;
  float *average;
  int (*hash_func)(char *);
  int (*comp_func)(void*, void*);
  data_el **buckets_array;
} hash_table, *Phash_table;

Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);

static void lower_case_word(char *w);

/*hash.c*/
#include"hash.h"

Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *)){
  int i;

  Phash_table table_p;
  hash_table hash_table;

  table_p = (Phash_table)malloc(sizeof(hash_table));

  /*Setting function pointers*/
  table_p->hash_func = hash_func;
  table_p->comp_func = cmp_func;

  /*Create buckets array*/
  table_p->buckets_array = (data_el **)malloc(sizeof(data_el *)*(size+1));
  table_p->buckets_size = (int *)malloc(sizeof(int)*(size+1));

  /*Setting order array*/
  table_p->order = NULL;

  /*Setting inital condictions*/
  table_p->worst = (int *)malloc(sizeof(int));
  table_p->total = (int *)malloc(sizeof(int));
  table_p->average = (float *)malloc(sizeof(float));
  table_p->number_buckets = (int *)malloc(sizeof(int));

  *(table_p->number_buckets) = size;

  for(i = 0; i < size; i++){
    table_p->buckets_array[i] = NULL;
  }
  return table_p;
}

void free_hash(Phash_table table){

  int i;
  i = 0;
  data_el *cur;
  data_el *prev;

  /*Free order array*/
  if(table->order != NULL){
    free(table->order);
  }

  /*Free Buckets array and buckets_size array*/
  if(table ->buckets_array != NULL){
    for(i=0; i < *(table->number_buckets); i++){
      if(table->buckets_array[i] != NULL){

    /*Travers the linked list and free it*/
    cur = table->buckets_array[i];
    prev = cur;

    while((cur = cur->next) != NULL){
      free(prev);
      prev = cur;
    }
    /*Free the last node*/
    free(cur);
    }
    }
  }
  free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
  int index;
  data_el *p, *cur;

  index = table->hash_func(key);

  /*Head insertion*/
  if(table->buckets_array[index] == NULL){
    table->buckets_array[index] = (data_el *)malloc(sizeof(data_el));
    table->buckets_array[index]->data = data;
    table->buckets_array[index]->next =  NULL;
    table->buckets_array[index]->key = key;
  }else{
    cur = table->buckets_array[index];
    p = (data_el *)malloc(sizeof(data_el));
    p->key = key;
    p->data = data;
    p->next = cur;
    cur = p;
    table->buckets_array[index] = cur;
  }

  /*Update Total*/
  *table->total += 1;

  /*Update Bucket Size*/
  (table->buckets_size[index]) +=1;

  /*Updates Worst Case*/
  if((table->buckets_size[index]) > *(table->worst)){
    *(table->worst) = (table->buckets_size[index]);
  }else{
    *(table->worst) +=1;
  }

  /*Update Average*/
  int temp_total,temp_buckets;

  temp_total = *(table->total);
  temp_buckets = *(table->number_buckets);
  *(table->average)= (float)temp_total/(float)temp_buckets;
}

void *find_hash(Phash_table table, char *key){
  int i;
  i = 0;

  while(1){
    if(table->buckets_array[i] == '\0'){
      printf("End of Array");
      break;
    }
    if(table->buckets_array[i] != NULL){
      printf("%Checking");
      if(table->buckets_array[i] != '\0'){
    if(table->buckets_array[i]->key != NULL){
      if( strcmp(table->buckets_array[i]->key,key) == 0 ){
        printf("Found it\n");
        break;
      }
    }
      }
    }
    i++;
  }

  if(table->buckets_array[i] != NULL){
    printf("new asdasd %d", *((int *)table->buckets_array[i]->data));
    return table->buckets_array[i]->data;
  }else{
    return NULL;
  }
}

void stat_hash(Phash_table table, int *total, int *largest, float *average){
  total =  (table->total);
  largest = (table->worst);
  average =  (table->average);
}

void *start_hash_walk(Phash_table table){
  int i, max_buckets,step,next_calls;
  step = 0;
  data_el *cur;
  next_calls = 0;

  max_buckets = *(table ->number_buckets);
  table->order = (void**)malloc(sizeof(void *)*(*(table->total)));

  /*Set number_next_calls to 0*/
  table->number_next_calls = &next_calls;
  *(table->number_next_calls) = 0;

  /*Traverse the ADT and put all data into order array*/
  for(i = 0; i < max_buckets; i++){
    if(table->buckets_array[i] != NULL){
      cur = table->buckets_array[i];
      while(cur){
    table->order[step] = cur->data;
    step ++;
    cur = cur->next;
      }
    }
  }

  /*Bubble Short*/
  int j,k;

  for(j = 0; j < (step - 1);j++){
    for(k = 0;k < (step -(j+1));k ++){
      if((table->comp_func)(table->order[j],table->order[j+1]) < 5){
    void *temp;

    temp = table->order[j];
    table->order[j] = table->order[j+1];
    table->order[j+1] =  temp;
    printf("Swaping %s with %s\n",table->order[j],table->order[j+1]);
      }
    }
  }
  return table->order;
}

void *next_hash_walk(Phash_table table){

  /*
    Check the amount of times next_has_walk has been
    if higher than total, it will return null
  */

  if(*(table->number_next_calls) >= *(table->total)){
    return NULL;
  }else{
    *(table->number_next_calls) = *(table->number_next_calls) + 1;
    return table->order[*(table->number_next_calls)];
  }
}

/*project4.c*/

#include"hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0

int hash_function(char *word){

  int sum,i;
  i = 0;
  sum = 0;
  while(word[i] != '\0'){
    sum = sum + word[i];
    i++;
  }
  return sum%1000;
}

int main(void){

  /*Creating buckets array*/
  Phash_table dictionary;
  void *test;

  dictionary = new_hash(DICTIONARY_SIZE,hash_function,comp);

  int i;
  i = 0;
  void *frequency[DICTIONARY_SIZE];
  int char_index = 0, dictionary_size = 0, num_words = 0;
  char c, word[WORD_SIZE];

  printf("Parsing input ...\n");

  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
    (c == ':') || (c == '\n')) {

      /* End of a word */
      if (char_index) {
    /* Word is not empty */
    word[char_index] = '\0';
    lower_case_word(word);
    if(!find_hash(dictionary,word) ){
      insert_hash(dictionary,word,frequency[hash_function(word)]);
    }
    frequency[hash_function(word)]++;
    char_index = 0;
    num_words++;
      }
    }else{
      word[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,dictionary_size);
}

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}
4

3 回答 3

2

您的find_hash()函数不返回值。

您省略了一些功能:

Phash_table new_hash(int size, int (*hash)(char *), int (*comp)(void *, void *));
void lower_case_word(char *word);
int hash_function(char *);
int comp_function(void *, void *);

您没有指定 DICTIONARY_SIZE 或 WORD_SIZE。

你还没有cmain().

您尚未定义int main(void)(或int main(int argc, char **argv))。返回类型是int,它需要 0 或 2 个参数。

您既没有定义也没有初始化char_index. 你没有定义word。您没有定义或初始化num_words.

通常,您应该向 SO 提交可编译的代码。缺少的功能和标准标题是一回事。如果您省略它们的定义,这是合法的(但有点麻烦)。

您可能需要考虑比较函数是否应该采用void const *(或const void *)参数而不仅仅是void *; 它将使其更普遍可用,例如与qsort()or一起使用bsearch()

我不清楚哈希函数是否会受益于 avoid *而不是 a char *; 它可能应该是一个char const *void const *而不是一个不合格的指针。

从风格上讲,不要在其中一个->.运算符周围放置空格。它们比其他任何东西都更紧密。通常,在二元运算符周围放置空格,例如/. 通常,在逗号后放置空格(但要更加一致)。


哈希.h

不要(通常——这是一种常见的情况)在头文件中声明静态函数。一个静态函数只需要一个文件;把它放在标题中是没有意义的。标头用于在源文件之间共享信息。如果某些东西不会被其他文件使用,那么声明它是没有意义的。

保护标头不被重新包含:

#ifndef HASH_H_INCLUDED
#define HASH_H_INCLUDED

...rest of header here...

#endif /* HASH_H_INCLUDED */

不要在标题中包含任何不需要使用标题的内容。“hash.h”中的任何材料都不需要,<stdlib.h>因此<stdio.h>它可能不应该存在。

#includeC 标准在和标题名称之间使用空格;对他们来说足够好的东西应该对你来说足够好。

项目4.c

您没有对编译器警告给予足够的关注,或者没有设置足够的编译器警告。如果您使用的是 GCC,请-Wall至少编译。当我在测试编译时,我使用了,例如:

gcc -O3 -g -std=c99 -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes -c project4.c

我希望我的代码能够在这组选项下干净地编译。

您需要#include <ctype.h>声明tolower().

函数comp()没有在任何地方定义。我补充说:

static int comp(void *v1, void *v2)
{
    char const *s1 = v1; 
    char const *s2 = v2; 
    return strcmp(s1, s2);
}

我还放static前面hash_function()以避免需要外部定义。

有一次编译时,我收到了警告:

project4.c:51: warning: comparison is always false due to limited range of data type

这说明了一个问题。您声明cchar,但随后写道:

while ((c = getchar()) != EOF)

这是一个禁忌。该函数getchar()返回一个int(是的,名字不好)。它可以返回的值集包括每个字符加上一个不同的值 EOF。您必须使用 anint来获得可靠的行为。出现两个问题之一。要么你永远不会得到 EOF,因为赋值是一个(隐式)无符号类型,并且 -1 被映射到 0xFF,当 0xFF 提升为 anint时,它是正数,因此不等于 -1;或者您将有效字符(通常是 U+00FF、ÿ、带有分音符号的小拉丁字母 Y,俗称 y-变音符号)误解为 EOF。

您应该考虑使用类似的东西!isalnum()而不是条件:

 if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
     (c == ':') || (c == '\n'))

哈希.c

代码省略#include <string.h>

编译器说:

hash.c: In function ‘find_hash’:
hash.c:142: warning: too few arguments for format
hash.c: In function ‘start_hash_walk’:
hash.c:219: warning: format ‘%s’ expects type ‘char *’, but argument 2 has type ‘void *’
hash.c:219: warning: format ‘%s’ expects type ‘char *’, but argument 3 has type ‘void *’

行号应该用少许盐处理;我使用 Allman 样式布局,因此在格式化代码时添加了许多行。但是如果你的编译器没有给你这种建议,你应该寻找一个可以提供的编译器。

第 142 行包含:

printf("%Checking");

%是不需要的。但是,在打印换行符之前,您可能永远看不到输出,或者您使用它fflush(stdout);来强制输出数据。

printf("Checking\n");

作为一般规则,尤其是在调试期间,以换行符结束打印的消息。

第 219 行是:

 printf("Swaping %s with %s\n",table->order[j],table->order[j+1]);

它可能应该是:

 printf("Swapping %s with %s\n", (char *)table->order[j], (char *)table->order[j+1]);

因为order是一个void **。当然,你可能会更好char **


运营

修复了这些(次要)问题后,代码将编译并链接。在某些文本上运行时,输出如下所示:

Parsing input ...
End of ArrayEnd of ArrayEnd of ArrayEnd of ArrayEnd of ArrayEnd of Array

输出换行符!可能还值得打印出您刚刚处理的字符串,以及您是否添加了它或已经在数据中找到了它。

中的一个问题:您在使用数组之前main()没有对其进行初始化。frequency此外,数组存在的原因并不完全清楚。为什么频率信息没有正确保存在哈希表中?为什么频率信息是一个数组void *而不是,比如说,int

在一组样本数据上,程序将输入总结为:

There were 68 words; 0 unique words.

唯一词的数量最初令人担忧,但事实证明,您初始化dictionary_size为 0 并且从不更改它然后打印它,因此大部分输出是正确的。


我认为您需要为自己编写一个结构转储程序。这是一个函数,当给定一个(指向)结构的指针时,它会打印有关结构中数据的合理诊断信息。我使用一般形式:

void dump_xyzstructure(FILE *fp, char const *tag, xyzstructure const *data);

然后我可以写:

dump_xyzstructure(stdout, "After initialization", &xyz);

dump_xyzstructure(logfp, "At this point", &xyz);

等等。


您的哈希表非常复杂。但是,您在哈希表之外有调用哈希函数的代码,我不确定这是一个好主意。大多数情况下,它不应该需要。由于您的哈希表需要支持频率计数,因此哈希表应该为您记录频率;你不应该在主程序的辅助代码中这样做。

您可以通过简化结构来简化生活。您不必动态分配结构中的所有内容。特别是在 64 位机器上,这是对空间的巨大浪费。(好吧:这是对空间的微不足道的浪费,但也很浪费精力。)

 /*Setting inital condictions*/
 table_p->worst = (int *)malloc(sizeof(int));
 table_p->total = (int *)malloc(sizeof(int));
 table_p->average = (float *)malloc(sizeof(float));
 table_p->number_buckets = (int *)malloc(sizeof(int));

没有这些指针的数据结构会更紧凑,只使用结构中的一个元素。它也将简化释放代码(并且它简化了打印代码,因为它不必担心指针是否已被分配):

typedef struct hash_table
{
  void    **order;
  int       number_next_calls;
  int       number_buckets;
  int      *buckets_size;        // Number of entries in each bucket
  int       worst;
  int       total;
  float     average;
  int     (*hash_func)(char *); 
  int     (*comp_func)(void*, void*);
  data_el **buckets_array;
} hash_table, *Phash_table;

可以按需计算平均值;它并不保证在结构中占有一席之地,真的。丢失所有这些指针会大大简化代码。

例如,这个片段:

if (*(table->number_next_calls) >= *(table->total))
{
    return NULL;
}
else
{
    *(table->number_next_calls) = *(table->number_next_calls) + 1;
    return table->order[*(table->number_next_calls)];
}

变成:

if (table->number_next_calls >= table->total)
{
    return NULL;
}
else
{
    table->number_next_calls = table->number_next_calls + 1;
    return table->order[table->number_next_calls];
}

甚至:

if (table->number_next_calls >= table->total)
    return NULL;
else
    return table->order[++table->number_next_calls];

示例文本

这是我在 SO 评论中经常使用的东西:

欢迎来到 StackOverflow。请注意,在这里说“谢谢”的首选方式是投票赞成好的问题和有用的答案(一旦你有足够的声誉这样做),并接受对你提出的任何问题最有帮助的答案(这也给出了你的声誉小幅提升)。请参阅[FAQ](http://stackoverflow.com/faq)特别是[How do I ask questions here?](http://stackoverflow.com/faq#howtoask)

样本输出

显示的大部分数据来自dump_hash_table()函数;我需要看看什么有效,什么无效。它的大部分实际上都在工作——我不需要修复大部分算法(最坏情况的计算是错误的并且是固定的)。在解析输入时,我忽略了跟踪代码。

Hash Table: 0x10E700900 At end of input
 Order = 0x00000000
 Buckets = 0x7FD9A3800000
 Hash = 0x10E607A40
 Comp = 0x10E607AA0
 Next calls: 0
 Buckets: 1000
 Worst: 4
 Total: 74
 Average: 0.074000
   Bucket   3: 2
   Bucket  67: 1
   Bucket  97: 1
   Bucket  99: 2
   Bucket 105: 1
   Bucket 211: 2
   Bucket 213: 1
   Bucket 219: 2
   Bucket 220: 1
   Bucket 226: 1
   Bucket 227: 4
   Bucket 229: 1
   Bucket 307: 3
   Bucket 312: 3
   Bucket 317: 1
   Bucket 319: 4
   Bucket 321: 3
   Bucket 328: 1
   Bucket 334: 1
   Bucket 337: 1
   Bucket 349: 3
   Bucket 418: 3
   Bucket 420: 3
   Bucket 421: 1
   Bucket 425: 1
   Bucket 431: 1
   Bucket 433: 1
   Bucket 438: 1
   Bucket 448: 2
   Bucket 451: 1
   Bucket 463: 1
   Bucket 531: 1
   Bucket 537: 1
   Bucket 542: 1
   Bucket 551: 1
   Bucket 634: 2
   Bucket 646: 1
   Bucket 649: 2
   Bucket 651: 1
   Bucket 656: 1
   Bucket 663: 1
   Bucket 748: 1
   Bucket 752: 2
   Bucket 771: 1
   Bucket 880: 1
   Bucket 888: 1
   Bucket 942: 1
   Bucket 959: 1
 Unique words: 74
 Used Buckets: 48
There were 74 words; 0 unique words.

哈希.h

注意hash.h现在必须包含<stdio.h>才能声明dump_hash_table()。包括<stdlib.h>不是必需的。

/*hash.h*/

#ifndef HASH_H_INCLUDED
#define HASH_H_INCLUDED

#include <stdio.h>
#include <stdlib.h>

typedef struct data
{
  char *key;
  void *data;
  struct data *next;
} data_el;

typedef struct hash_table
{
  void    **order;
  int       number_next_calls;
  int       number_buckets;
  int      *buckets_size;       // Array of sizes of each bucket
  int       worst;
  int       total;
  float     average;
  int     (*hash_func)(char *);
  int     (*comp_func)(void*, void*);
  data_el **buckets_array;
} hash_table, *Phash_table;

Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);

void dump_hash_table(FILE *fp, char const *tag, hash_table const *table);

#endif /* HASH_H_INCLUDED */

哈希.c

/*hash.c*/
#include "hash.h"
#include <string.h>
#include <inttypes.h>

Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *))
{
    int i;

    Phash_table table_p;
    hash_table hash_table;

    table_p = (Phash_table)malloc(sizeof(hash_table));

    /*Setting function pointers*/
    table_p->hash_func = hash_func;
    table_p->comp_func = cmp_func;

    /*Create buckets array*/
    table_p->buckets_array = (data_el **)malloc(sizeof(data_el *)*(size+1));
    table_p->buckets_size = (int *)malloc(sizeof(int)*(size+1));

    /*Setting order array*/
    table_p->order = NULL;

    /*Setting inital conditions*/
    table_p->worst = 0;
    table_p->total = 0;
    table_p->average = 0.0;
    table_p->number_buckets = size;

    for (i = 0; i < size; i++)
    {
        table_p->buckets_array[i] = NULL;
    }
    return table_p;
}

void free_hash(Phash_table table)
{
    int i;
    i = 0;
    data_el *cur;
    data_el *prev;

    /*Free order array*/
    if (table->order != NULL)
    {
        free(table->order);
    }

    /*Free Buckets array and buckets_size array*/
    if (table->buckets_array != NULL)
    {
        for (i = 0; i < table->number_buckets; i++)
        {
            if (table->buckets_array[i] != NULL)
            {

                /*Travers the linked list and free it*/
                cur = table->buckets_array[i];
                prev = cur;

                while ((cur = cur->next) != NULL)
                {
                    free(prev);
                    prev = cur;
                }
                /*Free the last node*/
                free(cur);
            }
        }
    }
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data)
{
    int index;
    data_el *p, *cur;

    printf("Inserting: <<%s>> (data: %d)\n", key, *(int *)data);

    index = table->hash_func(key);

    /*Head insertion*/
    if (table->buckets_array[index] == NULL)
    {
        table->buckets_array[index] = (data_el *)malloc(sizeof(data_el));
        table->buckets_array[index]->data = data;
        table->buckets_array[index]->next =  NULL;
        table->buckets_array[index]->key = key;
    }
    else
    {
        cur = table->buckets_array[index];
        p = (data_el *)malloc(sizeof(data_el));
        p->key = key;
        p->data = data;
        p->next = cur;
        cur = p;
        table->buckets_array[index] = cur;
    }

    /*Update Total*/
    table->total += 1;

    /*Update Bucket Size*/
    table->buckets_size[index] +=1;

    /*Updates Worst Case*/
    if (table->buckets_size[index] > table->worst)
    {
        table->worst = table->buckets_size[index];
    }

    /*Update Average*/
    table->average = (float)table->total / (float)table->number_buckets;
}

void *find_hash(Phash_table table, char *key)
{
    int i = 0;

    while (1)
    {
        if (table->buckets_array[i] == '\0')
        {
            printf("End of Array\n");
            break;
        }
        if (table->buckets_array[i] != NULL)
        {
            printf("Checking:\n");
            if (table->buckets_array[i] != '\0')
            {
                if (table->buckets_array[i]->key != NULL)
                {
                    if (strcmp(table->buckets_array[i]->key, key) == 0 )
                    {
                        printf("Found it\n");
                        break;
                    }
                }
            }
        }
        i++;
    }

    if (table->buckets_array[i] != NULL)
    {
        printf("New entry %d\n", *((int *)table->buckets_array[i]->data));
        return table->buckets_array[i]->data;
    }
    else
    {
        return NULL;
    }
}

void stat_hash(Phash_table table, int *total, int *largest, float *average)
{
    *total   = table->total;
    *largest = table->worst;
    *average = table->average;
}

void *start_hash_walk(Phash_table table)
{
    int i, max_buckets, step, next_calls;
    step = 0;
    data_el *cur;
    next_calls = 0;

    max_buckets = table ->number_buckets;
    table->order = (void**)malloc(sizeof(void *) * table->total);
    table->number_next_calls = 0;

    /*Traverse the ADT and put all data into order array*/
    for (i = 0; i < max_buckets; i++)
    {
        if (table->buckets_array[i] != NULL)
        {
            cur = table->buckets_array[i];
            while (cur)
            {
                table->order[step] = cur->data;
                step ++;
                cur = cur->next;
            }
        }
    }

    /*Bubble Short*/
    int j, k;

    for (j = 0; j < (step - 1);j++)
    {
        for (k = 0;k < (step -(j+1));k ++)
        {
            if ((table->comp_func)(table->order[j], table->order[j+1]) < 5)
            {
                void *temp;

                temp = table->order[j];
                table->order[j] = table->order[j+1];
                table->order[j+1] =  temp;
                printf("Swapping %s with %s\n", (char *)table->order[j], (char *)table->order[j+1]);
            }
        }
    }
    return table->order;
}

void *next_hash_walk(Phash_table table)
{
    /*
       Check the amount of times next_has_walk has been
       if higher than total, it will return null
     */

    if (table->number_next_calls >= table->total)
        return NULL;
    else
        return table->order[++table->number_next_calls];
}

void dump_hash_table(FILE *fp, char const *tag, hash_table const *table)
{
    fprintf(fp, "Hash Table: 0x%.8" PRIXPTR " %s\n", (uintptr_t)table, tag);
    if (table == 0)
        return;
    fprintf(fp, " Order = 0x%.8" PRIXPTR "\n", (uintptr_t)table->order);
    fprintf(fp, " Buckets = 0x%.8" PRIXPTR "\n", (uintptr_t)table->buckets_array);
    fprintf(fp, " Hash = 0x%.8" PRIXPTR "\n", (uintptr_t)table->hash_func);
    fprintf(fp, " Comp = 0x%.8" PRIXPTR "\n", (uintptr_t)table->comp_func);
    fprintf(fp, " Next calls: %d\n", table->number_next_calls);
    fprintf(fp, " Buckets: %d\n",    table->number_buckets);
    fprintf(fp, " Worst: %d\n",      table->worst);
    fprintf(fp, " Total: %d\n",      table->total);
    fprintf(fp, " Average: %f\n",    table->average);
    if (table->buckets_size != 0)
    {
        int unique_words = 0;
        int used_buckets = 0;
        for (int i = 0; i < table->number_buckets; i++)
        {
            unique_words += table->buckets_size[i];
            if (table->buckets_size[i] != 0)
            {
                used_buckets++;
                fprintf(fp, "   Bucket %3d: %d\n", i, table->buckets_size[i]);
            }
        }
        fprintf(fp, " Unique words: %d\n", unique_words);
        fprintf(fp, " Used Buckets: %d\n", used_buckets);
    }
}

项目4.c

/*project4.c*/

#include "hash.h"
#include <string.h>
#include <ctype.h>

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0

static void lower_case_word(char *w);

static int hash_function(char *word)
{
    int sum, i;
    i = 0;
    sum = 0;
    while (word[i] != '\0')
    {
        sum = sum + word[i];
        i++;
    }
    return sum%1000;
}

static int comp(void *v1, void *v2)
{
    char const *s1 = v1;
    char const *s2 = v2;
    return strcmp(s1, s2);
}

int main(void)
{
    /*Creating buckets array*/
    Phash_table dictionary = new_hash(DICTIONARY_SIZE, hash_function, comp);
    int frequency[DICTIONARY_SIZE] = { 0 };
    int char_index = 0, dictionary_size = 0, num_words = 0;
    char word[WORD_SIZE];
    int c;

    printf("Parsing input ...\n");

    while ((c = getchar()) != EOF) 
    {
        if (isalnum(c))
            word[char_index++] = c;
        else
        {
            /* End of a word */
            if (char_index)
            {
                /* Word is not empty */
                word[char_index] = '\0';
                lower_case_word(word);
                printf("Got: %s\n", word);
                if (!find_hash(dictionary, word))
                {
                    insert_hash(dictionary, word, &frequency[hash_function(word)]);
                }
                frequency[hash_function(word)]++;
                char_index = 0;
                num_words++;
            }
        }
    }

    dump_hash_table(stdout, "At end of input", dictionary);

    printf("There were %d words; %d unique words.\n", num_words, dictionary_size);
}

static void lower_case_word(char *w)
{
    int i = 0;

    while (w[i] != '\0')
    {
        w[i] = tolower(w[i]);
        i++;
    }
}
于 2012-05-06T03:24:24.677 回答
1

这太复杂了。我会在代码中评论。

void *find_hash(Phash_table table, char *key){
  int i;
  i = 0;

  while(1){

for() 循环的好地方。

    if(table->buckets_array[i] == '\0'){

->buckets_array[] 是指向指向 data_el 的指针数组的指针 '\0' 是一个 int(字符常量在 C 中是一个 int)

      printf("End of Array");
      break;
    }
    if(table->buckets_array[i] != NULL){

您已经对此进行了测试并跳出了循环。条件始终为真。

      printf("%Checking");
      if(table->buckets_array[i] != '\0'){

无需第三次测试。

    if(table->buckets_array[i]->key != NULL){

啊!,现在我们正在获取节点的有效负载

      if( strcmp(table->buckets_array[i]->key,key) == 0 ){
        printf("Found it\n");
        break;
      }
    }
      }
    }

这里的大括号杂乱无章,我懒得阅读,或者数(4)或马赫他们。

    i++;
  }


  if(table->buckets_array[i] != NULL){

你似乎真的很喜欢这种情况。

    printf("new asdasd %d", *((int *)table->buckets_array[i]->data));
    return table->buckets_array[i]->data;

啊!有效载荷被交付。您可以将打印保留给呼叫者。

  }else{
    return NULL;
  }
}

精简版:

void *find_hash(Phash_table table, char *key){
      int i;
      unsigned i;

      for (i=0; i < *table_p->number_buckets; i++){
         data_el *cur;

         for (cur = table->buckets_array[i]; cur != NULL; cur = cur->next ) {
            if( strcmp(cur->key,key) ) continue;

            return cur->data; /* found it ! */
            }
         }
    return NULL;
}

另外:malloc int-size 对象(如 table_p->number_buckets)是不切实际的,更实际的是将定义更改为:

typedef struct hash_table_ {
  void **order;  /* This might be intended as a permutation array ? */
  unsigned number_next_calls;
  unsigned int number_buckets;

  unsigned int *buckets_size; /* I think this was intended as a histogram */
  unsigned int worst;
  unsigned int total;
  float average;
  int (*hash_func)(char *);
  int (*comp_func)(void*, void*);
  data_el **buckets_array;
} hash_table, *Phash_table;

将指针隐藏在 typedef 后面(如您的 Phash_table)通常被认为是不好的做法。

正如我昨天评论的那样,您的哈希函数很危险,因为它返回一个带符号的 int。它可能会生成负索引。(在碰撞方面,这也很糟糕,因为 "ABC"、"ACB"、"CAB" 都散列到相同的值。最好使用 K&R、FNV 或 Jenkins。)

于 2012-05-06T14:19:11.620 回答
0

也许您可以更正代码,使其成为您真正要测试的内容。也许包括所有必要的位,以便我们可以编译它(我喜欢)。

请注意,find_hash它没有返回值,但在调用它之前调用它insert_hash会给你带来麻烦。

编辑(现在显示完整代码)

comp调用中的函数new_hash- 因此无法编译。

你在用 malloc 做什么:

table_p->worst = (int *)malloc(sizeof(int));
table_p->total = (int *)malloc(sizeof(int));
table_p->average = (float *)malloc(sizeof(float));
table_p->number_buckets = (int *)malloc(sizeof(int));

malloc 的返回值无论如何都不应该强制转换,但是在这里您为 int 分配空间,然后告诉编译器您为int *. 这些字段真的需要是指针吗?

在您的 find_hash 中,循环应该是一个 for:

for (i=0; table->buckets_array[i] == ...; ++i) {
    etc
}

但是,正如 sarnold 所说,你为什么要table->buckets_array[i]同时针对NULL(0) 和'\0'(0) 进行测试。这是非常奇怪的东西。

我不会花太多时间调试它。相反,我建议对代码进行审查(或让同事审查它)并解决一些问题。其中有:

  • 弄清楚 table->buckets_array[i] 应该包含什么
  • 取消转换 malloc 返回
  • 不为整数数据分配空间
  • 格式化
  • 添加const到未更改的参数
  • 尽可能提取嵌套循环
  • 使用正确的循环
于 2012-05-06T02:59:14.310 回答