所以我有一个任务是在 c 中创建一个读取几个句子(一个 140mb 文件)的程序,并且基于第二个输入,它是一个数字,我需要返回第 N 个最常见的单词。我的想法是建立一个带有线性探测的哈希表,每次我得到一个新元素时,我都会根据它的位置和 djb2 对它进行相应的哈希处理,否则如果发生冲突,我会重新哈希处理。之后,我根据出现应用快速排序,然后最终按索引访问。我在用 c 中的线性探测完成哈希表时遇到问题。我很确定我已经完成了它,但每次我运行时,我都会在 lldb 上遇到堆缓冲区溢出。我试图发现这个问题,但我仍然无法弄清楚。


我认为我没有触及分配区域之外的内存,但我不是 100% 确定。

知道出了什么问题吗?这是 table.c 实现,您可以在下面看到结构的形式。


thread #1: tid = 0x148b44, 0x0000000100166b20 libclang_rt.asan_osx_dynamic.dylib`__asan::AsanDie(), queue = 'com.apple.main-thread', stop reason = Heap buffer overflow

 "access_size": 1,
 "access_type": 1,
 "address": 105690555220216,
 "description": "heap-buffer-overflow",
 "instrumentation_class": "AddressSanitizer",
 "pc": 4294981434,
 "stop_type": "fatal_error"


#include "table.h"
#include "entities.h"

static inline entry_t* entryInit(const char* const value){

    unsigned int len   = strlen(value);
    entry_t* entry     = malloc(sizeof(entry));
    entry->value       = malloc(sizeof(char*) * len);
    strncpy(entry->value, value, strlen(value));
    entry->exists      = 1;
    entry->occurence   = 1;

    return entry;

table_t* tableInit(const unsigned int size){

    table_t* table     = malloc(sizeof(table_t));
    table->entries     = malloc(size*sizeof(entry_t));
    table->seed        = getPrime();
    table->size        = size;
    table->usedEntries = 0U;

    return table;

//okay, there is definitely an issue here
table_t* tableResize(table_t* table, const unsigned int newSize){

    //most likely wont happen but if there is an overflow then we have a problem
    if(table->size > newSize) return NULL;

    //create a temp array of the realloced array, then do changes there
    entry_t* temp = calloc(newSize,sizeof(entry_t));

    table->size = newSize;

    //temp pointer to an entry
    entry_t *tptr = NULL;
    unsigned int pos = 0;
    unsigned int index = 0;

    while(pos != table->size){

        tptr = &table->entries[pos];

        if(tptr->exists == 1){

            index = hashString(table->seed, tptr->value, table->size, pos);

            temp[index] = *entryInit(tptr->value);

            temp[index].occurence = tptr->occurence;


        else pos++;

   table->entries = temp;
   //TODO: change table destroy to free the previous array from the table

   return table;

//insert works fine, it is efficient enough to add something in the table
unsigned int tableInsert(table_t* table,const char* const value){

    //decide when to resize, might create a large enough array to bloat the memory?
    if(table->usedEntries >(unsigned int)(2*(table->size/3))) table = tableResize(table, table->size*2);

    entry_t* entry = NULL;
    unsigned int index;
    auto int position = 0;

    while(position != table->size){

        //calculate the hash of our string as a function of the current position on the table
        index = hashString(table->seed,value,table->size, position);
        entry = &table->entries[index];

        if(entry->exists == 0){

            *entry = *entryInit(value);
            return index;

        } else if (entry->exists == 1 && strcmp(entry->value, value) == 0){

            return index;

        } else{

//there might be an issue here
static inline void tableDestroy(const table_t* const table){

    entry_t* entry = NULL;

    for (auto int i = 0; i < table->size; ++i){

        entry =&table->entries[i];

 //printf("Value: %s  Occurence: %d  Exists: %d \n",entry->value, entry->occurence, entry->exists );

       if(&table->entries[i] !=NULL)free(&table->entries[i]);


#pragma once

typedef struct __attribute__((packed)) __entry {

    char *value;
    unsigned int exists : 1;
    unsigned int occurence;

} entry_t;

typedef struct __table {

    int size;
    int usedEntries;
    entry_t *entries;
    unsigned int seed;

} table_t;


void readFromFile(const char* const fileName, table_t* table){

    FILE *fp = fopen(fileName, "r");

    if(!fp) fprintf(stderr,"error reading file. \n");

    char word[15];//long enough to hold the biggest word in the text?
    int position = 0;
    char ch;

    while((ch = fgetc(fp))!= EOF){

        //discard all the ascii chars that are not letters
        if(!(ch  >= 65 && ch <= 90) && !(ch >= 97 && ch <= 122)){

        word[position]= '\0';

        if(word[0] == NULL)continue;

        tableInsert(table, word);

        position = 0;


        else word[position++] = ch;




