所以我有一个任务是在 c 中创建一个读取几个句子(一个 140mb 文件)的程序,并且基于第二个输入,它是一个数字,我需要返回第 N 个最常见的单词。我的想法是建立一个带有线性探测的哈希表,每次我得到一个新元素时,我都会根据它的位置和 djb2 对它进行相应的哈希处理,否则如果发生冲突,我会重新哈希处理。之后,我根据出现应用快速排序,然后最终按索引访问。我在用 c 中的线性探测完成哈希表时遇到问题。我很确定我已经完成了它,但每次我运行时,我都会在 lldb 上遇到堆缓冲区溢出。我试图发现这个问题,但我仍然无法弄清楚。
我的堆栈内存不足了吗?该文件相对较小以消耗如此多的内存。我使用了地址清理程序,并且在插入时出现了堆缓冲区溢出。
我认为我没有触及分配区域之外的内存,但我不是 100% 确定。
知道出了什么问题吗?这是 table.c 实现,您可以在下面看到结构的形式。
这是来自地址消毒器的更详细信息:
thread #1: tid = 0x148b44, 0x0000000100166b20 libclang_rt.asan_osx_dynamic.dylib`__asan::AsanDie(), queue = 'com.apple.main-thread', stop reason = Heap buffer overflow
{
"access_size": 1,
"access_type": 1,
"address": 105690555220216,
"description": "heap-buffer-overflow",
"instrumentation_class": "AddressSanitizer",
"pc": 4294981434,
"stop_type": "fatal_error"
}
表.c:
#include "table.h"
#include "entities.h"
static inline entry_t* entryInit(const char* const value){
unsigned int len = strlen(value);
entry_t* entry = malloc(sizeof(entry));
entry->value = malloc(sizeof(char*) * len);
strncpy(entry->value, value, strlen(value));
entry->exists = 1;
entry->occurence = 1;
return entry;
}
table_t* tableInit(const unsigned int size){
table_t* table = malloc(sizeof(table_t));
table->entries = malloc(size*sizeof(entry_t));
table->seed = getPrime();
table->size = size;
table->usedEntries = 0U;
return table;
}
//okay, there is definitely an issue here
table_t* tableResize(table_t* table, const unsigned int newSize){
//most likely wont happen but if there is an overflow then we have a problem
if(table->size > newSize) return NULL;
//create a temp array of the realloced array, then do changes there
entry_t* temp = calloc(newSize,sizeof(entry_t));
table->size = newSize;
//temp pointer to an entry
entry_t *tptr = NULL;
unsigned int pos = 0;
unsigned int index = 0;
while(pos != table->size){
tptr = &table->entries[pos];
if(tptr->exists == 1){
index = hashString(table->seed, tptr->value, table->size, pos);
temp[index] = *entryInit(tptr->value);
temp[index].occurence = tptr->occurence;
break;
}
else pos++;
}
table->entries = temp;
//TODO: change table destroy to free the previous array from the table
free(temp);
return table;
}
//insert works fine, it is efficient enough to add something in the table
unsigned int tableInsert(table_t* table,const char* const value){
//decide when to resize, might create a large enough array to bloat the memory?
if(table->usedEntries >(unsigned int)(2*(table->size/3))) table = tableResize(table, table->size*2);
entry_t* entry = NULL;
unsigned int index;
auto int position = 0;
while(position != table->size){
//calculate the hash of our string as a function of the current position on the table
index = hashString(table->seed,value,table->size, position);
entry = &table->entries[index];
if(entry->exists == 0){
*entry = *entryInit(value);
table->usedEntries++;
return index;
} else if (entry->exists == 1 && strcmp(entry->value, value) == 0){
entry->occurence++;
return index;
} else{
position++;
}
}
}
//there might be an issue here
static inline void tableDestroy(const table_t* const table){
entry_t* entry = NULL;
for (auto int i = 0; i < table->size; ++i){
entry =&table->entries[i];
//printf("Value: %s Occurence: %d Exists: %d \n",entry->value, entry->occurence, entry->exists );
if(&table->entries[i] !=NULL)free(&table->entries[i]);
}
free(table);
}
实体.h:
#pragma once
typedef struct __attribute__((packed)) __entry {
char *value;
unsigned int exists : 1;
unsigned int occurence;
} entry_t;
typedef struct __table {
int size;
int usedEntries;
entry_t *entries;
unsigned int seed;
} table_t;
这是我从文件中读取并处理文本的方式:
void readFromFile(const char* const fileName, table_t* table){
FILE *fp = fopen(fileName, "r");
if(!fp) fprintf(stderr,"error reading file. \n");
char word[15];//long enough to hold the biggest word in the text?
int position = 0;
char ch;
while((ch = fgetc(fp))!= EOF){
//discard all the ascii chars that are not letters
if(!(ch >= 65 && ch <= 90) && !(ch >= 97 && ch <= 122)){
word[position]= '\0';
if(word[0] == NULL)continue;
tableInsert(table, word);
position = 0;
continue;
}
else word[position++] = ch;
}
}
有什么建议我的代码有什么问题吗?我相信调整大小可能有问题,我还没有正确删除,因为我在内存管理方面遇到了很多问题。
提前致谢!