我需要这方面的帮助。
我正在尝试实现 B-tree 插入,但由于我对这种算法感到困惑,所以我陷入了某些困境。根据作者 Cormen 的算法简介一书中的 B-tree 伪代码,在我看来,我正在编写正确的 B-tree 插入代码,但是当我在运行算法后检查磁盘中的 B-tree 时,数据是错误的。我不知道为什么我在这段代码中做错了。
算法简介页面: http: //staff.ustc.edu.cn/~csli/graduate/algorithms/book6/chap19.htm
我的 btree.c 文件:
#include "../include/btree.h"
#include <stdio.h>
struct BTreeNode
{
// leaf
char leaf;
// numberOfIndexedKeys
int numberOfIndexedKeys;
// RRNnode
int RRNnode;
// Set of keys
int key[4];
// Set of references to other file
ll dataRegReference[4];
// Subtree pointers
int pointers[5];
};
void BTreeWriteHeader(BTreeHeader_t *header, s_file_t *indexFile)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
fseek(indexFile->fp, 0, SEEK_SET);
fwrite(&header->status, sizeof(char), 1, indexFile->fp);
fwrite(&header->noderoot, sizeof(int), 1, indexFile->fp);
fwrite(&header->RRNnextNode, sizeof(int), 1, indexFile->fp);
fwrite(header->trash, sizeof(char), 68, indexFile->fp);
}
BTreeHeader_t *BTreeReadHeader(s_file_t *indexFile)
{
if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
{
printf("Processing file failed.");
return NULL;
}
BTreeHeader_t *header = (BTreeHeader_t *)calloc(1, sizeof(BTreeHeader_t));
fseek(indexFile->fp, 0, SEEK_SET);
fread(&header->status, sizeof(char), 1, indexFile->fp);
fread(&header->noderoot, sizeof(int), 1, indexFile->fp);
fread(&header->RRNnextNode, sizeof(int), 1, indexFile->fp);
fread(header->trash, sizeof(char), 68, indexFile->fp);
return header;
}
void BTreeWriteNode(BTreeNode_t *WriteNode, s_file_t *indexFile)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
fseek(indexFile->fp, 77 * (WriteNode->RRNnode + 1), SEEK_SET);
fwrite(&WriteNode->leaf, sizeof(char), 1, indexFile->fp);
fwrite(&WriteNode->numberOfIndexedKeys, sizeof(int), 1, indexFile->fp);
fwrite(&WriteNode->RRNnode, sizeof(int), 1, indexFile->fp);
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
fwrite(&WriteNode->pointers[i], sizeof(int), 1, indexFile->fp);
fwrite(&WriteNode->key[i], sizeof(int), 1, indexFile->fp);
fwrite(&WriteNode->dataRegReference[i], sizeof(ll), 1,
indexFile->fp);
}
fwrite(&WriteNode->pointers[MAX_INDEXED_KEYS], sizeof(int), 1, indexFile->fp);
}
BTreeNode_t *BTreeReadNode(s_file_t *indexFile, int RRNnode)
{
if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
{
printf("Processing file failed.");
return NULL;
}
BTreeNode_t *ReadNode = (BTreeNode_t *)calloc(1, sizeof(BTreeNode_t));
fseek(indexFile->fp, 77 * (RRNnode + 1), SEEK_SET);
fread(&ReadNode->leaf, sizeof(char), 1, indexFile->fp);
fread(&ReadNode->numberOfIndexedKeys, sizeof(int), 1, indexFile->fp);
fread(&ReadNode->RRNnode, sizeof(int), 1, indexFile->fp);
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
fread(&ReadNode->pointers[i], sizeof(int), 1, indexFile->fp);
fread(&ReadNode->key[i], sizeof(int), 1, indexFile->fp);
fread(&ReadNode->dataRegReference[i], sizeof(ll), 1, indexFile->fp);
}
fread(&ReadNode->pointers[MAX_INDEXED_KEYS], sizeof(int), 1, indexFile->fp);
return ReadNode;
}
// Creating B-Tree Header
void BTreeHeaderCreate(s_file_t *indexFile)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
BTreeHeader_t *header = (BTreeHeader_t *)calloc(1, sizeof(BTreeHeader_t));
header->RRNnextNode = 0;
header->noderoot = -1;
header->status = '0';
strcpy(
header->trash,
"@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");
BTreeWriteHeader(header, indexFile);
}
// INSERTION
// =========================================================================
void cleanNode(BTreeNode_t *node)
{
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
node->key[i] = node->dataRegReference[i] = node->pointers[i] = -1;
}
node->pointers[MAX_INDEXED_KEYS] = -1;
node->numberOfIndexedKeys = 0;
}
BTreeNode_t *createNode(int RRNnode, char leaf)
{
BTreeNode_t *newNode = (BTreeNode_t *)calloc(1, sizeof(BTreeNode_t));
newNode->RRNnode = RRNnode;
newNode->leaf = leaf;
newNode->numberOfIndexedKeys = 0;
cleanNode(newNode);
return newNode;
}
void InsertInNode(BTreeNode_t *root, s_file_t *indexFile, int key, ll dataRegReference, int pointerLeft, int pointerRight)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
if (root->numberOfIndexedKeys > MAX_INDEXED_KEYS)
{
printf("Aqui!");
}
/*
for(int i = 0; i < root->numberOfIndexedKeys; i++){
if(key.key == root->chaves[i].key) return;
}
*/
int numberOfIndexedKeysOfNode = root->numberOfIndexedKeys - 1;
while (numberOfIndexedKeysOfNode >= 0 && key < root->key[numberOfIndexedKeysOfNode])
{
// Shifting keys and dataRegReference
root->key[numberOfIndexedKeysOfNode + 1] = root->key[numberOfIndexedKeysOfNode];
root->dataRegReference[numberOfIndexedKeysOfNode + 1] = root->dataRegReference[numberOfIndexedKeysOfNode];
// Shifting pointers
root->pointers[numberOfIndexedKeysOfNode + 2] = root->pointers[numberOfIndexedKeysOfNode + 1];
numberOfIndexedKeysOfNode--;
}
numberOfIndexedKeysOfNode++;
root->pointers[numberOfIndexedKeysOfNode + 1] = root->pointers[numberOfIndexedKeysOfNode];
root->key[numberOfIndexedKeysOfNode] = key;
root->dataRegReference[numberOfIndexedKeysOfNode] = dataRegReference;
root->pointers[numberOfIndexedKeysOfNode] = pointerLeft;
root->pointers[numberOfIndexedKeysOfNode + 1] = pointerRight;
root->numberOfIndexedKeys++;
BTreeWriteNode(root, indexFile);
}
BTreeNode_t *SearchNode(s_file_t *indexFile, int RRNnode, int key)
{
if (indexFile->fp == NULL || indexFile->fileConsistency == '0')
{
printf("Processing file failed.");
return NULL;
}
BTreeNode_t *searchNode = BTreeReadNode(indexFile, RRNnode);
int pos = 0;
while (key > searchNode->key[pos] && pos < searchNode->numberOfIndexedKeys)
pos++;
if (key == searchNode->key[pos])
return searchNode;
if (searchNode->leaf == '1')
return NULL;
return SearchNode(indexFile, searchNode->pointers[pos], key);
}
void SplitChild(BTreeHeader_t *header, s_file_t *indexFile, BTreeNode_t *root, BTreeNode_t *child)
{
int keys[MAX_INDEXED_KEYS];
ll dataRegReference[MAX_INDEXED_KEYS];
int pointers[MAX_INDEXED_KEYS + 1];
for (int i = 0; i < MAX_INDEXED_KEYS; i++)
{
keys[i] = child->key[i];
dataRegReference[i] = child->dataRegReference[i];
pointers[i] = child->pointers[i];
}
pointers[MAX_INDEXED_KEYS] = child->pointers[MAX_INDEXED_KEYS];
cleanNode(child);
BTreeNode_t *newNode = createNode(header->RRNnextNode, child->leaf);
header->RRNnextNode++;
InsertInNode(child, indexFile, keys[0], dataRegReference[0], pointers[0], pointers[1]);
InsertInNode(child, indexFile, keys[1], dataRegReference[1], pointers[1], pointers[2]);
pointers[2] = child->RRNnode;
InsertInNode(root, indexFile, keys[2], dataRegReference[2], pointers[2], newNode->RRNnode);
InsertInNode(newNode, indexFile, keys[3], dataRegReference[3], pointers[3], pointers[4]);
}
void InsertNonFull(BTreeHeader_t *header, BTreeNode_t *root, s_file_t *indexFile, int key, ll dataRegReference, int pointerLeft, int pointerRight)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
int numberOfIndexedKeysOfNode = root->numberOfIndexedKeys - 1;
if (root->leaf == '1')
{
InsertInNode(root, indexFile, key, dataRegReference, pointerLeft, pointerRight);
}
else
{
while (numberOfIndexedKeysOfNode >= 0 && key < root->key[numberOfIndexedKeysOfNode])
numberOfIndexedKeysOfNode--;
numberOfIndexedKeysOfNode++;
BTreeNode_t *childNode = NULL;
childNode = BTreeReadNode(indexFile, root->pointers[numberOfIndexedKeysOfNode]);
if (childNode->numberOfIndexedKeys == MAX_INDEXED_KEYS)
{
SplitChild(header, indexFile, root, childNode);
}
if (key > root->key[numberOfIndexedKeysOfNode])
numberOfIndexedKeysOfNode++;
InsertNonFull(header, childNode, indexFile, key, dataRegReference, pointerLeft, pointerRight);
}
}
void BtreeInsertion(s_file_t *indexFile, int codLinha, ll offsetRegistro,
char removido)
{
if (indexFile->fp == NULL)
{
printf("Processing file failed.");
return;
}
if (removido == '0')
return;
BTreeHeader_t *header = BTreeReadHeader(indexFile);
BTreeNode_t *root = BTreeReadNode(indexFile, header->noderoot);
if (root->numberOfIndexedKeys == MAX_INDEXED_KEYS)
{
BTreeNode_t *newRoot = createNode(header->RRNnextNode + 1, '0');
SplitChild(header, indexFile, newRoot, root);
header->RRNnextNode++;
header->noderoot = newRoot->RRNnode;
InsertNonFull(header, newRoot, indexFile, codLinha, offsetRegistro, -1, -1);
}
else
InsertNonFull(header, root, indexFile, codLinha, offsetRegistro, -1, -1);
BTreeWriteHeader(header, indexFile);
}
void BtreeCreate(s_file_t *indexFile)
{
BTreeHeaderCreate(indexFile);
BTreeHeader_t *header = BTreeReadHeader(indexFile);
BTreeNode_t *newRoot = createNode(header->RRNnextNode, '1');
header->RRNnextNode++;
header->noderoot = newRoot->RRNnode;
BTreeWriteHeader(header, indexFile);
BTreeWriteNode(newRoot, indexFile);
free(header);
free(newRoot);
}
// ==========================================================================
// B-TREE TRAVERSAL
// ===========================================================================
void InOrderTraversal(int RRNroot, s_file_t *indexFile)
{
if (RRNroot == -1)
return;
BTreeNode_t *root = BTreeReadNode(indexFile, RRNroot);
//printf("node: %d ", root->RRNnode);
for (int i = 0; i < root->numberOfIndexedKeys; i++)
{
InOrderTraversal(root->pointers[i], indexFile);
//printf("(%d, %lld)", root->chaves[i].key, root->chaves[i].dataRegReference);
}
InOrderTraversal(root->pointers[root->numberOfIndexedKeys], indexFile);
}
我的 btree.h 文件:
#ifndef BTREE_H_
#define BTREE_H_
#include "funcoes-gerais.h"
#include "linha.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_INDEXED_KEYS 4
typedef struct BTreeNode BTreeNode_t;
typedef struct BTreeHeader BTreeHeader_t;
typedef long long int ll;
struct BTreeHeader {
char status;
int noderoot;
int RRNnextNode;
// This guy is not important, ignore this
char trash[69];
};
void BTreeHeaderCreate(s_file_t *indexFile);
void BtreeInsertion(s_file_t *indexFile, int codLinha, ll offsetRegistro,
char removido);
void BtreeCreate(s_file_t *indexFile);
void InOrderTraversal(int RRNroot, s_file_t *indexFile);
BTreeHeader_t *BTreeReadHeader(s_file_t *indexFile);
void BTreeWriteHeader(BTreeHeader_t *header, s_file_t *indexFile);
#endif
在这个 B 树中,顺序是 5。那么,我在同一个节点中最多可以得到 4 个键和 5 个指针。我的疑问之一是 splitChild 算法。如果我根据本书的算法准确地获得节点中的最大密钥,我将如何拆分节点并提升密钥?如果我给每个节点多一个空间来插入键,并且仅当它在节点中具有最大 + 1 个键时才拆分,则算法将仅在下一次插入时拆分。这让我变得一团糟。