c - 我的跳过列表是否真的在 N 而不是 log(N) 中搜索？

Question

所以我最终认为我完成了一个工作跳过列表的创建，并认为我应该检查我的工作并研究搜索功能的时间安排。我找到了一个关于如何使用clock()和实现它的教程：

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "skiplist.h"

int main() {
    // initialize
    SkipList *list = initSkipList();
    // numbers to search for, array to store time
    int n[] = {1, 10, 50, 100, 1000, 5000, 10000, 25000, 50000, 100000, 200000};
    double time[11];

    // insert
    for (int i = 1; i <= 200000; i++)
        insertElement(list, i);

    // search, time, print
    for (int i = 0; i < 11; i++) {
        clock_t t;
        t = clock();
        findElement(list, n[i]);
        t = clock() - t;
        double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
        ti[i] = time_taken;
        printf("fun() took %f seconds to execute \n", time_taken);
    }
    return 0;
}

我想通过这样做并绘制 N 与时间的关系，我会得到一个看起来对数的图形，但我的图形看起来是线性的：

我是否对如何安排我的函数计时以尝试测试时间复杂性有误解，或者我的跳过列表只是没有按应有的方式搜索？这是我的整个跳过列表实现以防万一，但调用了搜索功能findElement()：

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

#ifndef SKIPLIST_H_
#define SKIPLIST_H_


// number of lists
#define MAXLEVEL 5 

// node with pointers
typedef struct Node {
    int data;
    struct Node *next[1]; // or C99 using flexible array members: *next[]
} Node;


// skiplist
typedef struct SkipList {
    Node *header;
    int level;
    int count;
} SkipList;


// initialize skip list
SkipList* initSkipList() {  
    int i;
    SkipList *list = calloc(1, sizeof(SkipList));
    if (!list) {
        printf("Memory Error\n");
        exit(1);
    }
    //list->level = 0;  
    if ((list->header = calloc(1, sizeof(Node) + MAXLEVEL*sizeof(Node*))) == 0) {
        printf("Memory Error\n");
        exit(1);
    }   
    for (i = 0; i <= MAXLEVEL; i++)
        list->header->next[i] = list->header;   // or = list->header?
    printf("Skip list initialized.\n");
    //srand(time(NULL));
    return list;
}

// insert into skip list, return pointer to node
Node* insertElement(SkipList *list,int data) {
    int i, newLevel;
    Node* temp = list->header;
    Node *update[MAXLEVEL+1];

    // find where data belongs
    for (i = list->level; i >= 0; i--) {
        while(temp->next[i] != list->header && temp->next[i]->data < data)
            temp = temp->next[i];
        update[i] = temp;
    }
    temp = temp->next[0];
    // if element already exists, just return it (no duplicates)
    if (temp != list->header && temp->data == data)
        return temp;

    // determine level (coin flips till failure or max level reached)
    for (newLevel = 0; (rand() < RAND_MAX/2) && (newLevel < MAXLEVEL); newLevel++); // Pr(4) == Pr(5)??
    if (newLevel > list->level) {
        for (i = list->level + 1; i <= newLevel; i++)
            update[i] =  list->header;
        list->level = newLevel;
    }

    // make new  node
    if ((temp = calloc(1, sizeof(Node) + newLevel*sizeof(Node*))) == 0) {
        printf("Memory Error\n");
        exit(1);
    }
    temp->data = data;
    list->count++;
    // update next links
    for (i = 0; i <= newLevel; i++) {
        temp->next[i] = update[i]->next[i];
        update[i]->next[i] = temp;
    }

    //printf("Element %d inserted into list. (level %d)\n", data, newLevel);
    return temp;
}


// delete node containing data
void deleteElement(SkipList *list, int data) {
    int i;
    Node *update[MAXLEVEL+1], *temp;
    temp = list->header;
    for (i = list->level; i >= 0; i--) {
        while (temp->next[i] != list->header && temp->next[i]->data < data)
            temp = temp->next[i];
        update[i] = temp;
    }
    // move to (possible) node to delete
    temp = temp->next[0];
    // if doesn't exist
    if (temp == list->header || temp->data != data) {
        printf("Element %d doesn't exist.\n", data);    
        return;
    }
    // adjust next pointers
    for (i = 0; i <= list->level; i++) {
        if (update[i]->next[i] != temp) break;
        update[i]->next[i] = temp->next[i];
    }
    free (temp);
    printf("Element %d deleted from list.\n", data);
    list->count--;
    // adjust header level
    while ((list->level > 0) && (list->header->next[list->level] == list->header)) // double check
        list->level--;
}


// find node containing data
Node* findElement(SkipList *list, int data){
    int i;
    Node *temp = list->header;
    for (i = list->level; i >= 0; i--) {
        while (temp->next[i] != list->header && temp->next[i]->data < data)
            temp = temp->next[i];
    }
    temp = temp->next[0];
    if (temp != list->header && temp->data == data) {
        printf("Element %d found and returned.\n", data);
        return (temp);
    }
    printf("Element %d not found.\n", data);
    return 0;
}


/* Dynamic array for use in printSkipList() function */
typedef struct {
    int *array;
    size_t used;
    size_t size;
} Array;
void initArray(Array *a, size_t initialSize) {
    a->array = malloc(initialSize * sizeof(int));
    a->used = 0;
    a->size = initialSize;
}
void insertArray(Array *a, int element) {
    if (a->used == a->size) {
        a->size *= 2;
        a->array = realloc(a->array, a->size * sizeof(int));
    }
    a->array[a->used++] = element;
}
void freeArray(Array *a) {
    free(a->array);
    a->array = NULL;
    a->used = a->size = 0;
}


// print skip-list info and representational figure
void printSkipList(SkipList *list) {
    int i, j, k, pos = 0, prevPos = 0, diff, numDigits;
    Node* temp = list->header;
    Array a;

    // fill dynamic array with level 0 elements
    initArray(&a, 10);
    while (temp->next[0] != list->header) {
        temp = temp->next[0];
        insertArray(&a, temp->data);
    }
    temp = list->header;
    // print number of levels
    printf("\nNumber of levels in skip-list: %d\n", list->level + 1);
    printf("Number of elements in skip-list: %d\n", list->count);
    printf("Skip-list figure: \n");
    // print data
    for (i = list->level; i >= 0; i--) {
        pos = 0, prevPos = 0;
        while (temp->next[i] != list->header) {
        numDigits = 0;      
            temp = temp->next[i];
            while (temp->data != a.array[pos]) {
                numDigits += floor (log10 (abs (a.array[pos]))) + 1;
                pos++;
            }
            pos++;
            diff = (pos - prevPos) - 1; 
            if (diff >= 1) {
                for (j = 0; j < (4*diff)+numDigits; j++) 
                        printf(" ");    
            }           
            printf("%d -> ", temp->data);
            prevPos = pos;
        }
        temp = list->header;
        printf("\n");       
    }
    printf("\n\n");
}

#endif // SKIPLIST_H_

非常感谢任何建议，谢谢。

score 2 · Accepted Answer

你的 MAXLEVEL 太小了。我认为原始论文的级别最高为 lg(n)，即列表大小的 log base 2。

来自 Puch 1990 年的原始跳过列表论文：

确定 MaxLevel

由于我们可以安全地将级别限制在 L(n)，我们应该选择 MaxLevel = L(N)（其中 N 是跳过列表中元素数量的上限）。如果 p = l/2，使用 MaxLevel = 16 适用于包含多达 2^16 个元素的数据结构

一般来说，如果 p 是 1/X，则使用列表大小的 log base X。

MAXLEVEL=5，我得到的结果和你看到的差不多。

evaitl@bb ~/se $ ./foo 
Skip list initialized.
Element 1 found and returned.
fun() took 0.000014 seconds to execute 
Element 10 found and returned.
fun() took 0.000002 seconds to execute 
Element 50 found and returned.
fun() took 0.000002 seconds to execute 
Element 100 found and returned.
fun() took 0.000002 seconds to execute 
Element 1000 found and returned.
fun() took 0.000003 seconds to execute 
Element 5000 found and returned.
fun() took 0.000004 seconds to execute 
Element 10000 found and returned.
fun() took 0.000006 seconds to execute 
Element 25000 found and returned.
fun() took 0.000011 seconds to execute 
Element 50000 found and returned.
fun() took 0.000021 seconds to execute 
Element 100000 found and returned.
fun() took 0.000044 seconds to execute 
Element 200000 found and returned.
fun() took 0.000087 seconds to execute

将 MAXLEVEL 提高到 20，我得到：

evaitl@bb ~/se $ ./foo 
Skip list initialized.
Element 1 found and returned.
fun() took 0.000016 seconds to execute 
Element 10 found and returned.
fun() took 0.000003 seconds to execute 
Element 50 found and returned.
fun() took 0.000003 seconds to execute 
Element 100 found and returned.
fun() took 0.000002 seconds to execute 
Element 1000 found and returned.
fun() took 0.000002 seconds to execute 
Element 5000 found and returned.
fun() took 0.000003 seconds to execute 
Element 10000 found and returned.
fun() took 0.000004 seconds to execute 
Element 25000 found and returned.
fun() took 0.000003 seconds to execute 
Element 50000 found and returned.
fun() took 0.000004 seconds to execute 
Element 100000 found and returned.
fun() took 0.000003 seconds to execute 
Element 200000 found and returned.
fun() took 0.000004 seconds to execute

将 400000 和 800000 添加到您的 n[]：

evaitl@bb ~/se $ ./foo 
Skip list initialized.
Element 1 found and returned.
fun() took 0.000016 seconds to execute 
Element 10 found and returned.
fun() took 0.000001 seconds to execute 
Element 50 found and returned.
fun() took 0.000001 seconds to execute 
Element 100 found and returned.
fun() took 0.000002 seconds to execute 
Element 1000 found and returned.
fun() took 0.000002 seconds to execute 
Element 5000 found and returned.
fun() took 0.000002 seconds to execute 
Element 10000 found and returned.
fun() took 0.000002 seconds to execute 
Element 25000 found and returned.
fun() took 0.000004 seconds to execute 
Element 50000 found and returned.
fun() took 0.000003 seconds to execute 
Element 100000 found and returned.
fun() took 0.000003 seconds to execute 
Element 200000 found and returned.
fun() took 0.000003 seconds to execute 
Element 400000 found and returned.
fun() took 0.000004 seconds to execute 
Element 800000 found and returned.
fun() took 0.000003 seconds to execute

score 1 · Accepted Answer

您的计时方法非常规。通常，要检查算法性能，您

取几次尝试的平均值。
时间几个不同大小容器的性能。

在伪代码中：

For N in [2..9]:
  Fill container with 10^N items
  lookupTime = 0;
  generate M values in range
  for i in [1..M]:
    lookupTime += duration(lookup(value[i]))
  performance[N] = lookupTime/M;

c - 我的跳过列表是否真的在 N 而不是 log(N) 中搜索？

2 回答 2

Related

Reference