A while ago I posted a question asking about a good design for an LRU cache (in C++). You can find the question, the answers, and some code there:
I am now trying to make this code multithreaded (using pthreads), and I am getting some very unexpected results. Before attempting any locking, I set up a system in which each thread accesses its own cache (see the code). I run this code on a 4-core processor, with 1 thread and with 4 threads. With 1 thread I do 1 million lookups in the cache; with 4 threads, each thread does 250K lookups. I expected the 4-thread run to take less time, but the opposite happens: 1 thread runs in 2.2 seconds, while 4 threads take more than 6 seconds?? I just can't make sense of this result.
Is there something wrong with my code? Or can it be explained somehow (does thread management take that much time)? It would be great to get feedback from experts. Many thanks!
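One note on the measurement itself: on Linux, clock() returns CPU time consumed by the whole process, summed over all of its threads, while gettimeofday() measures wall-clock time, so the two numbers diverge as soon as more than one thread runs. Below is a minimal sketch of a per-thread wall-clock timer using clock_gettime with CLOCK_MONOTONIC; the elapsedSeconds helper is just for illustration, and on older glibc you may need to link with -lrt:

#include <stdio.h>
#include <time.h>

// Illustrative helper: wall-clock seconds between two timespecs.
static double elapsedSeconds(const struct timespec &a, const struct timespec &b)
{
    return (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9;
}

int main()
{
    struct timespec t0, t1;
    clock_gettime(CLOCK_MONOTONIC, &t0); // wall clock, unaffected by other threads
    // ... the work to be timed goes here ...
    clock_gettime(CLOCK_MONOTONIC, &t1);
    printf("Wall time (sec): %f\n", elapsedSeconds(t0, t1));
    return 0;
}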
I compile this code with:
c++ -o cache cache.cpp -std=c++0x -O3 -lpthread
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <errno.h>
#include <sys/time.h>
#include <time.h>
#include <list>
#include <cstdlib>
#include <cstdio>
#include <memory>
#include <unordered_map>
#include <stdint.h>
#include <iostream>
typedef uint32_t data_key_t;
using namespace std;
class TileData
{
public:
    data_key_t theKey;
    float *data;
    static const uint32_t tileSize = 32;
    TileData(const data_key_t &key) : theKey(key), data(NULL)
    {
        // allocate this tile's payload and keep it in the member pointer
        data = new float [tileSize * tileSize * tileSize];
    }
    ~TileData()
    {
        /* std::cerr << "delete " << theKey << std::endl; */
        if (data) delete [] data;
    }
};
typedef shared_ptr<TileData> TileDataPtr; // automatic memory management!
TileDataPtr loadDataFromDisk(const data_key_t &theKey)
{
    return shared_ptr<TileData>(new TileData(theKey));
}
class CacheLRU
{
public:
    list<TileDataPtr> linkedList;
    unordered_map<data_key_t, TileDataPtr> hashMap;
    CacheLRU() : cacheHit(0), cacheMiss(0) {}
    TileDataPtr getData(data_key_t theKey)
    {
        unordered_map<data_key_t, TileDataPtr>::const_iterator iter = hashMap.find(theKey);
        if (iter != hashMap.end()) {
            TileDataPtr ret = iter->second;
            linkedList.remove(ret); // O(n): scans the list to move the entry to the front
            linkedList.push_front(ret);
            ++cacheHit;
            return ret;
        }
        else {
            ++cacheMiss;
            TileDataPtr ret = loadDataFromDisk(theKey);
            linkedList.push_front(ret);
            hashMap.insert(make_pair(theKey, ret));
            if (linkedList.size() > MAX_LRU_CACHE_SIZE) {
                const TileDataPtr dropMe = linkedList.back();
                hashMap.erase(dropMe->theKey);
                linkedList.pop_back(); // the LRU entry is the back one: O(1)
            }
            return ret;
        }
    }
    static const uint32_t MAX_LRU_CACHE_SIZE = 100;
    uint32_t cacheHit, cacheMiss;
};
const int numThreads = 1; // set to 4 for the four-thread run
void *testCache(void *data)
{
    const uint32_t lookups = 1000000 / numThreads;
    printf("Starting thread, lookups %u\n", lookups);
    // NB: clock() returns CPU time consumed by the whole process,
    // not the wall-clock time of this one thread.
    double t = clock();
    CacheLRU *cache = new CacheLRU;
    for (uint32_t i = 0; i < lookups; ++i) {
        int key = random() % 300; // random() draws from shared global state
        TileDataPtr tileDataPtr = cache->getData(key);
    }
    std::cerr << "Time (sec): " << (clock() - t) / CLOCKS_PER_SEC << std::endl;
    delete cache;
    return NULL;
}
int main()
{
    pthread_t thr[numThreads];
    struct timeval tv1, tv2;
    // Measuring time before starting the threads...
    gettimeofday(&tv1, NULL);
    for (int i = 0; i < numThreads; ++i) {
        pthread_create(&thr[i], NULL, testCache, NULL);
    }
    for (int i = 0; i < numThreads; ++i) {
        pthread_join(thr[i], NULL);
    }
    // Measuring time after threads finished...
    gettimeofday(&tv2, NULL);
    if (tv1.tv_usec > tv2.tv_usec) {
        tv2.tv_sec--;
        tv2.tv_usec += 1000000;
    }
    printf("Result - %ld.%06ld\n", tv2.tv_sec - tv1.tv_sec,
           tv2.tv_usec - tv1.tv_usec);
    return 0;
}
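For the next step, when the threads do share one cache, the plan is simply to serialize access with a pthread mutex. A minimal sketch of that wrapper (illustrative and untested; SharedCacheLRU is just a name I made up, and it assumes the CacheLRU class above):

#include <pthread.h>

// Sketch: one cache shared by every thread, protected by a single mutex.
// Assumes the CacheLRU / TileDataPtr / data_key_t definitions above.
class SharedCacheLRU
{
public:
    SharedCacheLRU()  { pthread_mutex_init(&mutex, NULL); }
    ~SharedCacheLRU() { pthread_mutex_destroy(&mutex); }
    TileDataPtr getData(data_key_t theKey)
    {
        pthread_mutex_lock(&mutex); // only one thread inside the cache at a time
        TileDataPtr ret = cache.getData(theKey);
        pthread_mutex_unlock(&mutex);
        return ret;
    }
private:
    CacheLRU cache;
    pthread_mutex_t mutex;
};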
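And one design note carried over from the earlier LRU question: list::remove() is O(n), so every cache hit walks the whole list. The usual fix is to store list iterators in the hash map and move nodes with splice(), which is O(1). A sketch under the same types as above (CacheLRU2 is just an illustrative name):

#include <list>
#include <unordered_map>

// Sketch: LRU bookkeeping with O(1) move-to-front, reusing the
// TileDataPtr / data_key_t / loadDataFromDisk definitions above.
class CacheLRU2
{
public:
    TileDataPtr getData(data_key_t theKey)
    {
        MapType::iterator iter = hashMap.find(theKey);
        if (iter != hashMap.end()) {
            // splice relinks the node at the front without copying: O(1)
            linkedList.splice(linkedList.begin(), linkedList, iter->second);
            return linkedList.front();
        }
        TileDataPtr ret = loadDataFromDisk(theKey);
        linkedList.push_front(ret);
        hashMap[theKey] = linkedList.begin();
        if (linkedList.size() > MAX_LRU_CACHE_SIZE) {
            hashMap.erase(linkedList.back()->theKey);
            linkedList.pop_back(); // evict the least recently used entry: O(1)
        }
        return ret;
    }
private:
    typedef list<TileDataPtr>::iterator ListIter;
    typedef unordered_map<data_key_t, ListIter> MapType;
    list<TileDataPtr> linkedList;
    MapType hashMap;
    static const uint32_t MAX_LRU_CACHE_SIZE = 100;
};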