2

http://www.thesalmons.org/john/random123/releases/1.00/docs/index.html

我很难看懂 OpenCL 和 Random123 的示例:我是 OpenCL 的新手,不确定在 Visual Studio 2010 中应该如何使用文档里提供的信息。

有没有人可以写一份指南,说明如何在 Visual Studio 2010 中使用上述库生成随机数?

更新:我已按如下方式解决了这个问题,现在想知道如何更改种子,以便每次运行都能得到不同的随机数。

int main(int argc, char **argv)
{       
    const char *kernelname = "counthits";
    unsigned count =10000;

    cl_int              err;
    cl::Context         cl_context;
    cl::Program         program;
    cl::Kernel          cl_kernel;
    cl::Buffer          cl_out;
    cl::CommandQueue    cl_queue;

    size_t i, nthreads, hits_sz;
    size_t cores, work_group_size;
    cl_uint2 *          hits_host;

    double              d = 0.; // timer

    d = timer(&d);
    progname = argv[0];

    std::vector< cl::Platform > platformList;   
    CHECK(cl::Platform::get(&platformList));        
    CHECKERR(  cl_context = createCLContext(CL_DEVICE_TYPE_GPU,cl_vendor::VENDOR_AMD, &err) );

    std::vector<cl::Device> devices;
    CHECKERR( devices = cl_context.getInfo<CL_CONTEXT_DEVICES>(&err) );


    size_t length = 0;
    const char * sourceStr = loadFileToString("pi_opencl_kernel.ocl","",&length);

    cl::Program::Sources sources(1, std::make_pair(sourceStr, length));
    program = cl::Program(cl_context, sources);

    CHECK( program.build(devices,"-I D:\\libs\\Random123\\1.06\\include") );

    CHECKERR(work_group_size = devices[0].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(&err) );
    CHECKERR(cores = devices[0].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(&err) );
    cores *= 16*4; //Tahiti.

    if (work_group_size > 64) work_group_size /= 2;
    nthreads = cores * work_group_size*32; //2048*128 = 262144

    if (count == 0)
    count = NTRIES/nthreads; //38

    printf("Count: %lu\n",count);



    hits_sz = nthreads * sizeof(hits_host[0]);//2097152
    CHECKNOTZERO(hits_host = (cl_uint2 *)malloc(hits_sz));

    CHECKERR    ( cl_out = cl::Buffer(  cl_context,  CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, hits_sz, hits_host, &err));
    CHECKERR    ( cl_kernel = cl::Kernel(program,kernelname,&err) );
    CHECK       ( cl_kernel.setArg( 0, count) );
    CHECK       ( cl_kernel.setArg( 1, cl_out) );

    CHECKERR (cl_queue = cl::CommandQueue(cl_context, devices[0], 0, &err) );
    cl::Event event;

    CHECK( cl_queue.enqueueNDRangeKernel(cl_kernel,cl::NullRange,cl::NDRange(nthreads), cl::NDRange(work_group_size), NULL,  &event) );
    event.wait();
    CHECK( cl_queue.enqueueReadBuffer(cl_out, CL_TRUE, 0,hits_sz, hits_host) );

    unsigned long hits = 0, tries = 0;
    for (i = 0; i < nthreads; i++) {
#ifdef _DEBUG   
        printf("%lu %u %u\n", (unsigned long)i, hits_host[i].s[0], hits_host[i].s[1]);
#endif
    hits += hits_host[i].s[0];
    tries += hits_host[i].s[1];
    }


    return pi_check(hits, tries);
}

内核(kernel)代码:

#include <Random123/threefry.h>

/*
 * counthits: each work-item draws pseudo-random (x, y) points with
 * threefry4x32 and counts how many satisfy x*x + y*y < 2^62.
 * Every threefry4x32 call yields four 32-bit values, read as two signed
 * points, so the loop advances two tries at a time until at least n
 * points have been tried.  The work-item stores its hit count in
 * hitsp[tid].x and its try count in hitsp[tid].y.
 */
__kernel void counthits(unsigned n, __global uint2 *hitsp) {
    const unsigned gid = get_global_id(0);
    /* Threshold for the squared distance: 2^62. */
    const long limit = 1L<<62;

    /* The key's first word is the work-item id; the counter's first word
     * is bumped before every draw. */
    threefry4x32_key_t key = {{gid, 0xdecafbad, 0xfacebead, 0x12345678}};
    threefry4x32_ctr_t ctr = {{0, 0xf00dcafe, 0xdeadbeef, 0xbeeff00d}};

    unsigned inside = 0;
    unsigned sampled = 0;

    for (; sampled < n; sampled += 2) {
        /* View the four 32-bit output words as signed integers. */
        union {
            threefry4x32_ctr_t c;
            int4 i;
        } draw;

        ctr.v[0] += 1;
        draw.c = threefry4x32(ctr, key);

        /* Widen to 64 bits before squaring. */
        const long ax = draw.i.x;
        const long ay = draw.i.y;
        const long bx = draw.i.z;
        const long by = draw.i.w;

        if ((ax*ax + ay*ay) < limit)
            inside += 1;
        if ((bx*bx + by*by) < limit)
            inside += 1;
    }

    hitsp[gid].x = inside;
    hitsp[gid].y = sampled;
}
4

1 回答 1

0

我没有对此进行测试,但粗略地说,类似于以下内容:

  1. 尝试将 counthits 的签名更改为:

__kernel void counthits(unsigned n, __global uint2 *hitsp, unsigned seed)

  2. 在内核中用 seed 替换 0xdecafbad

  3. 在主程序中添加

char *seedstr = getenv("COUNTHITS_SEED");

unsigned seed = seedstr ? atoi(seedstr) : 0xdecafbad;

...

CHECK( cl_kernel.setArg(2, seed) );

(当然,这个 setArg 调用要放在 setArg(1, ...) 之后)。

于 2016-03-04T01:03:09.680 回答