

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package jcudavectoradd;

/*
 * @author Sanjula
 *
 * JCuda - Java bindings for NVIDIA CUDA driver and runtime API
 * http://www.jcuda.org
 * Copyright 2011 Marco Hutter - http://www.jcuda.org
 */
import static jcuda.driver.JCudaDriver.*;

import java.io.*;

import jcuda.*;
import jcuda.driver.*;

/**
 * This is a sample class demonstrating how to use the JCuda driver
 * bindings to load and execute a CUDA vector addition kernel.
 * The sample reads a CUDA file, compiles it to a PTX file
 * using NVCC, loads the PTX file as a module and executes
 * the kernel function.
 */
public class JCudaVectorAdd
     * Entry point of this sample
     * @param args Not used
     * @throws IOException If an IO error occurs
    public static void main(String args[]) throws IOException
        // Enable exceptions and omit all subsequent error checks

        // Create the PTX file by calling the NVCC
        String ptxFileName = preparePtxFile("JCudaVectorAddKernel.cu");
        //String ptxFileName = "JCudaVectorAddKernel.ptx";
        // Initialize the driver and create a context for the first device.
        CUdevice device = new CUdevice();
        cuDeviceGet(device, 0);
        CUcontext context = new CUcontext();
        cuCtxCreate(context, 0, device);

        // Load the ptx file.
        CUmodule module = new CUmodule();
        cuModuleLoad(module, ptxFileName);

        // Obtain a function pointer to the "add" function.
        CUfunction function = new CUfunction();
        cuModuleGetFunction(function, module, "add");

        int numElements = 100000;

        // Allocate and fill the host input data
        float hostInputA[] = new float[numElements];
        float hostInputB[] = new float[numElements];
        for(int i = 0; i < numElements; i++)
            hostInputA[i] = (float)i;
            hostInputB[i] = (float)i;

        // Allocate the device input data, and copy the
        // host input data to the device
        CUdeviceptr deviceInputA = new CUdeviceptr();
        cuMemAlloc(deviceInputA, numElements * Sizeof.FLOAT);
        cuMemcpyHtoD(deviceInputA, Pointer.to(hostInputA),
            numElements * Sizeof.FLOAT);
        CUdeviceptr deviceInputB = new CUdeviceptr();
        cuMemAlloc(deviceInputB, numElements * Sizeof.FLOAT);
        cuMemcpyHtoD(deviceInputB, Pointer.to(hostInputB),
            numElements * Sizeof.FLOAT);

        // Allocate device output memory
        CUdeviceptr deviceOutput = new CUdeviceptr();
        cuMemAlloc(deviceOutput, numElements * Sizeof.FLOAT);

        // Set up the kernel parameters: A pointer to an array
        // of pointers which point to the actual values.
        Pointer kernelParameters = Pointer.to(
            Pointer.to(new int[]{numElements}),

        // Call the kernel function.
        int blockSizeX = 256;
        int gridSizeX = (int)Math.ceil((double)numElements / blockSizeX);
            gridSizeX,  1, 1,      // Grid dimension
            blockSizeX, 1, 1,      // Block dimension
            0, null,               // Shared memory size and stream
            kernelParameters, null // Kernel- and extra parameters

        // Allocate host output memory and copy the device output
        // to the host.
        float hostOutput[] = new float[numElements];
        cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput,
            numElements * Sizeof.FLOAT);

        // Verify the result
        boolean passed = true;
        for(int i = 0; i < numElements; i++)
            float expected = i+i;
            if (Math.abs(hostOutput[i] - expected) > 1e-5)
                    "At index "+i+ " found "+hostOutput[i]+
                    " but expected "+expected);
                passed = false;
        System.out.println("Test "+(passed?"PASSED":"FAILED"));

        // Clean up.

     * The extension of the given file name is replaced with "ptx".
     * If the file with the resulting name does not exist, it is
     * compiled from the given file using NVCC. The name of the
     * PTX file is returned.
     * @param cuFileName The name of the .CU file
     * @return The name of the PTX file
     * @throws IOException If an I/O error occurs
    private static String preparePtxFile(String cuFileName) throws IOException
        int endIndex = cuFileName.lastIndexOf('.');
        if (endIndex == -1)
            endIndex = cuFileName.length()-1;
        String ptxFileName = cuFileName.substring(0, endIndex+1)+"ptx";
        File ptxFile = new File(ptxFileName);
        if (ptxFile.exists())
            return ptxFileName;
        File cuFile = new File(cuFileName);
        if (!cuFile.exists())
            throw new IOException("Input file not found: "+cuFileName);
        String modelString = "-m"+System.getProperty("sun.arch.data.model");
        String command =
            "nvcc " + modelString + " -ptx "+
            cuFile.getPath()+" -o "+ptxFileName;

        Process process = Runtime.getRuntime().exec(command);

        String errorMessage =
            new String(toByteArray(process.getErrorStream()));
        String outputMessage =
            new String(toByteArray(process.getInputStream()));
        int exitValue = 0;
            exitValue = process.waitFor();
        catch (InterruptedException e)
            throw new IOException(
                "Interrupted while waiting for nvcc output", e);

        if (exitValue != 0)
            System.out.println("nvcc process exitValue "+exitValue);
            throw new IOException(
                "Could not create .ptx file: "+errorMessage);

        System.out.println("Finished creating PTX file");
        return ptxFileName;

     * Fully reads the given InputStream and returns it as a byte array
     * @param inputStream The input stream to read
     * @return The byte array containing the data from the input stream
     * @throws IOException If an I/O error occurs
    private static byte[] toByteArray(InputStream inputStream)
        throws IOException
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        byte buffer[] = new byte[8192];
        while (true)
            int read = inputStream.read(buffer);
            if (read == -1)
            baos.write(buffer, 0, read);
        return baos.toByteArray();


// Element-wise vector addition: sum[i] = a[i] + b[i] for every i < n.
// Each thread handles the single element whose index is derived from its
// block and thread indices. Declared extern "C" so that the function can be
// looked up in the compiled module under the plain name "add".
//
//   n   - number of elements to process
//   a   - first input array
//   b   - second input array
//   sum - output array
extern "C"
__global__ void add(int n, float *a, float *b, float *sum)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    // The grid may contain more threads than elements; the extra threads
    // must not touch memory.
    if (i<n)
    {
        sum[i] = a[i] + b[i];
    }
}


当我编译这段代码时,我得到了这个错误。我使用的是 NetBeans 8.2,并且已经安装了 CUDA。它在 Visual Studio 2015 中可以正常运行,但在 Java 中无法运行。


1 回答 1


我把 Visual Studio 的 cl.exe 所在路径添加到了环境变量中:

C:\Program Files\Microsoft Visual Studio 10.0\VC\bin

操作步骤:转到“我的电脑”->“属性”->“高级系统设置”->“环境变量”,在列表中找到“PATH”,然后把上面的路径(或 cl.exe 实际所在的位置)添加进去。

于 2017-07-05T15:47:27.847 回答