我也是 jCUDA 和 java 的新手。我正在尝试在 Redhat Linux 上使用 eclipse 从 NVIDIA 示例中编译矢量添加程序。
我遵循的步骤: 1. 输入:nvcc -ptx JCudaVectorAddKernel.cu -> 它生成 JCudaVectorAddKernel.ptx 文件 2. 执行以下程序:JCudaVectorAdd.java:
包 JCudaVectorAdd;
import static jcuda.driver.JCudaDriver.*;
import java.io.*;
import jcuda.*;
import jcuda.driver.*;
public class JCudaVectorAdd
{
public static void main(String[] args) throws IOException
{
// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
// Create the PTX file by calling the NVCC
String ptxFileName = preparePtxFile("JCudaVectorAddKernel.cu");
// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);
// Load the ptx file.
CUmodule module = new CUmodule();
cuModuleLoad(module, ptxFileName);
// Obtain a function pointer to the "add" function.
CUfunction function = new CUfunction();
cuModuleGetFunction(function, module, "add");
int numElements = 100000;
// Allocate and fill the host input data
float hostInputA[] = new float[numElements];
float hostInputB[] = new float[numElements];
for(int i = 0; i < numElements; i++)
{
hostInputA[i] = (float)i;
hostInputB[i] = (float)i;
}
// Allocate the device input data, and copy the
// host input data to the device
CUdeviceptr deviceInputA = new CUdeviceptr();
cuMemAlloc(deviceInputA, numElements * Sizeof.FLOAT);
cuMemcpyHtoD(deviceInputA, Pointer.to(hostInputA),
numElements * Sizeof.FLOAT);
CUdeviceptr deviceInputB = new CUdeviceptr();
cuMemAlloc(deviceInputB, numElements * Sizeof.FLOAT);
cuMemcpyHtoD(deviceInputB, Pointer.to(hostInputB),
numElements * Sizeof.FLOAT);
// Allocate device output memory
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, numElements * Sizeof.FLOAT);
// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
Pointer kernelParameters = Pointer.to(
Pointer.to(new int[]{numElements}),
Pointer.to(deviceInputA),
Pointer.to(deviceInputB),
Pointer.to(deviceOutput)
);
// Call the kernel function.
int blockSizeX = 256;
int gridSizeX = (int)Math.ceil((double)numElements / blockSizeX);
cuLaunchKernel(function,
gridSizeX, 1, 1, // Grid dimension
blockSizeX, 1, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();
// Allocate host output memory and copy the device output
// to the host.
float hostOutput[] = new float[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput,
numElements * Sizeof.FLOAT);
// Verify the result
boolean passed = true;
for(int i = 0; i < numElements; i++)
{
float expected = i+i;
if (Math.abs(hostOutput[i] - expected) > 1e-5)
{
System.out.println(
"At index "+i+ " found "+hostOutput[i]+
" but expected "+expected);
passed = false;
break;
}
}
System.out.println("Test "+(passed?"PASSED":"FAILED"));
// Clean up.
cuMemFree(deviceInputA);
cuMemFree(deviceInputB);
cuMemFree(deviceOutput);
}
private static String preparePtxFile(String cuFileName) throws IOException
{
int endIndex = cuFileName.lastIndexOf('.');
if (endIndex == -1)
{
endIndex = cuFileName.length()-1;
}
String ptxFileName = cuFileName.substring(0, endIndex+1)+"ptx";
File ptxFile = new File(ptxFileName);
if (ptxFile.exists())
{
return ptxFileName;
}
File cuFile = new File(cuFileName);
if (!cuFile.exists())
{
throw new IOException("Input file not found: "+cuFileName);
}
String modelString = "-m"+System.getProperty("sun.arch.data.model");
String command =
"nvcc " + modelString + " -ptx "+
cuFile.getPath()+" -o "+ptxFileName;
System.out.println("Executing\n"+command);
Process process = Runtime.getRuntime().exec(command);
String errorMessage = new String(toByteArray(process.getErrorStream()));
String outputMessage= new String(toByteArray(process.getInputStream()));
int exitValue = 0;
try
{
exitValue = process.waitFor();
}
catch (InterruptedException e)
{
Thread.currentThread().interrupt();
throw new IOException(
"Interrupted while waiting for nvcc output", e);
}
if (exitValue != 0)
{
System.out.println("nvcc process exitValue "+exitValue);
System.out.println("errorMessage:\n"+errorMessage);
System.out.println("outputMessage:\n"+outputMessage);
throw new IOException(
"Could not create .ptx file: "+errorMessage);
}
System.out.println("Finished creating PTX file");
return ptxFileName;
}
private static byte[] toByteArray(InputStream inputStream) throws IOException
{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte buffer[] = new byte[8192];
while (true)
{
int read = inputStream.read(buffer);
if (read == -1)
{
break;
}
baos.write(buffer, 0, read);
}
return baos.toByteArray();
}
}
JCudaVectorAddKernel.cu:
extern "C"
__global__ void add(int n, float *a, float *b, float *sum)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i<n)
{
sum[i] = a[i] + b[i];
}
}
'JCudaVectorAddKernel.cu' 和 'JCudaVectorAddKernel.java' 都在同一路径上:/home/sandeep/workspace1/jCuda/jCudaVectorAdd/src/jCudaVectorAdd
当我在eclipse中执行程序时,它给了我以下错误:
Exception in thread "main" java.io.IOException: Input file not found: JCudaVectorAddKernel.cu
at JCudaVectorAdd.JCudaVectorAdd.preparePtxFile(JCudaVectorAdd.java:128)
at JCudaVectorAdd.JCudaVectorAdd.main(JCudaVectorAdd.java:20)
有什么与编译命令相关的事情吗?或 ptx/.cu 文件路径?如果我走错方向,请指导我。