我从这里遵循特征分解的例子, https://github.com/NVIDIA/CUDALibrarySamples/blob/master/cuSOLVER/syevd/cusolver_syevd_example.cu
我需要为 Hermatian 复数矩阵做这件事。问题是特征向量与 Matlab 结果的结果完全不匹配。
有谁知道为什么会发生这种不匹配?
我还尝试了 cusolverdn svd 方法来获取特征值和给出另一个结果的向量。
为了方便起见,我的代码在这里,
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <cuda_runtime.h>
#include <cusolverDn.h>
#include "cusolver_utils.h"
int N = 16;
void BuildMatrix(cuComplex* input);
void main()
{
cusolverDnHandle_t cusolverH = NULL;
cudaStream_t stream = NULL;
printf("*******************\n");
cuComplex* h_idata = (cuComplex*)malloc(sizeof(cuComplex) * N);
cuComplex* h_eigenVector = (cuComplex*)malloc(sizeof(cuComplex) * N); // eigen vector
float* h_eigenValue = (float*)malloc(sizeof(float) * 4); // eigen Value
BuildMatrix(h_idata);
int count = 0;
for (int i = 0; i < N / 4; i++)
{
for (int j = 0; j < 4; j++)
{
printf("%f + %f\t", h_idata[count].x, h_idata[count].y);
count++;
}
printf("\n");
}
printf("\n*****************\n");
/* step 1: create cusolver handle, bind a stream */
CUSOLVER_CHECK(cusolverDnCreate(&cusolverH));
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
CUSOLVER_CHECK(cusolverDnSetStream(cusolverH, stream));
// step 2: reserve memory in cuda and copy input data from host to device
cuComplex* d_idata;
float* d_eigenValue = nullptr;
int* d_info = nullptr;
CUDA_CHECK(cudaMalloc((void**)&d_idata, N * sizeof(cuComplex)));
CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_eigenValue), N * sizeof(float)));
CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_info), sizeof(int)));
CUDA_CHECK(cudaMemcpyAsync(d_idata, h_idata, N * sizeof(cuComplex), cudaMemcpyHostToDevice, stream));
// step 3: query working space of syevd
cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_VECTOR; // compute eigenvalues and eigenvectors.
cublasFillMode_t uplo = CUBLAS_FILL_MODE_LOWER;
int lwork = 0; /* size of workspace */
cuComplex* d_work = nullptr; /* device workspace*/
const int m = 4;
const int lda = m;
cusolverDnCheevd_bufferSize(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, &lwork);
CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&d_work), sizeof(cuComplex) * lwork));
// step 4: compute spectrum
cusolverDnCheevd(cusolverH, jobz, uplo, m, d_idata, lda, d_eigenValue, d_work, lwork, d_info);
CUDA_CHECK(
cudaMemcpyAsync(h_eigenVector, d_idata, N * sizeof(cuComplex), cudaMemcpyDeviceToHost, stream));
CUDA_CHECK(
cudaMemcpyAsync(h_eigenValue, d_eigenValue, 4 * sizeof(double), cudaMemcpyDeviceToHost, stream));
int info = 0;
CUDA_CHECK(cudaMemcpyAsync(&info, d_info, sizeof(int), cudaMemcpyDeviceToHost, stream));
CUDA_CHECK(cudaStreamSynchronize(stream));
std::printf("after syevd: info = %d\n", info);
if (0 > info)
{
std::printf("%d-th parameter is wrong \n", -info);
exit(1);
}
count = 0;
for (int i = 0; i < N / 4; i++)
{
for (int j = 0; j < 4; j++)
{
printf("%f + %f\t", h_eigenVector[count].x, h_eigenVector[count].y);
count++;
}
printf("\n");
}
printf("\n");
for (int i = 0; i < N / 4; i++)
{
std::cout << h_eigenValue[i] << std::endl;
}
printf("\n*****************\n");
/* free resources */
CUDA_CHECK(cudaFree(d_idata));
CUDA_CHECK(cudaFree(d_eigenValue));
CUDA_CHECK(cudaFree(d_info));
CUDA_CHECK(cudaFree(d_work));
CUSOLVER_CHECK(cusolverDnDestroy(cusolverH));
CUDA_CHECK(cudaStreamDestroy(stream));
CUDA_CHECK(cudaDeviceReset());
}
//0.5560 + 0.0000i - 0.4864 + 0.0548i 0.8592 + 0.2101i - 1.5374 - 0.2069i
//- 0.4864 - 0.0548i 0.4317 + 0.0000i - 0.7318 - 0.2698i 1.3255 + 0.3344i
//0.8592 - 0.2101i - 0.7318 + 0.2698i 1.4099 + 0.0000i - 2.4578 + 0.2609i
//- 1.5374 + 0.2069i 1.3255 - 0.3344i - 2.4578 - 0.2609i 4.3333 + 0.0000i
void BuildMatrix(cuComplex* input)
{
std::vector<float> realVector = { 0.5560, -0.4864, 0.8592, -1.5374, -0.4864, 0.4317, -0.7318, 1.3255,
0.8592, -0.7318, 1.4099, -2.4578, -1.5374, 1.3255, -2.4578, 4.3333 };
std::vector<float> imagVector = { 0, -0.0548, -0.2101, 0.2069, 0.0548, 0.0000, 0.2698, -0.3344,
0.2101, -0.2698, 0, -0.2609, -0.2069, 0.3344, 0.2609, 0 };
for (int i = 0; i < N; i++)
{
input[i].x = realVector.at(i) * std::pow(10, 11);
input[i].y = imagVector.at(i) * std::pow(10, 11);
}
}
我在他们的 git ( https://github.com/NVIDIA/CUDALibrarySamples/issues/58 ) 中提出了这个问题,但不幸的是没有人回答。
如果有人可以帮助我解决这个问题,那将非常有帮助。