我有一些生成的 SPIR-V 代码,我想与 vulkan API 一起使用。但是我
Exception thrown at 0x00007FFB68D933CB (nvoglv64.dll) in vulkanCompute.exe: 0xC0000005: Access violation reading location 0x0000000000000008.
在尝试使用vkCreateComputePipelines
.
API 调用应该没问题,因为相同的代码适用于使用glslangValidator编译的着色器。因此,我假设生成的 SPIR-V 代码一定是格式错误的。
我已经使用khronos 的验证器工具检查了 SPIR-V 代码,使用spirv-val --target-env vulkan1.1 mainV.spv
它退出时没有错误。无论如何,也知道这个工具仍然不完整。
我还尝试使用Radeon GPU Analyzer来编译我的 SPIR-V 代码,该代码也可以在着色器游乐场在线获得,并且该工具会抛出Error: Error: internal error: Bil::BilInstructionConvert::Create(60) Code Not Tested!
实际上没有帮助的错误,但鼓励假设代码格式错误。
不幸的是,SPIR-V 代码很长时间无法在此处发布,但它位于着色器操场的链接中。
有谁知道我的设置有什么问题,或者知道如何以更好的方式验证我的 SPIR-V 代码,而无需手动检查所有 700 行代码。
我认为问题不存在,但无论如何这里是 c++ 主机代码:
#include "vulkan/vulkan.hpp"
#include <iostream>
#include <fstream>
#include <vector>
#define BAIL_ON_BAD_RESULT(result) \
if (VK_SUCCESS != (result)) \
{ \
fprintf(stderr, "Failure at %u %s\n", __LINE__, __FILE__); \
exit(-1); \
}
VkResult vkGetBestComputeQueueNPH(vk::PhysicalDevice &physicalDevice, uint32_t &queueFamilyIndex)
{
auto properties = physicalDevice.getQueueFamilyProperties();
int i = 0;
for (auto prop : properties)
{
vk::QueueFlags maskedFlags = (~(vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding) & prop.queueFlags);
if (!(vk::QueueFlagBits::eGraphics & maskedFlags) && (vk::QueueFlagBits::eCompute & maskedFlags))
{
queueFamilyIndex = i;
return VK_SUCCESS;
}
i++;
}
i = 0;
for (auto prop : properties)
{
vk::QueueFlags maskedFlags = (~(vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding) & prop.queueFlags);
if (vk::QueueFlagBits::eCompute & maskedFlags)
{
queueFamilyIndex = i;
return VK_SUCCESS;
}
i++;
}
return VK_ERROR_INITIALIZATION_FAILED;
}
int main(int argc, const char *const argv[])
{
(void)argc;
(void)argv;
try
{
// initialize the vk::ApplicationInfo structure
vk::ApplicationInfo applicationInfo("VecAdd", 1, "Vulkan.hpp", 1, VK_API_VERSION_1_1);
// initialize the vk::InstanceCreateInfo
std::vector<char *> layers = {
"VK_LAYER_LUNARG_api_dump",
"VK_LAYER_KHRONOS_validation"
};
vk::InstanceCreateInfo instanceCreateInfo({}, &applicationInfo, static_cast<uint32_t>(layers.size()), layers.data());
// create a UniqueInstance
vk::UniqueInstance instance = vk::createInstanceUnique(instanceCreateInfo);
auto physicalDevices = instance->enumeratePhysicalDevices();
for (auto &physicalDevice : physicalDevices)
{
auto props = physicalDevice.getProperties();
// get the QueueFamilyProperties of the first PhysicalDevice
std::vector<vk::QueueFamilyProperties> queueFamilyProperties = physicalDevice.getQueueFamilyProperties();
uint32_t computeQueueFamilyIndex = 0;
// get the best index into queueFamiliyProperties which supports compute and stuff
BAIL_ON_BAD_RESULT(vkGetBestComputeQueueNPH(physicalDevice, computeQueueFamilyIndex));
std::vector<char *>extensions = {"VK_EXT_external_memory_host", "VK_KHR_shader_float16_int8"};
// create a UniqueDevice
float queuePriority = 0.0f;
vk::DeviceQueueCreateInfo deviceQueueCreateInfo(vk::DeviceQueueCreateFlags(), static_cast<uint32_t>(computeQueueFamilyIndex), 1, &queuePriority);
vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceShaderFloat16Int8Features> createDeviceInfo = {
vk::DeviceCreateInfo(vk::DeviceCreateFlags(), 1, &deviceQueueCreateInfo, 0, nullptr, static_cast<uint32_t>(extensions.size()), extensions.data()),
vk::PhysicalDeviceFeatures2(),
vk::PhysicalDeviceShaderFloat16Int8Features()
};
createDeviceInfo.get<vk::PhysicalDeviceFeatures2>().features.setShaderInt64(true);
createDeviceInfo.get<vk::PhysicalDeviceShaderFloat16Int8Features>().setShaderInt8(true);
vk::UniqueDevice device = physicalDevice.createDeviceUnique(createDeviceInfo.get<vk::DeviceCreateInfo>());
auto memoryProperties2 = physicalDevice.getMemoryProperties2();
vk::PhysicalDeviceMemoryProperties const &memoryProperties = memoryProperties2.memoryProperties;
const int32_t bufferLength = 16384;
const uint32_t bufferSize = sizeof(int32_t) * bufferLength;
// we are going to need two buffers from this one memory
const vk::DeviceSize memorySize = bufferSize * 3;
// set memoryTypeIndex to an invalid entry in the properties.memoryTypes array
uint32_t memoryTypeIndex = VK_MAX_MEMORY_TYPES;
for (uint32_t k = 0; k < memoryProperties.memoryTypeCount; k++)
{
if ((vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent) & memoryProperties.memoryTypes[k].propertyFlags &&
(memorySize < memoryProperties.memoryHeaps[memoryProperties.memoryTypes[k].heapIndex].size))
{
memoryTypeIndex = k;
std::cout << "found memory " << memoryTypeIndex + 1 << " out of " << memoryProperties.memoryTypeCount << std::endl;
break;
}
}
BAIL_ON_BAD_RESULT(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS);
auto memory = device->allocateMemoryUnique(vk::MemoryAllocateInfo(memorySize, memoryTypeIndex));
auto in_buffer = device->createBufferUnique(vk::BufferCreateInfo(vk::BufferCreateFlags(), bufferSize, vk::BufferUsageFlagBits::eStorageBuffer, vk::SharingMode::eExclusive));
device->bindBufferMemory(in_buffer.get(), memory.get(), 0);
// create a DescriptorSetLayout
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBinding{
{0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}};
vk::UniqueDescriptorSetLayout descriptorSetLayout = device->createDescriptorSetLayoutUnique(vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), static_cast<uint32_t>(descriptorSetLayoutBinding.size()), descriptorSetLayoutBinding.data()));
std::cout << "Memory bound" << std::endl;
std::ifstream myfile;
myfile.open("shaders/MainV.spv", std::ios::ate | std::ios::binary);
if (!myfile.is_open())
{
std::cout << "File not found" << std::endl;
return EXIT_FAILURE;
}
auto size = myfile.tellg();
std::vector<unsigned int> shader_spv(size / sizeof(unsigned int));
myfile.seekg(0);
myfile.read(reinterpret_cast<char *>(shader_spv.data()), size);
myfile.close();
std::cout << "Shader size: " << shader_spv.size() << std::endl;
auto shaderModule = device->createShaderModuleUnique(vk::ShaderModuleCreateInfo(vk::ShaderModuleCreateFlags(), shader_spv.size() * sizeof(unsigned int), shader_spv.data()));
// create a PipelineLayout using that DescriptorSetLayout
vk::UniquePipelineLayout pipelineLayout = device->createPipelineLayoutUnique(vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), 1, &descriptorSetLayout.get()));
vk::ComputePipelineCreateInfo computePipelineInfo(
vk::PipelineCreateFlags(),
vk::PipelineShaderStageCreateInfo(
vk::PipelineShaderStageCreateFlags(),
vk::ShaderStageFlagBits::eCompute,
shaderModule.get(),
"_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE6VecAdd"),
pipelineLayout.get());
auto pipeline = device->createComputePipelineUnique(nullptr, computePipelineInfo);
auto descriptorPoolSize = vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, 2);
auto descriptorPool = device->createDescriptorPool(vk::DescriptorPoolCreateInfo(vk::DescriptorPoolCreateFlags(), 1, 1, &descriptorPoolSize));
auto commandPool = device->createCommandPoolUnique(vk::CommandPoolCreateInfo(vk::CommandPoolCreateFlags(), computeQueueFamilyIndex));
auto commandBuffer = std::move(device->allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool.get(), vk::CommandBufferLevel::ePrimary, 1)).front());
commandBuffer->begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)));
commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, pipeline.get());
commandBuffer->dispatch(bufferSize / sizeof(int32_t), 1, 1);
commandBuffer->end();
auto queue = device->getQueue(computeQueueFamilyIndex, 0);
vk::SubmitInfo submitInfo(0, nullptr, nullptr, 1, &commandBuffer.get(), 0, nullptr);
queue.submit(1, &submitInfo, vk::Fence());
queue.waitIdle();
printf("all done\nWoohooo!!!\n\n");
}
}
catch (vk::SystemError &err)
{
std::cout << "vk::SystemError: " << err.what() << std::endl;
exit(-1);
}
catch (std::runtime_error &err)
{
std::cout << "std::runtime_error: " << err.what() << std::endl;
exit(-1);
}
catch (...)
{
std::cout << "unknown error\n";
exit(-1);
}
return EXIT_SUCCESS;
}