使用500MB RAM的10MB图像

我的Vulkan应用程序中出现内存问题。加载10mb的PNG图片需要使用近500mb的内存。

/// Decodes the PNG at `File`, uploads it into a GPU-only image through a
/// temporary host-visible staging buffer, and creates the image view and
/// sampler used to sample it.
///
/// A new TextureObject is always appended to `Textures` and returned.
/// If decoding, staging-buffer creation or image creation fails, a message
/// is printed and the object is returned early without GPU resources and
/// with `Empty` left untouched.
///
/// @param File  Path of the PNG file to load.
/// @return      The TextureObject that was appended to `Textures`.
/// @throws std::runtime_error  In `_DEBUG` builds only, when the image view
///                             or the sampler cannot be created.
TextureObject* createTextureImage(const char* File) {

    auto Tex = Textures.emplace_back(new TextureObject(_Driver));

    // Decode the PNG; this fills Tex->Pixels, Tex->Width and Tex->Height.
    unsigned error = lodepng::decode(Tex->Pixels,Tex->Width,Tex->Height,File);

    // Without pixel data every size below would be zero, which is not a valid
    // buffer/image size, so report the problem and stop here.
    if (error) {
        printf("PNG Decoder error: (%i) %s",error,lodepng_error_text(error));
        return Tex;
    }

    // Widen before multiplying: Width and Height are 32-bit, so the product
    // would otherwise wrap for very large images.
    // The factor 4 assumes 4 bytes per decoded pixel.
    const VkDeviceSize imageSize = static_cast<VkDeviceSize>(Tex->Width) * Tex->Height * 4;

    //
    //  Image Staging Buffer
    VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    stagingBufferInfo.size = imageSize;
    stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    VmaAllocationCreateInfo stagingAllocCreateInfo = {};
    stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
    stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

    VkBuffer stagingImageBuffer = VK_NULL_HANDLE;
    VmaAllocation stagingImageBufferAlloc = VK_NULL_HANDLE;
    // Receives the persistently mapped pointer requested by MAPPED_BIT.
    VmaAllocationInfo stagingAllocDetails = {};
    const VkResult stagingResult = vmaCreateBuffer(
        _Driver->allocator,&stagingBufferInfo,&stagingAllocCreateInfo,
        &stagingImageBuffer,&stagingImageBufferAlloc,&stagingAllocDetails);
    if (stagingResult != VK_SUCCESS || stagingAllocDetails.pMappedData == nullptr) {
        // vmaDestroyBuffer accepts null handles, so this is safe on both paths.
        vmaDestroyBuffer(_Driver->allocator,stagingImageBuffer,stagingImageBufferAlloc);
        printf("failed to create texture staging buffer for %s\n",File);
        return Tex;
    }

    memcpy(stagingAllocDetails.pMappedData,Tex->Pixels.data(),static_cast<size_t>(imageSize));

    // clear() only resets the size and keeps the capacity, so the decoded
    // pixels would stay allocated for as long as the TextureObject lives.
    // Swapping with an empty temporary actually releases the storage.
    decltype(Tex->Pixels)().swap(Tex->Pixels);

    // NOTE(review): the image uses a B8G8R8A8 format; if the decoder emits
    // RGBA byte order the red and blue channels end up swapped — confirm.
    VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
    imageInfo.imageType = VK_IMAGE_TYPE_2D;
    imageInfo.extent.width = static_cast<uint32_t>(Tex->Width);
    imageInfo.extent.height = static_cast<uint32_t>(Tex->Height);
    imageInfo.extent.depth = 1;
    imageInfo.mipLevels = 1;
    imageInfo.arrayLayers = 1;
    imageInfo.format = VK_FORMAT_B8G8R8A8_SRGB;
    imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
    imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
    imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;

    // Separate create-info so the staging buffer's MAPPED flag is not carried
    // over to the GPU-only image allocation.
    VmaAllocationCreateInfo imageAllocCreateInfo = {};
    imageAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

    if (vmaCreateImage(_Driver->allocator,&imageInfo,&imageAllocCreateInfo,
                       &Tex->Image,&Tex->Allocation,nullptr) != VK_SUCCESS) {
        vmaDestroyBuffer(_Driver->allocator,stagingImageBuffer,stagingImageBufferAlloc);
        printf("failed to create texture image for %s\n",File);
        return Tex;
    }

    //
    //  CPU->GPU Copy
    VkCommandBuffer commandBuffer = _Driver->_SceneGraph->beginSingleTimeCommands();

    VkImageMemoryBarrier imgMemBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
    imgMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    imgMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    imgMemBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    imgMemBarrier.subresourceRange.baseMipLevel = 0;
    imgMemBarrier.subresourceRange.levelCount = 1;
    imgMemBarrier.subresourceRange.baseArrayLayer = 0;
    imgMemBarrier.subresourceRange.layerCount = 1;
    imgMemBarrier.image = Tex->Image;

    // UNDEFINED -> TRANSFER_DST: make the image writable by the copy.
    imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    imgMemBarrier.srcAccessMask = 0;
    imgMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;

    vkCmdPipelineBarrier(
        commandBuffer,
        VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,VK_PIPELINE_STAGE_TRANSFER_BIT,
        0,
        0,nullptr,
        0,nullptr,
        1,&imgMemBarrier);

    // Zero-initialised: buffer offset 0, tightly packed rows, mip 0, layer 0.
    VkBufferImageCopy region = {};
    region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    region.imageSubresource.layerCount = 1;
    region.imageExtent.width = static_cast<uint32_t>(Tex->Width);
    region.imageExtent.height = static_cast<uint32_t>(Tex->Height);
    region.imageExtent.depth = 1;

    vkCmdCopyBufferToImage(commandBuffer,stagingImageBuffer,Tex->Image,VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,1,&region);

    // TRANSFER_DST -> SHADER_READ_ONLY: make the copied texels readable by
    // the fragment shader.
    imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    imgMemBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    imgMemBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

    vkCmdPipelineBarrier(
        commandBuffer,
        VK_PIPELINE_STAGE_TRANSFER_BIT,VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        0,
        0,nullptr,
        0,nullptr,
        1,&imgMemBarrier);

    _Driver->_SceneGraph->endSingleTimeCommands(commandBuffer);

    // NOTE(review): destroying the staging buffer here is only safe if
    // endSingleTimeCommands waits for the copy to finish — confirm.
    vmaDestroyBuffer(_Driver->allocator,stagingImageBuffer,stagingImageBufferAlloc);

    Tex->Empty = false;

    VkImageViewCreateInfo textureImageViewInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
    textureImageViewInfo.image = Tex->Image;
    textureImageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
    textureImageViewInfo.format = VK_FORMAT_B8G8R8A8_SRGB;
    textureImageViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    textureImageViewInfo.subresourceRange.baseMipLevel = 0;
    textureImageViewInfo.subresourceRange.levelCount = 1;
    textureImageViewInfo.subresourceRange.baseArrayLayer = 0;
    textureImageViewInfo.subresourceRange.layerCount = 1;
    if (vkCreateImageView(_Driver->device,&textureImageViewInfo,nullptr,&Tex->ImageView) != VK_SUCCESS) {
#ifdef _DEBUG
        throw std::runtime_error("failed to create texture image view!");
#endif
    }

    // NOTE(review): anisotropic filtering is enabled unconditionally; this
    // presumably requires the samplerAnisotropy device feature — confirm.
    VkSamplerCreateInfo samplerInfo = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
    samplerInfo.magFilter = VK_FILTER_LINEAR;
    samplerInfo.minFilter = VK_FILTER_LINEAR;
    samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
    samplerInfo.anisotropyEnable = VK_TRUE;
    samplerInfo.maxAnisotropy = 16;
    samplerInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
    samplerInfo.unnormalizedCoordinates = VK_FALSE;
    samplerInfo.compareEnable = VK_FALSE;
    samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS;
    samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
    samplerInfo.mipLodBias = 0.0f;
    samplerInfo.minLod = 0.0f;
    samplerInfo.maxLod = 0.0f;
    if (vkCreateSampler(_Driver->device,&samplerInfo,nullptr,&Tex->Sampler) != VK_SUCCESS) {
#ifdef _DEBUG
        throw std::runtime_error("failed to create texture sampler!");
#endif
    }

    return Tex;
}

我已将问题范围缩小到这个函数内发生的内存分配。只需将要加载的图像从10mb的png换成1kb的png,就可以大大减少此函数调用期间的内存消耗。

我在这里做错什么了吗,导致分配了大量的内存而不释放它们?

在每一行上设置断点后发现,巨大的内存分配是在调用以下代码之后立即发生的:

unsigned error = lodepng::decode(Tex->Pixels,File);

加载10mb的png文件怎么会占用500mb以上的内存?

lodepng::decode填充Tex中的以下3个变量

// Image dimensions; both stay zero until lodepng::decode fills them in.
unsigned Width{0};
unsigned Height{0};
// Decoded image bytes written by lodepng::decode; starts out empty.
std::vector<unsigned char> Pixels;

即便是1kb的png也占用了大约5mb的空间,这看起来也太高了。

我也尝试过使用stb_image加载png文件,并且得到了相同的结果。

EDIT1:10mb的png为4096x4096,1kb的png为16x16。

EDIT2:经过一些内存分析,我检查了其中一个内存峰值:共分配了58,000个void类型的实例(占563,000,000字节),其中超过500,000个实例位于我调用过.clear()的向量内。我不确定该如何真正释放这些内存,或者这是否算是一个大问题?

hahahaha548 回答:使用500MB RAM的10MB图像

  

加载10mb的png文件怎么会占用500mb以上的内存?

好吧,那10MB是压缩后的数据。您在这里使用的所有缓冲区保存的都是解压缩之后的数据,因此每个缓冲区约为64MB(4096 × 4096 像素 × 每像素4字节)……

一个可能的泄漏是:无法保证vector.clear()会释放已分配的内存。因此,当您在此处调用clear()来释放对象时,向量可能仍然占着未释放的64MB底层存储,直到向量被销毁为止。

本文链接:https://www.f2er.com/3037806.html

大家都在问