diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h index 2a408663e5..f6da7a59d1 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -20,10 +20,14 @@ namespace Common { struct SlotId { + static constexpr u32 TAGGED_MASK = 0x7fffffff; + static constexpr u32 TAGGED_VALUE = 0x80000000; static constexpr u32 INVALID_INDEX = (std::numeric_limits::max)(); + constexpr u32 Value() const noexcept { + return index & (~TAGGED_VALUE); + } constexpr auto operator<=>(const SlotId&) const noexcept = default; - constexpr explicit operator bool() const noexcept { return index != INVALID_INDEX; } @@ -47,11 +51,11 @@ public: Iterator& operator++() noexcept { const u64* const bitset = slot_vector->stored_bitset.data(); const u32 size = static_cast(slot_vector->stored_bitset.size()) * 64; - if (id.index < size) { + if (id.Value() < size) { do { ++id.index; - } while (id.index < size && !IsValid(bitset)); - if (id.index == size) { + } while (id.Value() < size && !IsValid(bitset)); + if (id.Value() == size) { id = SlotId{}; } } @@ -85,7 +89,7 @@ public: : slot_vector{slot_vector_}, id{id_} {} bool IsValid(const u64* bitset) const noexcept { - return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; + return ((bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0; } SlotVector* slot_vector; @@ -107,12 +111,12 @@ public: [[nodiscard]] T& operator[](SlotId id) noexcept { ValidateIndex(id); - return values[id.index].object; + return values[id.Value()].object; } [[nodiscard]] const T& operator[](SlotId id) const noexcept { ValidateIndex(id); - return values[id.index].object; + return values[id.Value()].object; } template @@ -125,9 +129,9 @@ public: } void erase(SlotId id) noexcept { - values[id.index].object.~T(); - free_list.push_back(id.index); - ResetStorageBit(id.index); + values[id.Value()].object.~T(); + free_list.push_back(id.Value()); + ResetStorageBit(id.Value()); } [[nodiscard]] Iterator begin() noexcept { @@ -175,8 +179,8 @@ private: void ValidateIndex(SlotId id) const noexcept { DEBUG_ASSERT(id); - DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); - DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + DEBUG_ASSERT(id.Value() / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0); } [[nodiscard]] u32 FreeValueIndex() noexcept { @@ -208,9 +212,7 @@ private: const size_t old_free_size = free_list.size(); free_list.resize(old_free_size + (new_capacity - values_capacity)); - std::iota(free_list.begin() + old_free_size, free_list.end(), - static_cast(values_capacity)); - + std::iota(free_list.begin() + old_free_size, free_list.end(), u32(values_capacity)); delete[] values; values = new_values; values_capacity = new_capacity; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index d1c61be743..f0a7baf9aa 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -90,7 +90,7 @@ void ComputePipeline::Configure() { desc.is_written); ++ssbo_index; } - texture_cache.SynchronizeComputeDescriptors(); + texture_cache.SynchronizeDescriptors(true); boost::container::static_vector views; boost::container::static_vector samplers; @@ -148,14 +148,14 @@ void ComputePipeline::Configure() { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true); samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { add_image(desc, desc.is_written); } - texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + texture_cache.FillImageViews(std::span(views.data(), views.size()), true); if (!is_built) { WaitForBuild(); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index ee3498428e..83545463ac 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -283,7 +283,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { size_t views_index{}; size_t samplers_index{}; - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); @@ -354,7 +354,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views[views_index++] = {handle.first}; - VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)}; samplers[samplers_index++] = sampler; } } @@ -379,7 +379,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { config_stage(4); } - texture_cache.FillGraphicsImageViews(std::span(views.data(), views_index)); + texture_cache.FillImageViews(std::span(views.data(), views_index), false, Spec::has_images); texture_cache.UpdateRenderTargets(false); state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 26826a8f78..b845462268 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -353,13 +353,13 @@ void RasterizerOpenGL::DrawTexture() { gpu.TickWork(); }; - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); texture_cache.UpdateRenderTargets(false); SyncState(); const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); - const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true); const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); const auto Scale = [&](auto dim) -> s32 { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 1a62324c95..1bf24c6845 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -125,7 +125,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, ++ssbo_index; } - texture_cache.SynchronizeComputeDescriptors(); + texture_cache.SynchronizeDescriptors(true); static constexpr size_t max_elements = 64; boost::container::static_vector views; @@ -174,14 +174,14 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second); + VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true); samplers.push_back(sampler); } } for (const auto& desc : info.image_descriptors) { add_image(desc, desc.is_written); } - texture_cache.FillComputeImageViews(std::span(views.data(), views.size())); + texture_cache.FillImageViews(std::span(views.data(), views.size()), true); buffer_cache.UnbindComputeTextureBuffers(); size_t index{}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d6a446a584..9609965637 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -319,7 +319,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { views.reserve(num_image_elements); samplers.reserve(num_textures); - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes); @@ -384,7 +384,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto handle{read_handle(desc, index)}; views.push_back({handle.first}); - VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)}; + VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)}; samplers.push_back(sampler); } } @@ -413,7 +413,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { } ASSERT(views.size() == num_image_elements); ASSERT(samplers.size() == num_textures); - texture_cache.FillGraphicsImageViews(std::span(views.data(), views.size())); + texture_cache.FillImageViews(std::span(views.data(), views.size()), false, Spec::has_images); VideoCommon::ImageViewInOut* texture_buffer_it{views.data()}; const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 78337b3ebe..cce721ca54 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -354,7 +354,7 @@ void RasterizerVulkan::DrawTexture() { FlushWork(); std::scoped_lock l{texture_cache.mutex}; - texture_cache.SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeDescriptors(false); texture_cache.UpdateRenderTargets(false); UpdateDynamicStates(); @@ -363,7 +363,7 @@ void RasterizerVulkan::DrawTexture() { query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState(); - const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler); + const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true); const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture); const auto* framebuffer = texture_cache.GetFramebuffer(); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e53cc4b5c8..92ac1f525d 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -14,6 +14,7 @@ #include "common/alignment.h" #include "common/settings.h" +#include "common/slot_vector.h" #include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" @@ -262,7 +263,8 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { template typename P::ImageView& TextureCache

::GetImageView(u32 index) noexcept { - const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index); + // Not compute! + const auto image_view_id = VisitImageView(index, false); return slot_image_views[image_view_id]; } @@ -272,14 +274,25 @@ void TextureCache

::MarkModification(ImageId id) noexcept { } template -template -void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views); -} - -template -void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(channel_state->compute_image_table, channel_state->compute_image_view_ids, views); +void TextureCache

::FillImageViews(std::span views, bool compute, bool blacklist) { + bool has_blacklisted = false; + do { + has_deleted_images = false; + if (blacklist) { + has_blacklisted = false; + } + for (ImageViewInOut& view : views) { + view.id = VisitImageView(view.index, compute); + if (blacklist) { + if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { + const ImageViewBase& image_view = slot_image_views[view.id]; + auto& image = slot_images[image_view.image_id]; + has_blacklisted |= ScaleDown(image); + image.scale_rating = 0; + } + } + } + } while (has_deleted_images || (blacklist && has_blacklisted)); } template @@ -347,39 +360,24 @@ void TextureCache

::CheckFeedbackLoop(std::span views) { } template -typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - return &slot_samplers[GetGraphicsSamplerId(index)]; +typename P::Sampler* TextureCache

::GetSampler(u32 index, bool compute) { + return &slot_samplers[GetSamplerId(index, compute)]; } template -typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - return &slot_samplers[GetComputeSamplerId(index)]; -} - -template -SamplerId TextureCache

::GetGraphicsSamplerId(u32 index) { - if (index > channel_state->graphics_sampler_table.current_limit) { +SamplerId TextureCache

::GetSamplerId(u32 index, bool compute) { + auto& table = compute ? channel_state->compute_sampler_table : channel_state->graphics_sampler_table; + if (index > table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return NULL_SAMPLER_ID; } - const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index); - SamplerId& id = channel_state->graphics_sampler_ids[index]; - if (is_new) - id = FindSampler(descriptor); - return id; -} - -template -SamplerId TextureCache

::GetComputeSamplerId(u32 index) { - if (index > channel_state->compute_sampler_table.current_limit) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return NULL_SAMPLER_ID; + auto const [descriptor, is_new] = table.Read(*gpu_memory, index); + if (is_new) { + auto const id = FindSampler(descriptor, compute); + channel_state->sampler_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), id); + return id; } - const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index); - SamplerId& id = channel_state->compute_sampler_ids[index]; - if (is_new) - id = FindSampler(descriptor); - return id; + return channel_state->sampler_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0))->second; } template @@ -393,40 +391,31 @@ typename P::Sampler& TextureCache

::GetSampler(SamplerId id) noexcept { } template -void TextureCache

::SynchronizeGraphicsDescriptors() { - const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding; - const u32 tic_limit = maxwell3d->regs.tex_header.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; - bool bindings_changed = false; - if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) { - channel_state->graphics_sampler_ids.resize(tsc_limit + 1); - bindings_changed = true; - } - if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) { - channel_state->graphics_image_view_ids.resize(tic_limit + 1); - bindings_changed = true; - } - if (bindings_changed) { - ++texture_bindings_serial; - } -} - -template -void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute->launch_description.linked_tsc; - const u32 tic_limit = kepler_compute->regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; - bool bindings_changed = false; - if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) { - channel_state->compute_sampler_ids.resize(tsc_limit + 1); - bindings_changed = true; - } - if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) { - channel_state->compute_image_view_ids.resize(tic_limit + 1); - bindings_changed = true; - } - if (bindings_changed) { - ++texture_bindings_serial; +void TextureCache

::SynchronizeDescriptors(bool compute) { + if (compute) { + const bool linked_tsc = kepler_compute->launch_description.linked_tsc; + const u32 tic_limit = kepler_compute->regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; + bool bindings_changed = false; + if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) + bindings_changed = true; + if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) + bindings_changed = true; + if (bindings_changed) { + ++texture_bindings_serial; + } + } else { + const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding; + const u32 tic_limit = maxwell3d->regs.tex_header.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; + bool bindings_changed = false; + if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) + bindings_changed = true; + if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) + bindings_changed = true; + if (bindings_changed) { + ++texture_bindings_serial; + } } } @@ -605,45 +594,30 @@ typename P::Framebuffer* TextureCache

::GetFramebuffer() { } template -template -void TextureCache

::FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span views) { - bool has_blacklisted = false; - do { - has_deleted_images = false; - if constexpr (has_blacklists) { - has_blacklisted = false; - } - for (ImageViewInOut& view : views) { - view.id = VisitImageView(table, cached_image_view_ids, view.index); - if constexpr (has_blacklists) { - if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { - const ImageViewBase& image_view{slot_image_views[view.id]}; - auto& image = slot_images[image_view.image_id]; - has_blacklisted |= ScaleDown(image); - image.scale_rating = 0; - } - } - } - } while (has_deleted_images || (has_blacklists && has_blacklisted)); -} - -template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, std::span cached_image_view_ids, u32 index) { +ImageViewId TextureCache

::VisitImageView(u32 index, bool compute) { + auto& table = compute ? channel_state->compute_image_table : channel_state->graphics_image_table; if (index > table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } - const auto [descriptor, is_new] = table.Read(*gpu_memory, index); - ImageViewId& image_view_id = cached_image_view_ids[index]; + // Is new (on the tegra engine side)? + auto const [descriptor, is_new] = table.Read(*gpu_memory, index); if (is_new) { - image_view_id = FindImageView(descriptor); + if (IsValidEntry(*gpu_memory, descriptor)) { + // Is new (registered view) on the texture cache side? + const auto [pair, is_new_tc] = channel_state->image_views.try_emplace(descriptor); + if (is_new_tc) + pair->second = CreateImageView(descriptor); + PrepareImageView(pair->second, false, false); + channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), pair->second); + return pair->second; + } + channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), NULL_IMAGE_VIEW_ID); + return NULL_IMAGE_VIEW_ID; } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; + auto const it = channel_state->image_view_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0)); + PrepareImageView(it->second, false, false); + return it->second; } template @@ -1245,19 +1219,6 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) } } -template -ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(*gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = channel_state->image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - template ImageViewId TextureCache

::CreateImageView(const TICEntry& config) { const ImageInfo info(config); @@ -1399,10 +1360,10 @@ void TextureCache

::InvalidateScale(Image& image) { image.image_view_infos.clear(); for (size_t c : active_channel_ids) { auto& channel_info = channel_storage[c]; - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); - } + + if constexpr (ENABLE_VALIDATION) + for (auto& e : channel_info.image_view_ids) + e.second = CORRUPT_ID; channel_info.graphics_image_table.Invalidate(); channel_info.compute_image_table.Invalidate(); } @@ -2021,7 +1982,7 @@ std::pair TextureCache

::PrepareDmaImage(ImageId dst_id, GPUVAddr ba } template -SamplerId TextureCache

::FindSampler(const TSCEntry& config) { +SamplerId TextureCache

::FindSampler(const TSCEntry& config, bool compute) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } @@ -2044,68 +2005,48 @@ std::optional TextureCache

::QuerySamplerBudget() const { template void TextureCache

::EnforceSamplerBudget() { - const auto budget = QuerySamplerBudget(); - if (!budget) { - return; + if (auto const budget = QuerySamplerBudget(); budget) { + if (slot_samplers.size() < *budget) { + return; + } + if (!channel_state) { + return; + } + if (last_sampler_gc_frame == frame_tick) { + return; + } + last_sampler_gc_frame = frame_tick; + TrimInactiveSamplers(*budget); } - if (slot_samplers.size() < *budget) { - return; - } - if (!channel_state) { - return; - } - if (last_sampler_gc_frame == frame_tick) { - return; - } - last_sampler_gc_frame = frame_tick; - TrimInactiveSamplers(*budget); } template void TextureCache

::TrimInactiveSamplers(size_t budget) { - if (channel_state->samplers.empty()) { - return; - } - constexpr size_t SAMPLER_GC_SLACK = 1024; - auto mark_active = [](auto& set, SamplerId id) { - if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) { - return; + if (channel_state->samplers.size() > 0) { + constexpr size_t SAMPLER_GC_SLACK = 1024; + ankerl::unordered_dense::set active_sampler_ids; + for (auto const& e : channel_state->sampler_ids) + active_sampler_ids.insert(e.second); + // Elements in the map must be necesarily valid + size_t removed = 0; + for (auto it = channel_state->samplers.begin(); it != channel_state->samplers.end();) { + const SamplerId sampler_id = it->second; + if (!sampler_id || sampler_id == CORRUPT_ID) { + it = channel_state->samplers.erase(it); + } else if (std::ranges::find(active_sampler_ids, sampler_id) != active_sampler_ids.end()) { + ++it; + } else { + slot_samplers.erase(sampler_id); + it = channel_state->samplers.erase(it); + ++removed; + if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) { + break; + } + } } - set.insert(id); - }; - ankerl::unordered_dense::set active; - active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size()); - for (const SamplerId id : channel_state->graphics_sampler_ids) { - mark_active(active, id); - } - for (const SamplerId id : channel_state->compute_sampler_ids) { - mark_active(active, id); - } - - size_t removed = 0; - auto& sampler_map = channel_state->samplers; - for (auto it = sampler_map.begin(); it != sampler_map.end();) { - const SamplerId sampler_id = it->second; - if (!sampler_id || sampler_id == CORRUPT_ID) { - it = sampler_map.erase(it); - continue; + if (removed != 0) { + LOG_WARNING(HW_GPU, "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", budget, removed); } - if (active.find(sampler_id) != active.end()) { - ++it; - continue; - } - slot_samplers.erase(sampler_id); - it = sampler_map.erase(it); - ++removed; - if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) { - break; - } - } - - if (removed != 0) { - LOG_WARNING(HW_GPU, - "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", - budget, removed); } } @@ -2345,8 +2286,7 @@ ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const Imag if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { return image_view_id; } - const ImageViewId image_view_id = - slot_image_views.insert(runtime, info, image_id, image, slot_images); + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image, slot_images); image.InsertView(info, image_view_id); return image_view_id; } @@ -2606,10 +2546,9 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { } for (size_t c : active_channel_ids) { auto& channel_info = channel_storage[c]; - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); - } + if constexpr (ENABLE_VALIDATION) + for (auto& e : channel_info.image_view_ids) + e.second = CORRUPT_ID; channel_info.graphics_image_table.Invalidate(); channel_info.compute_image_table.Invalidate(); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 46e1dac0e4..df4cb24464 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -86,12 +86,8 @@ public: std::unordered_map image_views; std::unordered_map samplers; - // Values tuned for Mario Brothership, see also descriptor_table.h - // Change values as required. - boost::container::static_vector graphics_sampler_ids; - boost::container::static_vector compute_sampler_ids; - boost::container::static_vector graphics_image_view_ids; - boost::container::static_vector compute_image_view_ids; + ankerl::unordered_dense::map sampler_ids; + ankerl::unordered_dense::map image_view_ids; TextureCacheGPUMap* gpu_page_table = nullptr; TextureCacheGPUMap* sparse_page_table = nullptr; @@ -170,27 +166,17 @@ public: /// Mark an image as modified from the GPU void MarkModification(ImageId id) noexcept; - /// Fill image_view_ids with the graphics images in indices - template - void FillGraphicsImageViews(std::span views); - - /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span views); + /// Fill image_view_ids with the graphics/compute images in indices + void FillImageViews(std::span views, bool compute, bool blacklist = true); /// Handle feedback loops during draws. void CheckFeedbackLoop(std::span views); - /// Get the sampler from the graphics descriptor table in the specified index - Sampler* GetGraphicsSampler(u32 index); + /// Get the sampler from the graphics/compute descriptor table in the specified index + Sampler* GetSampler(u32 index, bool compute); - /// Get the sampler from the compute descriptor table in the specified index - Sampler* GetComputeSampler(u32 index); - - /// Get the sampler id from the graphics descriptor table in the specified index - SamplerId GetGraphicsSamplerId(u32 index); - - /// Get the sampler id from the compute descriptor table in the specified index - SamplerId GetComputeSamplerId(u32 index); + /// Get the sampler id from the graphics/compute descriptor table in the specified index + SamplerId GetSamplerId(u32 index, bool compute); /// Return a constant reference to the given sampler id [[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept; @@ -198,11 +184,8 @@ public: /// Return a reference to the given sampler id [[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept; - /// Refresh the state for graphics image view and sampler descriptors - void SynchronizeGraphicsDescriptors(); - - /// Refresh the state for compute image view and sampler descriptors - void SynchronizeComputeDescriptors(); + /// Refresh the state for graphics/compute image view and sampler descriptors + void SynchronizeDescriptors(bool compute); /// Updates the Render Targets if they can be rescaled /// @retval True if the Render Targets have been rescaled. @@ -313,15 +296,8 @@ private: /// Runs the Garbage Collector. void RunGarbageCollector(); - /// Fills image_view_ids in the image views in indices - template - void FillImageViews(DescriptorTable& table, - std::span cached_image_view_ids, - std::span views); - /// Find or create an image view in the guest descriptor table - ImageViewId VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, u32 index); + ImageViewId VisitImageView(u32 index, bool compute); /// Find or create a framebuffer with the given render target parameters FramebufferId GetFramebufferId(const RenderTargets& key); @@ -333,9 +309,6 @@ private: template void UploadImageContents(Image& image, StagingBuffer& staging_buffer); - /// Find or create an image view from a guest descriptor - [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); - /// Create a new image view from a guest descriptor [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); @@ -363,7 +336,7 @@ private: const Tegra::Engines::Fermi2D::Config& copy); /// Find or create a sampler from a guest descriptor sampler - [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); + [[nodiscard]] SamplerId FindSampler(const TSCEntry& config, bool compute); /// Find or create an image view for the given color buffer index [[nodiscard]] ImageViewId FindColorBuffer(size_t index);