From 807b521658fcec91b315a335e8cd11f28de83228 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 22 Apr 2026 08:56:48 +0000 Subject: [PATCH] [video_core] fix redundant resize-copy overload and just use default-init resize, to reduce stutter on Mario BP Signed-off-by: lizzie --- src/common/slot_vector.h | 6 +- .../touch_screen/touch_screen_resource.cpp | 24 +++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 +- .../texture_cache/descriptor_table.h | 63 +++++++++---------- .../texture_cache/texture_cache.cpp | 7 ++- src/video_core/texture_cache/texture_cache.h | 57 +++++++---------- .../texture_cache/texture_cache_base.h | 23 +++---- 7 files changed, 80 insertions(+), 104 deletions(-) diff --git a/src/common/slot_vector.h b/src/common/slot_vector.h index e464d3d948..2a408663e5 100644 --- a/src/common/slot_vector.h +++ b/src/common/slot_vector.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -52,7 +52,7 @@ public: ++id.index; } while (id.index < size && !IsValid(bitset)); if (id.index == size) { - id.index = SlotId::INVALID_INDEX; + id = SlotId{}; } } return *this; @@ -141,7 +141,7 @@ public: } [[nodiscard]] Iterator end() noexcept { - return Iterator(this, SlotId{SlotId::INVALID_INDEX}); + return Iterator(this, SlotId{}); } [[nodiscard]] size_t size() const noexcept { diff --git a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp index 5d77fe5719..018a43b6c0 100644 --- a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp +++ b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp @@ -486,27 +486,17 @@ void TouchResource::ReadTouchInput() { SanitizeInput(current_touch_state); std::scoped_lock lock{*input_mutex}; - if (current_touch_state.entry_count == 
previous_touch_state.entry_count) { - if (current_touch_state.entry_count < 1) { - return; - } + if (current_touch_state.entry_count == previous_touch_state.entry_count && current_touch_state.entry_count >= 1) { bool has_moved = false; - for (std::size_t i = 0; i < static_cast(current_touch_state.entry_count); - i++) { - s32 delta_x = std::abs(static_cast(current_touch_state.states[i].position.x) - - static_cast(previous_touch_state.states[i].position.x)); - s32 delta_y = std::abs(static_cast(current_touch_state.states[i].position.y) - - static_cast(previous_touch_state.states[i].position.y)); - if (delta_x > 1 || delta_y > 1) { - has_moved = true; - } + for (std::size_t i = 0; !has_moved && i < std::size_t(current_touch_state.entry_count); i++) { + s32 delta_x = std::abs(s32(current_touch_state.states[i].position.x) - s32(previous_touch_state.states[i].position.x)); + s32 delta_y = std::abs(s32(current_touch_state.states[i].position.y) - s32(previous_touch_state.states[i].position.y)); + has_moved |= (delta_x > 1 || delta_y > 1); } - if (!has_moved) { - return; + if (has_moved) { + input_event->Signal(); } } - - input_event->Signal(); } void TouchResource::OnTouchUpdate(s64 timestamp) { diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 43fbefe425..d6a446a584 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -314,8 +314,8 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) { template bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { - small_vector views; - small_vector samplers; + boost::container::small_vector views; + boost::container::small_vector samplers; views.reserve(num_image_elements); samplers.reserve(num_textures); diff --git a/src/video_core/texture_cache/descriptor_table.h b/src/video_core/texture_cache/descriptor_table.h index 1bad83fb4a..e40c128ab5 100644 --- 
a/src/video_core/texture_cache/descriptor_table.h +++ b/src/video_core/texture_cache/descriptor_table.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -6,37 +9,39 @@ #include #include +#include "common/alignment.h" #include "common/common_types.h" #include "common/div_ceil.h" +#include "common/assert.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" namespace VideoCommon { -template +template class DescriptorTable { public: - explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {} - - [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) { - [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; } - Refresh(gpu_addr, limit); - return true; + [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) noexcept { + bool ret = !(current_gpu_addr == gpu_addr && current_limit == limit); + if (ret) { + Refresh(gpu_addr, limit); + } + return ret; } void Invalidate() noexcept { std::ranges::fill(read_descriptors, 0); } - [[nodiscard]] std::pair Read(u32 index) { + [[nodiscard]] std::pair Read(Tegra::MemoryManager const& gpu_memory, u32 index) noexcept { DEBUG_ASSERT(index <= current_limit); - const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor); - std::pair result; - gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor)); - if (IsDescriptorRead(index)) { + const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(T); + std::pair result; + gpu_memory.ReadBlockUnsafe(gpu_addr, std::addressof(result.first), sizeof(T)); + if ((read_descriptors[index / 64] & (1ULL << (index % 64))) != 0) { result.second = result.first != descriptors[index]; } else { - MarkDescriptorAsRead(index); + read_descriptors[index / 64] |= 1ULL << (index % 64); 
result.second = true; } if (result.second) { @@ -45,34 +50,24 @@ public: return result; } - [[nodiscard]] u32 Limit() const noexcept { - return current_limit; - } - -private: - void Refresh(GPUVAddr gpu_addr, u32 limit) { + void Refresh(GPUVAddr gpu_addr, u32 limit) noexcept { current_gpu_addr = gpu_addr; current_limit = limit; - - const size_t num_descriptors = static_cast(limit) + 1; - read_descriptors.clear(); - read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0); + // Mario Brothership reallocates very frequently, so use aggressive pre-alloc sizes + // std::vector grows geometrically by default, but even that isn't enough to satisfy Brothership + const size_t num_descriptors = ((limit + 0x80000) & (~0x7ffff)) + 1; + size_t old_size = read_descriptors.size(); + read_descriptors.resize(Common::DivCeil(num_descriptors, 64U)); + old_size = (std::min)(old_size, read_descriptors.size()); + std::fill(read_descriptors.begin(), read_descriptors.begin() + old_size, 0); + // descriptors.resize(num_descriptors); } - void MarkDescriptorAsRead(u32 index) noexcept { - read_descriptors[index / 64] |= 1ULL << (index % 64); - } - - [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept { - return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0; - } - - Tegra::MemoryManager& gpu_memory; + std::vector read_descriptors; + std::vector descriptors; GPUVAddr current_gpu_addr{}; u32 current_limit{}; - std::vector read_descriptors; - std::vector descriptors; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 8a9a32f44a..d1728603bf 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-License-Identifier: 
GPL-3.0-or-later @@ -7,8 +10,8 @@ namespace VideoCommon { TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept - : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory}, - compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {} + : ChannelInfo(state) +{} template class VideoCommon::ChannelSetupCaches; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index efae825885..e53cc4b5c8 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -262,8 +262,7 @@ typename P::ImageView& TextureCache

::GetImageView(ImageViewId id) noexcept { template typename P::ImageView& TextureCache

::GetImageView(u32 index) noexcept { - const auto image_view_id = VisitImageView(channel_state->graphics_image_table, - channel_state->graphics_image_view_ids, index); + const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index); return slot_image_views[image_view_id]; } @@ -275,14 +274,12 @@ void TextureCache

::MarkModification(ImageId id) noexcept { template template void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(channel_state->graphics_image_table, - channel_state->graphics_image_view_ids, views); + FillImageViews(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views); } template void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(channel_state->compute_image_table, channel_state->compute_image_view_ids, - views); + FillImageViews(channel_state->compute_image_table, channel_state->compute_image_view_ids, views); } template @@ -361,29 +358,27 @@ typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { template SamplerId TextureCache

::GetGraphicsSamplerId(u32 index) { - if (index > channel_state->graphics_sampler_table.Limit()) { + if (index > channel_state->graphics_sampler_table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return NULL_SAMPLER_ID; } - const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); + const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index); SamplerId& id = channel_state->graphics_sampler_ids[index]; - if (is_new) { + if (is_new) id = FindSampler(descriptor); - } return id; } template SamplerId TextureCache

::GetComputeSamplerId(u32 index) { - if (index > channel_state->compute_sampler_table.Limit()) { + if (index > channel_state->compute_sampler_table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return NULL_SAMPLER_ID; } - const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); + const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index); SamplerId& id = channel_state->compute_sampler_ids[index]; - if (is_new) { + if (is_new) id = FindSampler(descriptor); - } return id; } @@ -399,19 +394,16 @@ typename P::Sampler& TextureCache

::GetSampler(SamplerId id) noexcept { template void TextureCache

::SynchronizeGraphicsDescriptors() { - using SamplerBinding = Tegra::Engines::Maxwell3D::Regs::SamplerBinding; - const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding; + const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding; const u32 tic_limit = maxwell3d->regs.tex_header.limit; const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; bool bindings_changed = false; - if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), - tsc_limit)) { - channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) { + channel_state->graphics_sampler_ids.resize(tsc_limit + 1); bindings_changed = true; } - if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), - tic_limit)) { - channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) { + channel_state->graphics_image_view_ids.resize(tic_limit + 1); bindings_changed = true; } if (bindings_changed) { @@ -424,15 +416,13 @@ void TextureCache

::SynchronizeComputeDescriptors() { const bool linked_tsc = kepler_compute->launch_description.linked_tsc; const u32 tic_limit = kepler_compute->regs.tic.limit; const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); bool bindings_changed = false; - if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) { - channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) { + channel_state->compute_sampler_ids.resize(tsc_limit + 1); bindings_changed = true; } - if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), - tic_limit)) { - channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) { + channel_state->compute_image_view_ids.resize(tic_limit + 1); bindings_changed = true; } if (bindings_changed) { @@ -640,14 +630,12 @@ void TextureCache

::FillImageViews(DescriptorTable& table, } template -ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, - std::span cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { +ImageViewId TextureCache

::VisitImageView(DescriptorTable& table, std::span cached_image_view_ids, u32 index) { + if (index > table.current_limit) { LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); return NULL_IMAGE_VIEW_ID; } - const auto [descriptor, is_new] = table.Read(index); + const auto [descriptor, is_new] = table.Read(*gpu_memory, index); ImageViewId& image_view_id = cached_image_view_ids[index]; if (is_new) { image_view_id = FindImageView(descriptor); @@ -2086,8 +2074,7 @@ void TextureCache

::TrimInactiveSamplers(size_t budget) { set.insert(id); }; ankerl::unordered_dense::set active; - active.reserve(channel_state->graphics_sampler_ids.size() + - channel_state->compute_sampler_ids.size()); + active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size()); for (const SamplerId id : channel_state->graphics_sampler_ids) { mark_active(active, id); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 4b4061f21d..b5d846bfa4 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "common/common_types.h" @@ -76,22 +77,22 @@ public: TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector graphics_image_view_ids; - - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; + DescriptorTable graphics_image_table; + DescriptorTable graphics_sampler_table; + DescriptorTable compute_image_table; + DescriptorTable compute_sampler_table; // TODO: still relies on bad iterators :( std::unordered_map image_views; std::unordered_map samplers; - TextureCacheGPUMap* gpu_page_table; - TextureCacheGPUMap* sparse_page_table; + std::vector graphics_sampler_ids; + std::vector compute_sampler_ids; + std::vector graphics_image_view_ids; + std::vector compute_image_view_ids; + + TextureCacheGPUMap* gpu_page_table = nullptr; + TextureCacheGPUMap* sparse_page_table = nullptr; }; template