[video_core] fix redundant resize-copy overload and just use default-init resize, to reduce stutter on Mario BP

Signed-off-by: lizzie <lizzie@eden-emu.dev>
2026-04-27 09:43:51 +00:00 · 2026-04-22 08:56:48 +00:00 · 2026-04-22 08:56:48 +00:00 · 807b521658
parent 91058d7383
commit 807b521658
7 changed files with 80 additions and 104 deletions
--- a/src/common/slot_vector.h
+++ b/src/common/slot_vector.h
@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@ -52,7 +52,7 @@ public:
                    ++id.index;
                } while (id.index < size && !IsValid(bitset));
                if (id.index == size) {
-                    id.index = SlotId::INVALID_INDEX;
+                    id = SlotId{};
                }
            }
            return *this;
@ -141,7 +141,7 @@ public:
    }

    [[nodiscard]] Iterator end() noexcept {
-        return Iterator(this, SlotId{SlotId::INVALID_INDEX});
+        return Iterator(this, SlotId{});
    }

    [[nodiscard]] size_t size() const noexcept {
--- a/src/hid_core/resources/touch_screen/touch_screen_resource.cpp
+++ b/src/hid_core/resources/touch_screen/touch_screen_resource.cpp
@ -486,27 +486,24 @@ void TouchResource::ReadTouchInput() {
    SanitizeInput(current_touch_state);

    std::scoped_lock lock{*input_mutex};
+    // A change in the number of touch points must always signal; only an unchanged count needs the movement check
    if (current_touch_state.entry_count == previous_touch_state.entry_count) {
        if (current_touch_state.entry_count < 1) {
            return;
        }
        bool has_moved = false;
-        for (std::size_t i = 0; i < static_cast<std::size_t>(current_touch_state.entry_count);
-             i++) {
-            s32 delta_x = std::abs(static_cast<s32>(current_touch_state.states[i].position.x) -
-                                   static_cast<s32>(previous_touch_state.states[i].position.x));
-            s32 delta_y = std::abs(static_cast<s32>(current_touch_state.states[i].position.y) -
-                                   static_cast<s32>(previous_touch_state.states[i].position.y));
-            if (delta_x > 1 || delta_y > 1) {
-                has_moved = true;
-            }
+        // Stop scanning at the first touch point whose x or y changed by more than 1
+        for (std::size_t i = 0; !has_moved && i < std::size_t(current_touch_state.entry_count); i++) {
+            s32 delta_x = std::abs(s32(current_touch_state.states[i].position.x) - s32(previous_touch_state.states[i].position.x));
+            s32 delta_y = std::abs(s32(current_touch_state.states[i].position.y) - s32(previous_touch_state.states[i].position.y));
+            has_moved |= (delta_x > 1 || delta_y > 1);
        }
        if (!has_moved) {
            return;
        }
    }

    input_event->Signal();
 }

 void TouchResource::OnTouchUpdate(s64 timestamp) {
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@ -314,8 +314,8 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {

 template <typename Spec>
 bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
-    small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
-    small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
+    boost::container::small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
+    boost::container::small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
    views.reserve(num_image_elements);
    samplers.reserve(num_textures);

--- a/src/video_core/texture_cache/descriptor_table.h
+++ b/src/video_core/texture_cache/descriptor_table.h
@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

@ -6,37 +9,39 @@
 #include <algorithm>
 #include <vector>

+#include "common/alignment.h"
 #include "common/common_types.h"
 #include "common/div_ceil.h"
+#include "common/assert.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

 namespace VideoCommon {

-template <typename Descriptor>
+template <typename T>
 class DescriptorTable {
 public:
-    explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
-
-    [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
-        [[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; }
-        Refresh(gpu_addr, limit);
-        return true;
+    [[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) noexcept {
+        bool ret = !(current_gpu_addr == gpu_addr && current_limit == limit);
+        if (ret) {
+            Refresh(gpu_addr, limit);
+        }
+        return ret;
    }

    void Invalidate() noexcept {
        std::ranges::fill(read_descriptors, 0);
    }

-    [[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
+    [[nodiscard]] std::pair<T, bool> Read(Tegra::MemoryManager const& gpu_memory, u32 index) noexcept {
        DEBUG_ASSERT(index <= current_limit);
-        const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
-        std::pair<Descriptor, bool> result;
-        gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
-        if (IsDescriptorRead(index)) {
+        const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(T);
+        std::pair<T, bool> result;
+        gpu_memory.ReadBlockUnsafe(gpu_addr, std::addressof(result.first), sizeof(T));
+        if ((read_descriptors[index / 64] & (1ULL << (index % 64))) != 0) {
            result.second = result.first != descriptors[index];
        } else {
-            MarkDescriptorAsRead(index);
+            read_descriptors[index / 64] |= 1ULL << (index % 64);
            result.second = true;
        }
        if (result.second) {
@ -45,34 +50,24 @@ public:
        return result;
    }

-    [[nodiscard]] u32 Limit() const noexcept {
-        return current_limit;
-    }
-
-private:
-    void Refresh(GPUVAddr gpu_addr, u32 limit) {
+    void Refresh(GPUVAddr gpu_addr, u32 limit) noexcept {
        current_gpu_addr = gpu_addr;
        current_limit = limit;
-
-        const size_t num_descriptors = static_cast<size_t>(limit) + 1;
-        read_descriptors.clear();
-        read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
+        // Mario & Luigi: Brothership triggers reallocation very often, so use aggressive pre-allocation sizes
+        // std::vector<T> grows geometrically by default, but even that is not enough for Brothership
+        const size_t num_descriptors = ((limit + 0x80000) & (~0x7ffff)) + 1;
+        size_t old_size = read_descriptors.size();
+        read_descriptors.resize(Common::DivCeil(num_descriptors, 64U));
+        old_size = (std::min)(old_size, read_descriptors.size());
+        std::fill(read_descriptors.begin(), read_descriptors.begin() + old_size, 0);
+        //
        descriptors.resize(num_descriptors);
    }

-    void MarkDescriptorAsRead(u32 index) noexcept {
-        read_descriptors[index / 64] |= 1ULL << (index % 64);
-    }
-
-    [[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
-        return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
-    }
-
-    Tegra::MemoryManager& gpu_memory;
+    std::vector<u64> read_descriptors;
+    std::vector<T> descriptors;
    GPUVAddr current_gpu_addr{};
    u32 current_limit{};
-    std::vector<u64> read_descriptors;
-    std::vector<Descriptor> descriptors;
 };

 } // namespace VideoCommon
--- a/src/video_core/texture_cache/texture_cache.cpp
+++ b/src/video_core/texture_cache/texture_cache.cpp
@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

@ -7,8 +10,8 @@
 namespace VideoCommon {

 TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
-    : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory},
-      compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {}
+    : ChannelInfo(state)
+{}

 template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;

--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@ -262,8 +262,7 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {

 template <class P>
 typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
-    const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
-                                              channel_state->graphics_image_view_ids, index);
+    const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index);
    return slot_image_views[image_view_id];
 }

@ -275,14 +274,12 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 template <class P>
 template <bool has_blacklists>
 void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<has_blacklists>(channel_state->graphics_image_table,
-                                   channel_state->graphics_image_view_ids, views);
+    FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views);
 }

 template <class P>
 void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
-                         views);
+    FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views);
 }

 template <class P>
@ -361,29 +358,27 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {

 template <class P>
 SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
-    if (index > channel_state->graphics_sampler_table.Limit()) {
+    if (index > channel_state->graphics_sampler_table.current_limit) {
        LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
        return NULL_SAMPLER_ID;
    }
-    const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
+    const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index);
    SamplerId& id = channel_state->graphics_sampler_ids[index];
-    if (is_new) {
+    if (is_new)
        id = FindSampler(descriptor);
-    }
    return id;
 }

 template <class P>
 SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
-    if (index > channel_state->compute_sampler_table.Limit()) {
+    if (index > channel_state->compute_sampler_table.current_limit) {
        LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
        return NULL_SAMPLER_ID;
    }
-    const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
+    const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index);
    SamplerId& id = channel_state->compute_sampler_ids[index];
-    if (is_new) {
+    if (is_new)
        id = FindSampler(descriptor);
-    }
    return id;
 }

@ -399,19 +394,16 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {

 template <class P>
 void TextureCache<P>::SynchronizeGraphicsDescriptors() {
-    using SamplerBinding = Tegra::Engines::Maxwell3D::Regs::SamplerBinding;
-    const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding;
+    const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
    const u32 tic_limit = maxwell3d->regs.tex_header.limit;
    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
    bool bindings_changed = false;
-    if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(),
-                                                          tsc_limit)) {
-        channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) {
+        channel_state->graphics_sampler_ids.resize(tsc_limit + 1);
        bindings_changed = true;
    }
-    if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(),
-                                                        tic_limit)) {
-        channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) {
+        channel_state->graphics_image_view_ids.resize(tic_limit + 1);
        bindings_changed = true;
    }
    if (bindings_changed) {
@ -424,15 +416,13 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
    const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
    const u32 tic_limit = kepler_compute->regs.tic.limit;
    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
-    const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
    bool bindings_changed = false;
-    if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) {
-        channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) {
+        channel_state->compute_sampler_ids.resize(tsc_limit + 1);
        bindings_changed = true;
    }
-    if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(),
-                                                       tic_limit)) {
-        channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
+        channel_state->compute_image_view_ids.resize(tic_limit + 1);
        bindings_changed = true;
    }
    if (bindings_changed) {
@ -640,14 +630,12 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
 }

 template <class P>
-ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
-                                            std::span<ImageViewId> cached_image_view_ids,
-                                            u32 index) {
-    if (index > table.Limit()) {
+ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
+    if (index > table.current_limit) {
        LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
        return NULL_IMAGE_VIEW_ID;
    }
-    const auto [descriptor, is_new] = table.Read(index);
+    const auto [descriptor, is_new] = table.Read(*gpu_memory, index);
    ImageViewId& image_view_id = cached_image_view_ids[index];
    if (is_new) {
        image_view_id = FindImageView(descriptor);
@ -2086,8 +2074,7 @@ void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
        set.insert(id);
    };
    ankerl::unordered_dense::set<SamplerId> active;
-    active.reserve(channel_state->graphics_sampler_ids.size() +
-                   channel_state->compute_sampler_ids.size());
+    active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
    for (const SamplerId id : channel_state->graphics_sampler_ids) {
        mark_active(active, id);
    }
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@ -17,6 +17,7 @@
 #include <ankerl/unordered_dense.h>
 #include <vector>
 #include <boost/container/small_vector.hpp>
+#include <boost/container/static_vector.hpp>
 #include <queue>

 #include "common/common_types.h"
@ -76,22 +77,22 @@ public:
    TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
    TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;

-    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
-    std::vector<SamplerId> graphics_sampler_ids;
-    std::vector<ImageViewId> graphics_image_view_ids;
-
-    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
-    std::vector<SamplerId> compute_sampler_ids;
-    std::vector<ImageViewId> compute_image_view_ids;
+    DescriptorTable<TICEntry> graphics_image_table;
+    DescriptorTable<TSCEntry> graphics_sampler_table;
+    DescriptorTable<TICEntry> compute_image_table;
+    DescriptorTable<TSCEntry> compute_sampler_table;

    // TODO: still relies on bad iterators :(
    std::unordered_map<TICEntry, ImageViewId> image_views;
    std::unordered_map<TSCEntry, SamplerId> samplers;

-    TextureCacheGPUMap* gpu_page_table;
-    TextureCacheGPUMap* sparse_page_table;
+    std::vector<SamplerId> graphics_sampler_ids;
+    std::vector<SamplerId> compute_sampler_ids;
+    std::vector<ImageViewId> graphics_image_view_ids;
+    std::vector<ImageViewId> compute_image_view_ids;
+
+    TextureCacheGPUMap* gpu_page_table = nullptr;
+    TextureCacheGPUMap* sparse_page_table = nullptr;
 };

 template <class P>