[video_core] Replace the redundant resize-with-fill-value overload with a plain default-initializing resize, to reduce stutter in Mario & Luigi: Brothership (Mario BP)

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-22 08:56:48 +00:00 committed by crueter
parent 91058d7383
commit 807b521658
7 changed files with 80 additions and 104 deletions

View file

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
@ -52,7 +52,7 @@ public:
++id.index;
} while (id.index < size && !IsValid(bitset));
if (id.index == size) {
id.index = SlotId::INVALID_INDEX;
id = SlotId{};
}
}
return *this;
@ -141,7 +141,7 @@ public:
}
[[nodiscard]] Iterator end() noexcept {
return Iterator(this, SlotId{SlotId::INVALID_INDEX});
return Iterator(this, SlotId{});
}
[[nodiscard]] size_t size() const noexcept {

View file

@ -486,27 +486,17 @@ void TouchResource::ReadTouchInput() {
SanitizeInput(current_touch_state);
std::scoped_lock lock{*input_mutex};
if (current_touch_state.entry_count == previous_touch_state.entry_count) {
if (current_touch_state.entry_count < 1) {
return;
}
if (current_touch_state.entry_count == previous_touch_state.entry_count && current_touch_state.entry_count >= 1) {
bool has_moved = false;
for (std::size_t i = 0; i < static_cast<std::size_t>(current_touch_state.entry_count);
i++) {
s32 delta_x = std::abs(static_cast<s32>(current_touch_state.states[i].position.x) -
static_cast<s32>(previous_touch_state.states[i].position.x));
s32 delta_y = std::abs(static_cast<s32>(current_touch_state.states[i].position.y) -
static_cast<s32>(previous_touch_state.states[i].position.y));
if (delta_x > 1 || delta_y > 1) {
has_moved = true;
}
for (std::size_t i = 0; !has_moved && i < std::size_t(current_touch_state.entry_count); i++) {
s32 delta_x = std::abs(s32(current_touch_state.states[i].position.x) - s32(previous_touch_state.states[i].position.x));
s32 delta_y = std::abs(s32(current_touch_state.states[i].position.y) - s32(previous_touch_state.states[i].position.y));
has_moved |= (delta_x > 1 || delta_y > 1);
}
if (!has_moved) {
return;
if (has_moved) {
input_event->Signal();
}
}
input_event->Signal();
}
void TouchResource::OnTouchUpdate(s64 timestamp) {

View file

@ -314,8 +314,8 @@ void GraphicsPipeline::AddTransition(GraphicsPipeline* transition) {
template <typename Spec>
bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
boost::container::small_vector<VideoCommon::ImageViewInOut, INLINE_IMAGE_ELEMENTS> views;
boost::container::small_vector<VideoCommon::SamplerId, INLINE_IMAGE_ELEMENTS> samplers;
views.reserve(num_image_elements);
samplers.reserve(num_textures);

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -6,37 +9,39 @@
#include <algorithm>
#include <vector>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
#include "common/assert.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
template <typename Descriptor>
template <typename T>
class DescriptorTable {
public:
explicit DescriptorTable(Tegra::MemoryManager& gpu_memory_) : gpu_memory{gpu_memory_} {}
[[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) {
[[likely]] if (current_gpu_addr == gpu_addr && current_limit == limit) { return false; }
Refresh(gpu_addr, limit);
return true;
[[nodiscard]] bool Synchronize(GPUVAddr gpu_addr, u32 limit) noexcept {
bool ret = !(current_gpu_addr == gpu_addr && current_limit == limit);
if (ret) {
Refresh(gpu_addr, limit);
}
return ret;
}
// Zeroes every word of read_descriptors, i.e. clears the per-index "already read" bits.
// Only the bitset is touched: the cached descriptors, current_gpu_addr and current_limit
// are left as they are. With its bit cleared, an index is reported as new on the next Read.
void Invalidate() noexcept {
std::ranges::fill(read_descriptors, 0);
}
[[nodiscard]] std::pair<Descriptor, bool> Read(u32 index) {
[[nodiscard]] std::pair<T, bool> Read(Tegra::MemoryManager const& gpu_memory, u32 index) noexcept {
DEBUG_ASSERT(index <= current_limit);
const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(Descriptor);
std::pair<Descriptor, bool> result;
gpu_memory.ReadBlockUnsafe(gpu_addr, &result.first, sizeof(Descriptor));
if (IsDescriptorRead(index)) {
const GPUVAddr gpu_addr = current_gpu_addr + index * sizeof(T);
std::pair<T, bool> result;
gpu_memory.ReadBlockUnsafe(gpu_addr, std::addressof(result.first), sizeof(T));
if ((read_descriptors[index / 64] & (1ULL << (index % 64))) != 0) {
result.second = result.first != descriptors[index];
} else {
MarkDescriptorAsRead(index);
read_descriptors[index / 64] |= 1ULL << (index % 64);
result.second = true;
}
if (result.second) {
@ -45,34 +50,24 @@ public:
return result;
}
[[nodiscard]] u32 Limit() const noexcept {
return current_limit;
}
private:
void Refresh(GPUVAddr gpu_addr, u32 limit) {
void Refresh(GPUVAddr gpu_addr, u32 limit) noexcept {
current_gpu_addr = gpu_addr;
current_limit = limit;
const size_t num_descriptors = static_cast<size_t>(limit) + 1;
read_descriptors.clear();
read_descriptors.resize(Common::DivCeil(num_descriptors, 64U), 0);
// Mario & Luigi: Brothership triggers a very large number of reallocations here, so
// pre-allocate aggressively by rounding the size up in 0x80000-entry steps.
// std::vector<T> grows geometrically by default, but even that is not enough for Brothership.
const size_t num_descriptors = ((limit + 0x80000) & (~0x7ffff)) + 1;
size_t old_size = read_descriptors.size();
read_descriptors.resize(Common::DivCeil(num_descriptors, 64U));
old_size = (std::min)(old_size, read_descriptors.size());
std::fill(read_descriptors.begin(), read_descriptors.begin() + old_size, 0);
//
descriptors.resize(num_descriptors);
}
void MarkDescriptorAsRead(u32 index) noexcept {
read_descriptors[index / 64] |= 1ULL << (index % 64);
}
[[nodiscard]] bool IsDescriptorRead(u32 index) const noexcept {
return (read_descriptors[index / 64] & (1ULL << (index % 64))) != 0;
}
Tegra::MemoryManager& gpu_memory;
std::vector<u64> read_descriptors;
std::vector<T> descriptors;
GPUVAddr current_gpu_addr{};
u32 current_limit{};
std::vector<u64> read_descriptors;
std::vector<Descriptor> descriptors;
};
} // namespace VideoCommon

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@ -7,8 +10,8 @@
namespace VideoCommon {
TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
: ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory},
compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {}
: ChannelInfo(state)
{}
template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;

View file

@ -262,8 +262,7 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
template <class P>
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
channel_state->graphics_image_view_ids, index);
const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index);
return slot_image_views[image_view_id];
}
@ -275,14 +274,12 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
FillImageViews<has_blacklists>(channel_state->graphics_image_table,
channel_state->graphics_image_view_ids, views);
FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views);
}
template <class P>
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
views);
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views);
}
template <class P>
@ -361,29 +358,27 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
template <class P>
SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
if (index > channel_state->graphics_sampler_table.Limit()) {
if (index > channel_state->graphics_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID;
}
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index);
SamplerId& id = channel_state->graphics_sampler_ids[index];
if (is_new) {
if (is_new)
id = FindSampler(descriptor);
}
return id;
}
template <class P>
SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
if (index > channel_state->compute_sampler_table.Limit()) {
if (index > channel_state->compute_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID;
}
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index);
SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new) {
if (is_new)
id = FindSampler(descriptor);
}
return id;
}
@ -399,19 +394,16 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
using SamplerBinding = Tegra::Engines::Maxwell3D::Regs::SamplerBinding;
const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding;
const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
bool bindings_changed = false;
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(),
tsc_limit)) {
channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) {
channel_state->graphics_sampler_ids.resize(tsc_limit + 1);
bindings_changed = true;
}
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(),
tic_limit)) {
channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) {
channel_state->graphics_image_view_ids.resize(tic_limit + 1);
bindings_changed = true;
}
if (bindings_changed) {
@ -424,15 +416,13 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
const u32 tic_limit = kepler_compute->regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
bool bindings_changed = false;
if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) {
channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) {
channel_state->compute_sampler_ids.resize(tsc_limit + 1);
bindings_changed = true;
}
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(),
tic_limit)) {
channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
channel_state->compute_image_view_ids.resize(tic_limit + 1);
bindings_changed = true;
}
if (bindings_changed) {
@ -640,14 +630,12 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
}
template <class P>
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
u32 index) {
if (index > table.Limit()) {
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
if (index > table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
return NULL_IMAGE_VIEW_ID;
}
const auto [descriptor, is_new] = table.Read(index);
const auto [descriptor, is_new] = table.Read(*gpu_memory, index);
ImageViewId& image_view_id = cached_image_view_ids[index];
if (is_new) {
image_view_id = FindImageView(descriptor);
@ -2086,8 +2074,7 @@ void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
set.insert(id);
};
ankerl::unordered_dense::set<SamplerId> active;
active.reserve(channel_state->graphics_sampler_ids.size() +
channel_state->compute_sampler_ids.size());
active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
for (const SamplerId id : channel_state->graphics_sampler_ids) {
mark_active(active, id);
}

View file

@ -17,6 +17,7 @@
#include <ankerl/unordered_dense.h>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>
#include <queue>
#include "common/common_types.h"
@ -76,22 +77,22 @@ public:
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
DescriptorTable<TICEntry> graphics_image_table;
DescriptorTable<TSCEntry> graphics_sampler_table;
DescriptorTable<TICEntry> compute_image_table;
DescriptorTable<TSCEntry> compute_sampler_table;
// TODO: still relies on bad iterators :(
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
TextureCacheGPUMap* gpu_page_table;
TextureCacheGPUMap* sparse_page_table;
std::vector<SamplerId> graphics_sampler_ids;
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
std::vector<ImageViewId> compute_image_view_ids;
TextureCacheGPUMap* gpu_page_table = nullptr;
TextureCacheGPUMap* sparse_page_table = nullptr;
};
template <class P>