use map for samplers

This commit is contained in:
lizzie 2026-04-26 01:37:48 +00:00
parent 549a428149
commit b22bfdb42b
9 changed files with 165 additions and 251 deletions

View file

@ -20,10 +20,14 @@
namespace Common {
struct SlotId {
static constexpr u32 TAGGED_MASK = 0x7fffffff;
static constexpr u32 TAGGED_VALUE = 0x80000000;
static constexpr u32 INVALID_INDEX = (std::numeric_limits<u32>::max)();
/// Returns the stored index with the tag bit (TAGGED_VALUE) masked off.
constexpr u32 Value() const noexcept {
    return index & TAGGED_MASK;
}
constexpr auto operator<=>(const SlotId&) const noexcept = default;
constexpr explicit operator bool() const noexcept {
return index != INVALID_INDEX;
}
@ -47,11 +51,11 @@ public:
Iterator& operator++() noexcept {
const u64* const bitset = slot_vector->stored_bitset.data();
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
if (id.index < size) {
if (id.Value() < size) {
do {
++id.index;
} while (id.index < size && !IsValid(bitset));
if (id.index == size) {
} while (id.Value() < size && !IsValid(bitset));
if (id.Value() == size) {
id = SlotId{};
}
}
@ -85,7 +89,7 @@ public:
: slot_vector{slot_vector_}, id{id_} {}
bool IsValid(const u64* bitset) const noexcept {
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
return ((bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0;
}
SlotVector<T>* slot_vector;
@ -107,12 +111,12 @@ public:
[[nodiscard]] T& operator[](SlotId id) noexcept {
ValidateIndex(id);
return values[id.index].object;
return values[id.Value()].object;
}
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
ValidateIndex(id);
return values[id.index].object;
return values[id.Value()].object;
}
template <typename... Args>
@ -125,9 +129,9 @@ public:
}
void erase(SlotId id) noexcept {
values[id.index].object.~T();
free_list.push_back(id.index);
ResetStorageBit(id.index);
values[id.Value()].object.~T();
free_list.push_back(id.Value());
ResetStorageBit(id.Value());
}
[[nodiscard]] Iterator begin() noexcept {
@ -175,8 +179,8 @@ private:
void ValidateIndex(SlotId id) const noexcept {
DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
DEBUG_ASSERT(id.Value() / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0);
}
[[nodiscard]] u32 FreeValueIndex() noexcept {
@ -208,9 +212,7 @@ private:
const size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(),
static_cast<u32>(values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(), u32(values_capacity));
delete[] values;
values = new_values;
values_capacity = new_capacity;

View file

@ -90,7 +90,7 @@ void ComputePipeline::Configure() {
desc.is_written);
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
texture_cache.SynchronizeDescriptors(true);
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
@ -148,14 +148,14 @@ void ComputePipeline::Configure() {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true);
samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
texture_cache.FillImageViews(std::span(views.data(), views.size()), true);
if (!is_built) {
WaitForBuild();

View file

@ -283,7 +283,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
size_t views_index{};
size_t samplers_index{};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.SynchronizeDescriptors(false);
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
@ -354,7 +354,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views[views_index++] = {handle.first};
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)};
samplers[samplers_index++] = sampler;
}
}
@ -379,7 +379,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index));
texture_cache.FillImageViews(std::span(views.data(), views_index), false, Spec::has_images);
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());

View file

@ -353,13 +353,13 @@ void RasterizerOpenGL::DrawTexture() {
gpu.TickWork();
};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.SynchronizeDescriptors(false);
texture_cache.UpdateRenderTargets(false);
SyncState();
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
// DrawTexture runs on the graphics engine: only the graphics descriptor tables were synchronized
// above (SynchronizeDescriptors(false)), so the sampler must come from the graphics table too.
const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, false);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto Scale = [&](auto dim) -> s32 {

View file

@ -125,7 +125,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
texture_cache.SynchronizeDescriptors(true);
static constexpr size_t max_elements = 64;
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
@ -174,14 +174,14 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true);
samplers.push_back(sampler);
}
}
for (const auto& desc : info.image_descriptors) {
add_image(desc, desc.is_written);
}
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
texture_cache.FillImageViews(std::span(views.data(), views.size()), true);
buffer_cache.UnbindComputeTextureBuffers();
size_t index{};

View file

@ -319,7 +319,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
views.reserve(num_image_elements);
samplers.reserve(num_textures);
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.SynchronizeDescriptors(false);
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
@ -384,7 +384,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto handle{read_handle(desc, index)};
views.push_back({handle.first});
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)};
samplers.push_back(sampler);
}
}
@ -413,7 +413,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
ASSERT(views.size() == num_image_elements);
ASSERT(samplers.size() == num_textures);
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views.size()));
texture_cache.FillImageViews(std::span(views.data(), views.size()), false, Spec::has_images);
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {

View file

@ -354,7 +354,7 @@ void RasterizerVulkan::DrawTexture() {
FlushWork();
std::scoped_lock l{texture_cache.mutex};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.SynchronizeDescriptors(false);
texture_cache.UpdateRenderTargets(false);
UpdateDynamicStates();
@ -363,7 +363,7 @@ void RasterizerVulkan::DrawTexture() {
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
// DrawTexture runs on the graphics engine: only the graphics descriptor tables were synchronized
// above (SynchronizeDescriptors(false)), so the sampler must come from the graphics table too.
const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, false);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto* framebuffer = texture_cache.GetFramebuffer();

View file

@ -14,6 +14,7 @@
#include "common/alignment.h"
#include "common/settings.h"
#include "common/slot_vector.h"
#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
@ -262,7 +263,8 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
template <class P>
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index);
// Not compute!
const auto image_view_id = VisitImageView(index, false);
return slot_image_views[image_view_id];
}
@ -272,14 +274,25 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
}
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views);
}
template <class P>
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views);
/// Resolves the image view id of every entry in `views`.
/// @param views     Entries whose `index` is looked up and whose `id` is written.
/// @param compute   Forwarded to VisitImageView to select the compute or graphics table.
/// @param blacklist When true, images behind entries flagged `blacklist` are scaled down and
///                  the whole pass is repeated while any ScaleDown call reports true.
void TextureCache<P>::FillImageViews(std::span<ImageViewInOut> views, bool compute, bool blacklist) {
    bool rescaled_any = false;
    do {
        // Both flags are re-armed for every pass. has_deleted_images is only cleared here;
        // presumably it is raised elsewhere when an image is deleted mid-pass (not visible here).
        has_deleted_images = false;
        rescaled_any = false;
        for (ImageViewInOut& entry : views) {
            entry.id = VisitImageView(entry.index, compute);
            const bool needs_scale_down =
                blacklist && entry.blacklist && entry.id != NULL_IMAGE_VIEW_ID;
            if (!needs_scale_down) {
                continue;
            }
            const ImageViewBase& resolved_view = slot_image_views[entry.id];
            auto& backing_image = slot_images[resolved_view.image_id];
            rescaled_any |= ScaleDown(backing_image);
            backing_image.scale_rating = 0;
        }
    } while (has_deleted_images || rescaled_any);
}
template <class P>
@ -347,39 +360,24 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
}
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
return &slot_samplers[GetGraphicsSamplerId(index)];
/// Returns a pointer to the sampler object for descriptor `index`; `compute` is forwarded to
/// GetSamplerId to pick the compute or graphics sampler table.
typename P::Sampler* TextureCache<P>::GetSampler(u32 index, bool compute) {
    const SamplerId sampler_id = GetSamplerId(index, compute);
    return &slot_samplers[sampler_id];
}
template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
return &slot_samplers[GetComputeSamplerId(index)];
}
template <class P>
SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
if (index > channel_state->graphics_sampler_table.current_limit) {
SamplerId TextureCache<P>::GetSamplerId(u32 index, bool compute) {
auto& table = compute ? channel_state->compute_sampler_table : channel_state->graphics_sampler_table;
if (index > table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID;
}
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index);
SamplerId& id = channel_state->graphics_sampler_ids[index];
if (is_new)
id = FindSampler(descriptor);
return id;
}
template <class P>
SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
if (index > channel_state->compute_sampler_table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return NULL_SAMPLER_ID;
auto const [descriptor, is_new] = table.Read(*gpu_memory, index);
if (is_new) {
auto const id = FindSampler(descriptor, compute);
channel_state->sampler_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), id);
return id;
}
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index);
SamplerId& id = channel_state->compute_sampler_ids[index];
if (is_new)
id = FindSampler(descriptor);
return id;
return channel_state->sampler_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0))->second;
}
template <class P>
@ -393,40 +391,31 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
}
template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
bool bindings_changed = false;
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) {
channel_state->graphics_sampler_ids.resize(tsc_limit + 1);
bindings_changed = true;
}
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) {
channel_state->graphics_image_view_ids.resize(tic_limit + 1);
bindings_changed = true;
}
if (bindings_changed) {
++texture_bindings_serial;
}
}
template <class P>
void TextureCache<P>::SynchronizeComputeDescriptors() {
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
const u32 tic_limit = kepler_compute->regs.tic.limit;
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
bool bindings_changed = false;
if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) {
channel_state->compute_sampler_ids.resize(tsc_limit + 1);
bindings_changed = true;
}
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
channel_state->compute_image_view_ids.resize(tic_limit + 1);
bindings_changed = true;
}
if (bindings_changed) {
++texture_bindings_serial;
/// Re-synchronizes the sampler and image descriptor tables of one engine with its registers and
/// bumps texture_bindings_serial when either table reports a change.
/// @param compute true: kepler_compute registers and the compute tables;
///                false: maxwell3d registers and the graphics tables.
void TextureCache<P>::SynchronizeDescriptors(bool compute) {
    bool samplers_changed = false;
    bool images_changed = false;
    if (compute) {
        const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
        const u32 tic_limit = kepler_compute->regs.tic.limit;
        // With linked TSC the sampler table shares the image table's limit.
        const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
        samplers_changed = channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit);
        images_changed = channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit);
    } else {
        const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
        const u32 tic_limit = maxwell3d->regs.tex_header.limit;
        // With header binding the sampler table shares the image table's limit.
        const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
        samplers_changed = channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit);
        images_changed = channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit);
    }
    // Both tables are always synchronized; the serial advances once if either one changed.
    if (samplers_changed || images_changed) {
        ++texture_bindings_serial;
    }
}
@ -605,45 +594,30 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
}
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views) {
bool has_blacklisted = false;
do {
has_deleted_images = false;
if constexpr (has_blacklists) {
has_blacklisted = false;
}
for (ImageViewInOut& view : views) {
view.id = VisitImageView(table, cached_image_view_ids, view.index);
if constexpr (has_blacklists) {
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
const ImageViewBase& image_view{slot_image_views[view.id]};
auto& image = slot_images[image_view.image_id];
has_blacklisted |= ScaleDown(image);
image.scale_rating = 0;
}
}
}
} while (has_deleted_images || (has_blacklists && has_blacklisted));
}
template <class P>
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
ImageViewId TextureCache<P>::VisitImageView(u32 index, bool compute) {
auto& table = compute ? channel_state->compute_image_table : channel_state->graphics_image_table;
if (index > table.current_limit) {
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
return NULL_IMAGE_VIEW_ID;
}
const auto [descriptor, is_new] = table.Read(*gpu_memory, index);
ImageViewId& image_view_id = cached_image_view_ids[index];
// Is new (on the tegra engine side)?
auto const [descriptor, is_new] = table.Read(*gpu_memory, index);
if (is_new) {
image_view_id = FindImageView(descriptor);
if (IsValidEntry(*gpu_memory, descriptor)) {
// Is new (registered view) on the texture cache side?
const auto [pair, is_new_tc] = channel_state->image_views.try_emplace(descriptor);
if (is_new_tc)
pair->second = CreateImageView(descriptor);
PrepareImageView(pair->second, false, false);
channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), pair->second);
return pair->second;
}
channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), NULL_IMAGE_VIEW_ID);
return NULL_IMAGE_VIEW_ID;
}
if (image_view_id != NULL_IMAGE_VIEW_ID) {
PrepareImageView(image_view_id, false, false);
}
return image_view_id;
auto const it = channel_state->image_view_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0));
PrepareImageView(it->second, false, false);
return it->second;
}
template <class P>
@ -1245,19 +1219,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
}
}
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
if (!IsValidEntry(*gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
ImageViewId& image_view_id = pair->second;
if (is_new) {
image_view_id = CreateImageView(config);
}
return image_view_id;
}
template <class P>
ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
const ImageInfo info(config);
@ -1399,10 +1360,10 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.image_view_infos.clear();
for (size_t c : active_channel_ids) {
auto& channel_info = channel_storage[c];
if constexpr (ENABLE_VALIDATION) {
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
}
if constexpr (ENABLE_VALIDATION)
for (auto& e : channel_info.image_view_ids)
e.second = CORRUPT_ID;
channel_info.graphics_image_table.Invalidate();
channel_info.compute_image_table.Invalidate();
}
@ -2021,7 +1982,7 @@ std::pair<u32, u32> TextureCache<P>::PrepareDmaImage(ImageId dst_id, GPUVAddr ba
}
template <class P>
SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
SamplerId TextureCache<P>::FindSampler(const TSCEntry& config, bool compute) {
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
return NULL_SAMPLER_ID;
}
@ -2044,68 +2005,48 @@ std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
template <class P>
void TextureCache<P>::EnforceSamplerBudget() {
const auto budget = QuerySamplerBudget();
if (!budget) {
return;
if (auto const budget = QuerySamplerBudget(); budget) {
if (slot_samplers.size() < *budget) {
return;
}
if (!channel_state) {
return;
}
if (last_sampler_gc_frame == frame_tick) {
return;
}
last_sampler_gc_frame = frame_tick;
TrimInactiveSamplers(*budget);
}
if (slot_samplers.size() < *budget) {
return;
}
if (!channel_state) {
return;
}
if (last_sampler_gc_frame == frame_tick) {
return;
}
last_sampler_gc_frame = frame_tick;
TrimInactiveSamplers(*budget);
}
template <class P>
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
if (channel_state->samplers.empty()) {
return;
}
constexpr size_t SAMPLER_GC_SLACK = 1024;
auto mark_active = [](auto& set, SamplerId id) {
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
return;
if (channel_state->samplers.size() > 0) {
constexpr size_t SAMPLER_GC_SLACK = 1024;
ankerl::unordered_dense::set<SamplerId> active_sampler_ids;
for (auto const& e : channel_state->sampler_ids)
active_sampler_ids.insert(e.second);
// Elements in the map must necessarily be valid
size_t removed = 0;
for (auto it = channel_state->samplers.begin(); it != channel_state->samplers.end();) {
const SamplerId sampler_id = it->second;
if (!sampler_id || sampler_id == CORRUPT_ID) {
it = channel_state->samplers.erase(it);
} else if (active_sampler_ids.find(sampler_id) != active_sampler_ids.end()) { // hashed lookup, not a linear scan
++it;
} else {
slot_samplers.erase(sampler_id);
it = channel_state->samplers.erase(it);
++removed;
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
break;
}
}
}
set.insert(id);
};
ankerl::unordered_dense::set<SamplerId> active;
active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
for (const SamplerId id : channel_state->graphics_sampler_ids) {
mark_active(active, id);
}
for (const SamplerId id : channel_state->compute_sampler_ids) {
mark_active(active, id);
}
size_t removed = 0;
auto& sampler_map = channel_state->samplers;
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
const SamplerId sampler_id = it->second;
if (!sampler_id || sampler_id == CORRUPT_ID) {
it = sampler_map.erase(it);
continue;
if (removed != 0) {
LOG_WARNING(HW_GPU, "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", budget, removed);
}
if (active.find(sampler_id) != active.end()) {
++it;
continue;
}
slot_samplers.erase(sampler_id);
it = sampler_map.erase(it);
++removed;
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
break;
}
}
if (removed != 0) {
LOG_WARNING(HW_GPU,
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
budget, removed);
}
}
@ -2345,8 +2286,7 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
return image_view_id;
}
const ImageViewId image_view_id =
slot_image_views.insert(runtime, info, image_id, image, slot_images);
const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image, slot_images);
image.InsertView(info, image_view_id);
return image_view_id;
}
@ -2606,10 +2546,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
}
for (size_t c : active_channel_ids) {
auto& channel_info = channel_storage[c];
if constexpr (ENABLE_VALIDATION) {
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
}
if constexpr (ENABLE_VALIDATION)
for (auto& e : channel_info.image_view_ids)
e.second = CORRUPT_ID;
channel_info.graphics_image_table.Invalidate();
channel_info.compute_image_table.Invalidate();
}

View file

@ -86,12 +86,8 @@ public:
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
// Values tuned for Mario Brothership, see also descriptor_table.h
// Change values as required.
boost::container::static_vector<SamplerId, 0x1000 + 1> graphics_sampler_ids;
boost::container::static_vector<SamplerId, 0x1000 + 1> compute_sampler_ids;
boost::container::static_vector<ImageViewId, 0x80000 + 1> graphics_image_view_ids;
boost::container::static_vector<ImageViewId, 0x80000 + 1> compute_image_view_ids;
ankerl::unordered_dense::map<u32, SamplerId> sampler_ids;
ankerl::unordered_dense::map<u32, ImageViewId> image_view_ids;
TextureCacheGPUMap* gpu_page_table = nullptr;
TextureCacheGPUMap* sparse_page_table = nullptr;
@ -170,27 +166,17 @@ public:
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
template <bool has_blacklists>
void FillGraphicsImageViews(std::span<ImageViewInOut> views);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<ImageViewInOut> views);
/// Fill image_view_ids with the graphics/compute images in indices
void FillImageViews(std::span<ImageViewInOut> views, bool compute, bool blacklist = true);
/// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the graphics/compute descriptor table in the specified index
Sampler* GetSampler(u32 index, bool compute);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Get the sampler id from the graphics descriptor table in the specified index
SamplerId GetGraphicsSamplerId(u32 index);
/// Get the sampler id from the compute descriptor table in the specified index
SamplerId GetComputeSamplerId(u32 index);
/// Get the sampler id from the graphics/compute descriptor table in the specified index
SamplerId GetSamplerId(u32 index, bool compute);
/// Return a constant reference to the given sampler id
[[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
@ -198,11 +184,8 @@ public:
/// Return a reference to the given sampler id
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Refresh the state for graphics/compute image view and sampler descriptors
void SynchronizeDescriptors(bool compute);
/// Updates the Render Targets if they can be rescaled
/// @retval True if the Render Targets have been rescaled.
@ -313,15 +296,8 @@ private:
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
template <bool has_blacklists>
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
ImageViewId VisitImageView(u32 index, bool compute);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
@ -333,9 +309,6 @@ private:
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
@ -363,7 +336,7 @@ private:
const Tegra::Engines::Fermi2D::Config& copy);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config, bool compute);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index);