mirror of
https://git.eden-emu.dev/eden-emu/eden
synced 2026-04-26 09:14:09 +00:00
use map for samplers
This commit is contained in:
parent
549a428149
commit
b22bfdb42b
|
|
@ -20,10 +20,14 @@
|
|||
namespace Common {
|
||||
|
||||
struct SlotId {
|
||||
static constexpr u32 TAGGED_MASK = 0x7fffffff;
|
||||
static constexpr u32 TAGGED_VALUE = 0x80000000;
|
||||
static constexpr u32 INVALID_INDEX = (std::numeric_limits<u32>::max)();
|
||||
|
||||
/// Returns the stored index with the tag bit (TAGGED_VALUE) cleared.
constexpr u32 Value() const noexcept {
    // TAGGED_MASK is the bitwise complement of TAGGED_VALUE.
    return index & TAGGED_MASK;
}
|
||||
constexpr auto operator<=>(const SlotId&) const noexcept = default;
|
||||
|
||||
/// True when this id does not hold the INVALID_INDEX sentinel.
constexpr explicit operator bool() const noexcept {
    const bool is_sentinel = (index == INVALID_INDEX);
    return !is_sentinel;
}
|
||||
|
|
@ -47,11 +51,11 @@ public:
|
|||
Iterator& operator++() noexcept {
|
||||
const u64* const bitset = slot_vector->stored_bitset.data();
|
||||
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
|
||||
if (id.index < size) {
|
||||
if (id.Value() < size) {
|
||||
do {
|
||||
++id.index;
|
||||
} while (id.index < size && !IsValid(bitset));
|
||||
if (id.index == size) {
|
||||
} while (id.Value() < size && !IsValid(bitset));
|
||||
if (id.Value() == size) {
|
||||
id = SlotId{};
|
||||
}
|
||||
}
|
||||
|
|
@ -85,7 +89,7 @@ public:
|
|||
: slot_vector{slot_vector_}, id{id_} {}
|
||||
|
||||
bool IsValid(const u64* bitset) const noexcept {
|
||||
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
|
||||
return ((bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
SlotVector<T>* slot_vector;
|
||||
|
|
@ -107,12 +111,12 @@ public:
|
|||
|
||||
/// Returns a mutable reference to the object stored under `id`.
/// The id is checked with ValidateIndex (debug assertions) and the tag bit is
/// stripped before indexing the value storage.
[[nodiscard]] T& operator[](SlotId id) noexcept {
    ValidateIndex(id);
    return values[id.Value()].object;
}
|
||||
|
||||
/// Returns a read-only reference to the object stored under `id`.
/// The id is checked with ValidateIndex (debug assertions) and the tag bit is
/// stripped before indexing the value storage.
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
    ValidateIndex(id);
    return values[id.Value()].object;
}
|
||||
|
||||
template <typename... Args>
|
||||
|
|
@ -125,9 +129,9 @@ public:
|
|||
}
|
||||
|
||||
/// Destroys the object stored under `id` and returns its slot to the free list.
/// The slot is released exactly once: the destructor runs, the untagged index
/// is appended to `free_list`, and its storage bit is cleared.
void erase(SlotId id) noexcept {
    const u32 slot = id.Value();
    values[slot].object.~T();
    free_list.push_back(slot);
    ResetStorageBit(slot);
}
|
||||
|
||||
[[nodiscard]] Iterator begin() noexcept {
|
||||
|
|
@ -175,8 +179,8 @@ private:
|
|||
|
||||
/// Debug-only sanity checks for an id about to be dereferenced:
/// it must not be the invalid sentinel, its untagged value must fall inside
/// the storage bitset, and the corresponding occupancy bit must be set.
void ValidateIndex(SlotId id) const noexcept {
    DEBUG_ASSERT(id);
    DEBUG_ASSERT(id.Value() / 64 < stored_bitset.size());
    DEBUG_ASSERT(((stored_bitset[id.Value() / 64] >> (id.Value() % 64)) & 1) != 0);
}
|
||||
|
||||
[[nodiscard]] u32 FreeValueIndex() noexcept {
|
||||
|
|
@ -208,9 +212,7 @@ private:
|
|||
|
||||
const size_t old_free_size = free_list.size();
|
||||
free_list.resize(old_free_size + (new_capacity - values_capacity));
|
||||
std::iota(free_list.begin() + old_free_size, free_list.end(),
|
||||
static_cast<u32>(values_capacity));
|
||||
|
||||
std::iota(free_list.begin() + old_free_size, free_list.end(), u32(values_capacity));
|
||||
delete[] values;
|
||||
values = new_values;
|
||||
values_capacity = new_capacity;
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ void ComputePipeline::Configure() {
|
|||
desc.is_written);
|
||||
++ssbo_index;
|
||||
}
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
texture_cache.SynchronizeDescriptors(true);
|
||||
|
||||
boost::container::static_vector<VideoCommon::ImageViewInOut, MAX_TEXTURES + MAX_IMAGES> views;
|
||||
boost::container::static_vector<VideoCommon::SamplerId, MAX_TEXTURES> samplers;
|
||||
|
|
@ -148,14 +148,14 @@ void ComputePipeline::Configure() {
|
|||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({handle.first});
|
||||
|
||||
VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
|
||||
VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true);
|
||||
samplers.push_back(sampler);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
add_image(desc, desc.is_written);
|
||||
}
|
||||
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
|
||||
texture_cache.FillImageViews(std::span(views.data(), views.size()), true);
|
||||
|
||||
if (!is_built) {
|
||||
WaitForBuild();
|
||||
|
|
|
|||
|
|
@ -283,7 +283,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
size_t views_index{};
|
||||
size_t samplers_index{};
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.SynchronizeDescriptors(false);
|
||||
|
||||
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
|
||||
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
|
||||
|
|
@ -354,7 +354,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
const auto handle{read_handle(desc, index)};
|
||||
views[views_index++] = {handle.first};
|
||||
|
||||
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
|
||||
VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)};
|
||||
samplers[samplers_index++] = sampler;
|
||||
}
|
||||
}
|
||||
|
|
@ -379,7 +379,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
if constexpr (Spec::enabled_stages[4]) {
|
||||
config_stage(4);
|
||||
}
|
||||
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views_index));
|
||||
texture_cache.FillImageViews(std::span(views.data(), views_index), false, Spec::has_images);
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
|
|
|
|||
|
|
@ -353,13 +353,13 @@ void RasterizerOpenGL::DrawTexture() {
|
|||
gpu.TickWork();
|
||||
};
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.SynchronizeDescriptors(false);
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
SyncState();
|
||||
|
||||
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
|
||||
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
|
||||
const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true);
|
||||
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
|
||||
|
||||
const auto Scale = [&](auto dim) -> s32 {
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
++ssbo_index;
|
||||
}
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
texture_cache.SynchronizeDescriptors(true);
|
||||
|
||||
static constexpr size_t max_elements = 64;
|
||||
boost::container::static_vector<VideoCommon::ImageViewInOut, max_elements> views;
|
||||
|
|
@ -174,14 +174,14 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({handle.first});
|
||||
|
||||
VideoCommon::SamplerId sampler = texture_cache.GetComputeSamplerId(handle.second);
|
||||
VideoCommon::SamplerId sampler = texture_cache.GetSamplerId(handle.second, true);
|
||||
samplers.push_back(sampler);
|
||||
}
|
||||
}
|
||||
for (const auto& desc : info.image_descriptors) {
|
||||
add_image(desc, desc.is_written);
|
||||
}
|
||||
texture_cache.FillComputeImageViews(std::span(views.data(), views.size()));
|
||||
texture_cache.FillImageViews(std::span(views.data(), views.size()), true);
|
||||
|
||||
buffer_cache.UnbindComputeTextureBuffers();
|
||||
size_t index{};
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
views.reserve(num_image_elements);
|
||||
samplers.reserve(num_textures);
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.SynchronizeDescriptors(false);
|
||||
|
||||
buffer_cache.SetUniformBuffersState(enabled_uniform_buffer_masks, &uniform_buffer_sizes);
|
||||
|
||||
|
|
@ -384,7 +384,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
const auto handle{read_handle(desc, index)};
|
||||
views.push_back({handle.first});
|
||||
|
||||
VideoCommon::SamplerId sampler{texture_cache.GetGraphicsSamplerId(handle.second)};
|
||||
VideoCommon::SamplerId sampler{texture_cache.GetSamplerId(handle.second, false)};
|
||||
samplers.push_back(sampler);
|
||||
}
|
||||
}
|
||||
|
|
@ -413,7 +413,7 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
}
|
||||
ASSERT(views.size() == num_image_elements);
|
||||
ASSERT(samplers.size() == num_textures);
|
||||
texture_cache.FillGraphicsImageViews<Spec::has_images>(std::span(views.data(), views.size()));
|
||||
texture_cache.FillImageViews(std::span(views.data(), views.size()), false, Spec::has_images);
|
||||
|
||||
VideoCommon::ImageViewInOut* texture_buffer_it{views.data()};
|
||||
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
|
||||
|
|
|
|||
|
|
@ -354,7 +354,7 @@ void RasterizerVulkan::DrawTexture() {
|
|||
FlushWork();
|
||||
|
||||
std::scoped_lock l{texture_cache.mutex};
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
texture_cache.SynchronizeDescriptors(false);
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
|
||||
UpdateDynamicStates();
|
||||
|
|
@ -363,7 +363,7 @@ void RasterizerVulkan::DrawTexture() {
|
|||
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
|
||||
maxwell3d->regs.zpass_pixel_count_enable);
|
||||
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
|
||||
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
|
||||
const auto& sampler = texture_cache.GetSampler(draw_texture_state.src_sampler, true);
|
||||
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
|
||||
const auto* framebuffer = texture_cache.GetFramebuffer();
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "common/alignment.h"
|
||||
#include "common/settings.h"
|
||||
#include "common/slot_vector.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
|
|
@ -262,7 +263,8 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
|
|||
|
||||
template <class P>
|
||||
typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept {
|
||||
const auto image_view_id = VisitImageView(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, index);
|
||||
// Not compute!
|
||||
const auto image_view_id = VisitImageView(index, false);
|
||||
return slot_image_views[image_view_id];
|
||||
}
|
||||
|
||||
|
|
@ -272,14 +274,25 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
|
|||
}
|
||||
|
||||
template <class P>
|
||||
template <bool has_blacklists>
|
||||
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<has_blacklists>(channel_state->graphics_image_table, channel_state->graphics_image_view_ids, views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids, views);
|
||||
void TextureCache<P>::FillImageViews(std::span<ImageViewInOut> views, bool compute, bool blacklist) {
|
||||
bool has_blacklisted = false;
|
||||
do {
|
||||
has_deleted_images = false;
|
||||
if (blacklist) {
|
||||
has_blacklisted = false;
|
||||
}
|
||||
for (ImageViewInOut& view : views) {
|
||||
view.id = VisitImageView(view.index, compute);
|
||||
if (blacklist) {
|
||||
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
|
||||
const ImageViewBase& image_view = slot_image_views[view.id];
|
||||
auto& image = slot_images[image_view.image_id];
|
||||
has_blacklisted |= ScaleDown(image);
|
||||
image.scale_rating = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (has_deleted_images || (blacklist && has_blacklisted));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
@ -347,39 +360,24 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
|
|||
}
|
||||
|
||||
template <class P>
|
||||
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
|
||||
return &slot_samplers[GetGraphicsSamplerId(index)];
|
||||
typename P::Sampler* TextureCache<P>::GetSampler(u32 index, bool compute) {
|
||||
return &slot_samplers[GetSamplerId(index, compute)];
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
|
||||
return &slot_samplers[GetComputeSamplerId(index)];
|
||||
}
|
||||
|
||||
template <class P>
|
||||
SamplerId TextureCache<P>::GetGraphicsSamplerId(u32 index) {
|
||||
if (index > channel_state->graphics_sampler_table.current_limit) {
|
||||
SamplerId TextureCache<P>::GetSamplerId(u32 index, bool compute) {
|
||||
auto& table = compute ? channel_state->compute_sampler_table : channel_state->graphics_sampler_table;
|
||||
if (index > table.current_limit) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return NULL_SAMPLER_ID;
|
||||
}
|
||||
const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(*gpu_memory, index);
|
||||
SamplerId& id = channel_state->graphics_sampler_ids[index];
|
||||
if (is_new)
|
||||
id = FindSampler(descriptor);
|
||||
return id;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
SamplerId TextureCache<P>::GetComputeSamplerId(u32 index) {
|
||||
if (index > channel_state->compute_sampler_table.current_limit) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return NULL_SAMPLER_ID;
|
||||
auto const [descriptor, is_new] = table.Read(*gpu_memory, index);
|
||||
if (is_new) {
|
||||
auto const id = FindSampler(descriptor, compute);
|
||||
channel_state->sampler_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), id);
|
||||
return id;
|
||||
}
|
||||
const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(*gpu_memory, index);
|
||||
SamplerId& id = channel_state->compute_sampler_ids[index];
|
||||
if (is_new)
|
||||
id = FindSampler(descriptor);
|
||||
return id;
|
||||
return channel_state->sampler_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0))->second;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
@ -393,40 +391,31 @@ typename P::Sampler& TextureCache<P>::GetSampler(SamplerId id) noexcept {
|
|||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
|
||||
const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
|
||||
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) {
|
||||
channel_state->graphics_sampler_ids.resize(tsc_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) {
|
||||
channel_state->graphics_image_view_ids.resize(tic_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (bindings_changed) {
|
||||
++texture_bindings_serial;
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeComputeDescriptors() {
|
||||
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit)) {
|
||||
channel_state->compute_sampler_ids.resize(tsc_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) {
|
||||
channel_state->compute_image_view_ids.resize(tic_limit + 1);
|
||||
bindings_changed = true;
|
||||
}
|
||||
if (bindings_changed) {
|
||||
++texture_bindings_serial;
|
||||
void TextureCache<P>::SynchronizeDescriptors(bool compute) {
|
||||
if (compute) {
|
||||
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->compute_sampler_table.Synchronize(kepler_compute->regs.tsc.Address(), tsc_limit))
|
||||
bindings_changed = true;
|
||||
if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit))
|
||||
bindings_changed = true;
|
||||
if (bindings_changed) {
|
||||
++texture_bindings_serial;
|
||||
}
|
||||
} else {
|
||||
const bool linked_tsc = maxwell3d->regs.sampler_binding == Tegra::Engines::Maxwell3D::Regs::SamplerBinding::ViaHeaderBinding;
|
||||
const u32 tic_limit = maxwell3d->regs.tex_header.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit;
|
||||
bool bindings_changed = false;
|
||||
if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit))
|
||||
bindings_changed = true;
|
||||
if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit))
|
||||
bindings_changed = true;
|
||||
if (bindings_changed) {
|
||||
++texture_bindings_serial;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -605,45 +594,30 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
|
|||
}
|
||||
|
||||
template <class P>
|
||||
template <bool has_blacklists>
|
||||
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids,
|
||||
std::span<ImageViewInOut> views) {
|
||||
bool has_blacklisted = false;
|
||||
do {
|
||||
has_deleted_images = false;
|
||||
if constexpr (has_blacklists) {
|
||||
has_blacklisted = false;
|
||||
}
|
||||
for (ImageViewInOut& view : views) {
|
||||
view.id = VisitImageView(table, cached_image_view_ids, view.index);
|
||||
if constexpr (has_blacklists) {
|
||||
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
|
||||
const ImageViewBase& image_view{slot_image_views[view.id]};
|
||||
auto& image = slot_images[image_view.image_id];
|
||||
has_blacklisted |= ScaleDown(image);
|
||||
image.scale_rating = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (has_deleted_images || (has_blacklists && has_blacklisted));
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, std::span<ImageViewId> cached_image_view_ids, u32 index) {
|
||||
ImageViewId TextureCache<P>::VisitImageView(u32 index, bool compute) {
|
||||
auto& table = compute ? channel_state->compute_image_table : channel_state->graphics_image_table;
|
||||
if (index > table.current_limit) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
const auto [descriptor, is_new] = table.Read(*gpu_memory, index);
|
||||
ImageViewId& image_view_id = cached_image_view_ids[index];
|
||||
// Is new (on the tegra engine side)?
|
||||
auto const [descriptor, is_new] = table.Read(*gpu_memory, index);
|
||||
if (is_new) {
|
||||
image_view_id = FindImageView(descriptor);
|
||||
if (IsValidEntry(*gpu_memory, descriptor)) {
|
||||
// Is new (registered view) on the texture cache side?
|
||||
const auto [pair, is_new_tc] = channel_state->image_views.try_emplace(descriptor);
|
||||
if (is_new_tc)
|
||||
pair->second = CreateImageView(descriptor);
|
||||
PrepareImageView(pair->second, false, false);
|
||||
channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), pair->second);
|
||||
return pair->second;
|
||||
}
|
||||
channel_state->image_view_ids.insert_or_assign(index | (compute ? Common::SlotId::TAGGED_VALUE : 0), NULL_IMAGE_VIEW_ID);
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
if (image_view_id != NULL_IMAGE_VIEW_ID) {
|
||||
PrepareImageView(image_view_id, false, false);
|
||||
}
|
||||
return image_view_id;
|
||||
auto const it = channel_state->image_view_ids.find(index | (compute ? Common::SlotId::TAGGED_VALUE : 0));
|
||||
PrepareImageView(it->second, false, false);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
|
|
@ -1245,19 +1219,6 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
|
|||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
||||
if (!IsValidEntry(*gpu_memory, config)) {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
|
||||
ImageViewId& image_view_id = pair->second;
|
||||
if (is_new) {
|
||||
image_view_id = CreateImageView(config);
|
||||
}
|
||||
return image_view_id;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
|
||||
const ImageInfo info(config);
|
||||
|
|
@ -1399,10 +1360,10 @@ void TextureCache<P>::InvalidateScale(Image& image) {
|
|||
image.image_view_infos.clear();
|
||||
for (size_t c : active_channel_ids) {
|
||||
auto& channel_info = channel_storage[c];
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
|
||||
}
|
||||
|
||||
if constexpr (ENABLE_VALIDATION)
|
||||
for (auto& e : channel_info.image_view_ids)
|
||||
e.second = CORRUPT_ID;
|
||||
channel_info.graphics_image_table.Invalidate();
|
||||
channel_info.compute_image_table.Invalidate();
|
||||
}
|
||||
|
|
@ -2021,7 +1982,7 @@ std::pair<u32, u32> TextureCache<P>::PrepareDmaImage(ImageId dst_id, GPUVAddr ba
|
|||
}
|
||||
|
||||
template <class P>
|
||||
SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
||||
SamplerId TextureCache<P>::FindSampler(const TSCEntry& config, bool compute) {
|
||||
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
|
||||
return NULL_SAMPLER_ID;
|
||||
}
|
||||
|
|
@ -2044,68 +2005,48 @@ std::optional<size_t> TextureCache<P>::QuerySamplerBudget() const {
|
|||
|
||||
template <class P>
|
||||
void TextureCache<P>::EnforceSamplerBudget() {
|
||||
const auto budget = QuerySamplerBudget();
|
||||
if (!budget) {
|
||||
return;
|
||||
if (auto const budget = QuerySamplerBudget(); budget) {
|
||||
if (slot_samplers.size() < *budget) {
|
||||
return;
|
||||
}
|
||||
if (!channel_state) {
|
||||
return;
|
||||
}
|
||||
if (last_sampler_gc_frame == frame_tick) {
|
||||
return;
|
||||
}
|
||||
last_sampler_gc_frame = frame_tick;
|
||||
TrimInactiveSamplers(*budget);
|
||||
}
|
||||
if (slot_samplers.size() < *budget) {
|
||||
return;
|
||||
}
|
||||
if (!channel_state) {
|
||||
return;
|
||||
}
|
||||
if (last_sampler_gc_frame == frame_tick) {
|
||||
return;
|
||||
}
|
||||
last_sampler_gc_frame = frame_tick;
|
||||
TrimInactiveSamplers(*budget);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::TrimInactiveSamplers(size_t budget) {
|
||||
if (channel_state->samplers.empty()) {
|
||||
return;
|
||||
}
|
||||
constexpr size_t SAMPLER_GC_SLACK = 1024;
|
||||
auto mark_active = [](auto& set, SamplerId id) {
|
||||
if (!id || id == CORRUPT_ID || id == NULL_SAMPLER_ID) {
|
||||
return;
|
||||
if (channel_state->samplers.size() > 0) {
|
||||
constexpr size_t SAMPLER_GC_SLACK = 1024;
|
||||
ankerl::unordered_dense::set<SamplerId> active_sampler_ids;
|
||||
for (auto const& e : channel_state->sampler_ids)
|
||||
active_sampler_ids.insert(e.second);
|
||||
// Elements in the map must be necesarily valid
|
||||
size_t removed = 0;
|
||||
for (auto it = channel_state->samplers.begin(); it != channel_state->samplers.end();) {
|
||||
const SamplerId sampler_id = it->second;
|
||||
if (!sampler_id || sampler_id == CORRUPT_ID) {
|
||||
it = channel_state->samplers.erase(it);
|
||||
} else if (std::ranges::find(active_sampler_ids, sampler_id) != active_sampler_ids.end()) {
|
||||
++it;
|
||||
} else {
|
||||
slot_samplers.erase(sampler_id);
|
||||
it = channel_state->samplers.erase(it);
|
||||
++removed;
|
||||
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
set.insert(id);
|
||||
};
|
||||
ankerl::unordered_dense::set<SamplerId> active;
|
||||
active.reserve(channel_state->graphics_sampler_ids.size() + channel_state->compute_sampler_ids.size());
|
||||
for (const SamplerId id : channel_state->graphics_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
for (const SamplerId id : channel_state->compute_sampler_ids) {
|
||||
mark_active(active, id);
|
||||
}
|
||||
|
||||
size_t removed = 0;
|
||||
auto& sampler_map = channel_state->samplers;
|
||||
for (auto it = sampler_map.begin(); it != sampler_map.end();) {
|
||||
const SamplerId sampler_id = it->second;
|
||||
if (!sampler_id || sampler_id == CORRUPT_ID) {
|
||||
it = sampler_map.erase(it);
|
||||
continue;
|
||||
if (removed != 0) {
|
||||
LOG_WARNING(HW_GPU, "Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers", budget, removed);
|
||||
}
|
||||
if (active.find(sampler_id) != active.end()) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
slot_samplers.erase(sampler_id);
|
||||
it = sampler_map.erase(it);
|
||||
++removed;
|
||||
if (slot_samplers.size() + SAMPLER_GC_SLACK <= budget) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (removed != 0) {
|
||||
LOG_WARNING(HW_GPU,
|
||||
"Sampler cache exceeded {} entries on this driver; reclaimed {} inactive samplers",
|
||||
budget, removed);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2345,8 +2286,7 @@ ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const Imag
|
|||
if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
|
||||
return image_view_id;
|
||||
}
|
||||
const ImageViewId image_view_id =
|
||||
slot_image_views.insert(runtime, info, image_id, image, slot_images);
|
||||
const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image, slot_images);
|
||||
image.InsertView(info, image_view_id);
|
||||
return image_view_id;
|
||||
}
|
||||
|
|
@ -2606,10 +2546,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
}
|
||||
for (size_t c : active_channel_ids) {
|
||||
auto& channel_info = channel_storage[c];
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
|
||||
}
|
||||
if constexpr (ENABLE_VALIDATION)
|
||||
for (auto& e : channel_info.image_view_ids)
|
||||
e.second = CORRUPT_ID;
|
||||
channel_info.graphics_image_table.Invalidate();
|
||||
channel_info.compute_image_table.Invalidate();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,12 +86,8 @@ public:
|
|||
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
|
||||
// Values tuned for Mario Brothership, see also descriptor_table.h
|
||||
// Change values as required.
|
||||
boost::container::static_vector<SamplerId, 0x1000 + 1> graphics_sampler_ids;
|
||||
boost::container::static_vector<SamplerId, 0x1000 + 1> compute_sampler_ids;
|
||||
boost::container::static_vector<ImageViewId, 0x80000 + 1> graphics_image_view_ids;
|
||||
boost::container::static_vector<ImageViewId, 0x80000 + 1> compute_image_view_ids;
|
||||
ankerl::unordered_dense::map<u32, SamplerId> sampler_ids;
|
||||
ankerl::unordered_dense::map<u32, ImageViewId> image_view_ids;
|
||||
|
||||
TextureCacheGPUMap* gpu_page_table = nullptr;
|
||||
TextureCacheGPUMap* sparse_page_table = nullptr;
|
||||
|
|
@ -170,27 +166,17 @@ public:
|
|||
/// Mark an image as modified from the GPU
|
||||
void MarkModification(ImageId id) noexcept;
|
||||
|
||||
/// Fill image_view_ids with the graphics images in indices
|
||||
template <bool has_blacklists>
|
||||
void FillGraphicsImageViews(std::span<ImageViewInOut> views);
|
||||
|
||||
/// Fill image_view_ids with the compute images in indices
|
||||
void FillComputeImageViews(std::span<ImageViewInOut> views);
|
||||
/// Fill image_view_ids with the graphics/compute images in indices
|
||||
void FillImageViews(std::span<ImageViewInOut> views, bool compute, bool blacklist = true);
|
||||
|
||||
/// Handle feedback loops during draws.
|
||||
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
|
||||
|
||||
/// Get the sampler from the graphics descriptor table in the specified index
|
||||
Sampler* GetGraphicsSampler(u32 index);
|
||||
/// Get the sampler from the graphics/compute descriptor table in the specified index
|
||||
Sampler* GetSampler(u32 index, bool compute);
|
||||
|
||||
/// Get the sampler from the compute descriptor table in the specified index
|
||||
Sampler* GetComputeSampler(u32 index);
|
||||
|
||||
/// Get the sampler id from the graphics descriptor table in the specified index
|
||||
SamplerId GetGraphicsSamplerId(u32 index);
|
||||
|
||||
/// Get the sampler id from the compute descriptor table in the specified index
|
||||
SamplerId GetComputeSamplerId(u32 index);
|
||||
/// Get the sampler id from the graphics/compute descriptor table in the specified index
|
||||
SamplerId GetSamplerId(u32 index, bool compute);
|
||||
|
||||
/// Return a constant reference to the given sampler id
|
||||
[[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
|
||||
|
|
@ -198,11 +184,8 @@ public:
|
|||
/// Return a reference to the given sampler id
|
||||
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
|
||||
|
||||
/// Refresh the state for graphics image view and sampler descriptors
|
||||
void SynchronizeGraphicsDescriptors();
|
||||
|
||||
/// Refresh the state for compute image view and sampler descriptors
|
||||
void SynchronizeComputeDescriptors();
|
||||
/// Refresh the state for graphics/compute image view and sampler descriptors
|
||||
void SynchronizeDescriptors(bool compute);
|
||||
|
||||
/// Updates the Render Targets if they can be rescaled
|
||||
/// @retval True if the Render Targets have been rescaled.
|
||||
|
|
@ -313,15 +296,8 @@ private:
|
|||
/// Runs the Garbage Collector.
|
||||
void RunGarbageCollector();
|
||||
|
||||
/// Fills image_view_ids in the image views in indices
|
||||
template <bool has_blacklists>
|
||||
void FillImageViews(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids,
|
||||
std::span<ImageViewInOut> views);
|
||||
|
||||
/// Find or create an image view in the guest descriptor table
|
||||
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
|
||||
std::span<ImageViewId> cached_image_view_ids, u32 index);
|
||||
ImageViewId VisitImageView(u32 index, bool compute);
|
||||
|
||||
/// Find or create a framebuffer with the given render target parameters
|
||||
FramebufferId GetFramebufferId(const RenderTargets& key);
|
||||
|
|
@ -333,9 +309,6 @@ private:
|
|||
template <typename StagingBuffer>
|
||||
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
|
||||
|
||||
/// Find or create an image view from a guest descriptor
|
||||
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
|
||||
|
||||
/// Create a new image view from a guest descriptor
|
||||
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
|
||||
|
||||
|
|
@ -363,7 +336,7 @@ private:
|
|||
const Tegra::Engines::Fermi2D::Config& copy);
|
||||
|
||||
/// Find or create a sampler from a guest descriptor sampler
|
||||
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
|
||||
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config, bool compute);
|
||||
|
||||
/// Find or create an image view for the given color buffer index
|
||||
[[nodiscard]] ImageViewId FindColorBuffer(size_t index);
|
||||
|
|
|
|||
Loading…
Reference in a new issue