From edd26bdb29cdb43c7163ee358549f0f54c41b4fe Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 30 Apr 2026 16:30:19 +0000 Subject: [PATCH] [video_core] Remove redundant references in GPU engine structs Signed-off-by: lizzie --- src/video_core/control/channel_state.cpp | 4 +- src/video_core/control/channel_state.h | 2 +- src/video_core/dma_pusher.cpp | 32 ++--- src/video_core/dma_pusher.h | 53 ++++--- src/video_core/engines/puller.cpp | 139 ++++++++----------- src/video_core/engines/puller.h | 46 ++----- src/video_core/gpu.cpp | 168 +++++++---------------- 7 files changed, 163 insertions(+), 281 deletions(-) diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index d07c7e2a83..49e2e270b1 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -19,10 +19,10 @@ namespace Tegra::Control { ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {} -void ChannelState::Init(Core::System& system, GPU& gpu, u64 program_id_) { +void ChannelState::Init(Core::System& system, u64 program_id_) { ASSERT(memory_manager); program_id = program_id_; - dma_pusher.emplace(system, gpu, *memory_manager, *this); + dma_pusher.emplace(system, *memory_manager, *this); maxwell_3d.emplace(system, *memory_manager); fermi_2d.emplace(*memory_manager); kepler_compute.emplace(system, *memory_manager); diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index 2984d2e09e..c72e1446e7 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -34,7 +34,7 @@ namespace Control { struct ChannelState { explicit ChannelState(s32 bind_id); - void Init(Core::System& system, GPU& gpu, u64 program_id); + void Init(Core::System& system, u64 program_id); void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3844a8e2f9..5f6b34836c 
100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -12,37 +12,32 @@ #include "video_core/guest_memory.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" -#include "video_core/texture_cache/util.h" - -#ifdef _MSC_VER -#include -#endif namespace Tegra { constexpr u32 MacroRegistersStart = 0xE00; [[maybe_unused]] constexpr u32 ComputeInline = 0x6D; -DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, - Control::ChannelState& channel_state_) - : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, - *this, channel_state_}, signal_sync{false}, synced{false} {} +DmaPusher::DmaPusher(Core::System& system_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) + : system{system_} + , memory_manager{memory_manager_} + , channel_state{channel_state_} + , signal_sync{false} + , synced{false} +{} DmaPusher::~DmaPusher() = default; void DmaPusher::DispatchCalls() { - dma_pushbuffer_subindex = 0; - dma_state.is_last_call = true; - while (system.IsPoweredOn()) { if (!Step()) { break; } } - gpu.FlushCommands(); - gpu.OnCommandListEnd(); + system.GPU().FlushCommands(); + system.GPU().OnCommandListEnd(); } bool DmaPusher::Step() { @@ -171,9 +166,9 @@ void DmaPusher::SetState(const CommandHeader& command_header) { dma_state.method_count = command_header.method_count; } -void DmaPusher::CallMethod(u32 argument) const { +void DmaPusher::CallMethod(u32 argument) { if (dma_state.method < non_puller_methods) { - puller.CallPullerMethod(Engines::Puller::MethodCall{ + puller.CallPullerMethod(*this, Engines::Puller::MethodCall{ dma_state.method, argument, dma_state.subchannel, @@ -191,9 +186,9 @@ void DmaPusher::CallMethod(u32 argument) const { } } -void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { +void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) { if (dma_state.method < 
non_puller_methods) { - puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); + puller.CallMultiMethod(*this, dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); } else { auto subchannel = subchannels[dma_state.subchannel]; subchannel->ConsumeSink(); @@ -204,7 +199,6 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { rasterizer = rasterizer_; - puller.BindRasterizer(rasterizer); } } // namespace Tegra diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index f850513603..d8ae737a46 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -109,25 +109,21 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub struct CommandList final { CommandList() = default; explicit CommandList(std::size_t size) : command_lists(size) {} - explicit CommandList( - boost::container::small_vector&& prefetch_command_list_) + explicit CommandList(boost::container::small_vector&& prefetch_command_list_) : prefetch_command_list{std::move(prefetch_command_list_)} {} boost::container::small_vector command_lists; boost::container::small_vector prefetch_command_list; }; -/** - * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the - * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled - * into a "command stream" consisting of 32-bit words that make up "commands". - * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for - * details on this implementation. - */ +/// @brief The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the +/// emulated app fills with commands and tells PFIFO to process. 
The pushbuffers are then assembled +/// into a "command stream" consisting of 32-bit words that make up "commands". +/// See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for +/// details on this implementation. class DmaPusher final { public: - explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, - Control::ChannelState& channel_state_); + explicit DmaPusher(Core::System& system_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_); ~DmaPusher(); void Push(CommandList&& entries) { @@ -136,8 +132,7 @@ public: void DispatchCalls(); - void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, - Engines::EngineTypes engine_type) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, Engines::EngineTypes engine_type) { subchannels[subchannel_id] = engine; subchannel_type[subchannel_id] = engine_type; } @@ -152,11 +147,11 @@ private: void SetState(const CommandHeader& command_header); - void CallMethod(u32 argument) const; - void CallMultiMethod(const u32* base_start, u32 num_methods) const; + void CallMethod(u32 argument); + void CallMultiMethod(const u32* base_start, u32 num_methods); - Common::ScratchBuffer - command_headers; ///< Buffer for list of commands fetched at once +public: + Common::ScratchBuffer command_headers; ///< Buffer for list of commands fetched at once std::queue dma_pushbuffer; ///< Queue of command lists to be processed std::size_t dma_pushbuffer_subindex{}; ///< Index within a command list within the pushbuffer @@ -172,24 +167,24 @@ private: bool is_last_call; }; + Core::System& system; + MemoryManager& memory_manager; + Control::ChannelState& channel_state; + DmaState dma_state{}; - bool dma_increment_once{}; - - const bool ib_enable{true}; ///< IB mode enabled - std::array subchannels{}; std::array subchannel_type; - GPU& gpu; - Core::System& system; - MemoryManager& memory_manager; - mutable Engines::Puller puller; - - 
VideoCore::RasterizerInterface* rasterizer; - bool signal_sync; - bool synced; + Engines::Puller puller; std::mutex sync_mutex; std::condition_variable sync_cv; + + VideoCore::RasterizerInterface* rasterizer = nullptr; + + const bool ib_enable : 1 = true; ///< IB mode enabled + bool dma_increment_once : 1 = false; + bool signal_sync : 1 = false; + bool synced : 1 = false; }; } // namespace Tegra diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 4030f93d49..b153619c16 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -22,37 +22,29 @@ namespace Tegra::Engines { -Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_, - Control::ChannelState& channel_state_) - : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{ - channel_state_} {} - -Puller::~Puller() = default; - -void Puller::ProcessBindMethod(const MethodCall& method_call) { +void Puller::ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { // Bind the current subchannel to the desired engine id. 
- LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, - method_call.argument); + LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, method_call.argument); const auto engine_id = static_cast(method_call.argument); bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(&*channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D); + dma_pusher.BindSubchannel(&*dma_pusher.channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(&*channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D); + dma_pusher.BindSubchannel(&*dma_pusher.channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(&*channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute); + dma_pusher.BindSubchannel(&*dma_pusher.channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(&*channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA); + dma_pusher.BindSubchannel(&*dma_pusher.channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(&*channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory); + dma_pusher.BindSubchannel(&*dma_pusher.channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory); break; case EngineID::NV01_TIMER: - dma_pusher.BindSubchannel(&*channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer); + dma_pusher.BindSubchannel(&*dma_pusher.channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); @@ 
-60,15 +52,15 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { } } -void Puller::ProcessFenceActionMethod() { +void Puller::ProcessFenceActionMethod(DmaPusher& dma_pusher) { switch (regs.fence_action.op) { case Puller::FenceOperation::Acquire: // UNIMPLEMENTED_MSG("Channel Scheduling pending."); // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); - rasterizer->ReleaseFences(); + dma_pusher.rasterizer->ReleaseFences(); break; case Puller::FenceOperation::Increment: - rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); + dma_pusher.rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); break; default: UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); @@ -76,37 +68,35 @@ void Puller::ProcessFenceActionMethod() { } } -void Puller::ProcessSemaphoreTriggerMethod() { +void Puller::ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher) { const auto semaphoreOperationMask = 0xF; - const auto op = - static_cast(regs.semaphore_trigger & semaphoreOperationMask); + const auto op = GpuSemaphoreOperation(regs.semaphore_trigger & semaphoreOperationMask); if (op == GpuSemaphoreOperation::WriteLong) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; - rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, - VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); + dma_pusher.rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); } else { do { - const u32 word{memory_manager.Read(regs.semaphore_address.SemaphoreAddress())}; + const u32 word = dma_pusher.memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); regs.acquire_source = true; regs.acquire_value = regs.semaphore_sequence; if (op == GpuSemaphoreOperation::AcquireEqual) { regs.acquire_active = true; regs.acquire_mode = false; if (word != regs.acquire_value) { - 
rasterizer->ReleaseFences(); + dma_pusher.rasterizer->ReleaseFences(); continue; } } else if (op == GpuSemaphoreOperation::AcquireGequal) { regs.acquire_active = true; regs.acquire_mode = true; if (word < regs.acquire_value) { - rasterizer->ReleaseFences(); + dma_pusher.rasterizer->ReleaseFences(); continue; } } else if (op == GpuSemaphoreOperation::AcquireMask) { if (word && regs.semaphore_sequence == 0) { - rasterizer->ReleaseFences(); + dma_pusher.rasterizer->ReleaseFences(); continue; } } else { @@ -116,21 +106,20 @@ void Puller::ProcessSemaphoreTriggerMethod() { } } -void Puller::ProcessSemaphoreRelease() { +void Puller::ProcessSemaphoreRelease(DmaPusher& dma_pusher) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_release; - rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, - VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); + dma_pusher.rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); } -void Puller::ProcessSemaphoreAcquire() { - u32 word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); +void Puller::ProcessSemaphoreAcquire(DmaPusher& dma_pusher) { + u32 word = dma_pusher.memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); const auto value = regs.semaphore_acquire; while (word != value) { regs.acquire_active = true; regs.acquire_value = value; - rasterizer->ReleaseFences(); - word = memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); + dma_pusher.rasterizer->ReleaseFences(); + word = dma_pusher.memory_manager.Read(regs.semaphore_address.SemaphoreAddress()); // TODO(kemathe73) figure out how to do the acquire_timeout regs.acquire_mode = false; regs.acquire_source = false; @@ -138,13 +127,13 @@ void Puller::ProcessSemaphoreAcquire() { } /// Calls a GPU puller method. 
-void Puller::CallPullerMethod(const MethodCall& method_call) { +void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { regs.reg_array[method_call.method] = method_call.argument; const auto method = static_cast(method_call.method); switch (method) { case BufferMethods::BindObject: { - ProcessBindMethod(method_call); + ProcessBindMethod(dma_pusher, method_call); break; } case BufferMethods::Nop: @@ -155,16 +144,16 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::WrcacheFlush: break; case BufferMethods::RefCnt: - rasterizer->SignalReference(); + dma_pusher.rasterizer->SignalReference(); break; case BufferMethods::SyncpointOperation: - ProcessFenceActionMethod(); + ProcessFenceActionMethod(dma_pusher); break; case BufferMethods::WaitForIdle: - rasterizer->WaitForIdle(); + dma_pusher.rasterizer->WaitForIdle(); break; case BufferMethods::SemaphoreOperation: { - ProcessSemaphoreTriggerMethod(); + ProcessSemaphoreTriggerMethod(dma_pusher); break; } case BufferMethods::NonStallInterrupt: { @@ -177,7 +166,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { } case BufferMethods::MemOpB: { // Implement this better. - rasterizer->InvalidateGPUCache(); + dma_pusher.rasterizer->InvalidateGPUCache(); break; } case BufferMethods::MemOpC: @@ -186,11 +175,11 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { break; } case BufferMethods::SemaphoreAcquire: { - ProcessSemaphoreAcquire(); + ProcessSemaphoreAcquire(dma_pusher); break; } case BufferMethods::SemaphoreRelease: { - ProcessSemaphoreRelease(); + ProcessSemaphoreRelease(dma_pusher); break; } case BufferMethods::Yield: { @@ -205,27 +194,26 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { } /// Calls a GPU engine method. 
-void Puller::CallEngineMethod(const MethodCall& method_call) { +void Puller::CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { const EngineID engine = bound_engines[method_call.subchannel]; - switch (engine) { case EngineID::FERMI_TWOD_A: - channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + dma_pusher.channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::MAXWELL_B: - channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + dma_pusher.channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::KEPLER_COMPUTE_B: - channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + dma_pusher.channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::MAXWELL_DMA_COPY_A: - channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + dma_pusher.channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + dma_pusher.channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; case EngineID::NV01_TIMER: - channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); + dma_pusher.channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall()); break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); @@ -234,28 +222,26 @@ void Puller::CallEngineMethod(const MethodCall& method_call) { } /// Calls a GPU 
engine multivalue method. -void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending) { +void Puller::CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending) { const EngineID engine = bound_engines[subchannel]; - switch (engine) { case EngineID::FERMI_TWOD_A: - channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); + dma_pusher.channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::MAXWELL_B: - channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); + dma_pusher.channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::KEPLER_COMPUTE_B: - channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); + dma_pusher.channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::MAXWELL_DMA_COPY_A: - channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); + dma_pusher.channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); + dma_pusher.channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending); break; case EngineID::NV01_TIMER: - channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending); + dma_pusher.channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending); break; default: UNIMPLEMENTED_MSG("Unimplemented engine"); @@ -264,31 +250,26 @@ void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_s } /// Calls a GPU method. 
-void Puller::CallMethod(const MethodCall& method_call) { - LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, - method_call.subchannel); - +void Puller::CallMethod(DmaPusher& dma_pusher, const MethodCall& method_call) { + LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, method_call.subchannel); ASSERT(method_call.subchannel < bound_engines.size()); - if (ExecuteMethodOnEngine(method_call.method)) { - CallEngineMethod(method_call); + if (ExecuteMethodOnEngine(dma_pusher, method_call.method)) { + CallEngineMethod(dma_pusher, method_call); } else { - CallPullerMethod(method_call); + CallPullerMethod(dma_pusher, method_call); } } /// Calls a GPU multivalue method. -void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending) { +void Puller::CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending) { LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel); - ASSERT(subchannel < bound_engines.size()); - - if (ExecuteMethodOnEngine(method)) { - CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending); + if (ExecuteMethodOnEngine(dma_pusher, method)) { + CallEngineMultiMethod(dma_pusher, method, subchannel, base_start, amount, methods_pending); } else { for (u32 i = 0; i < amount; i++) { - CallPullerMethod(MethodCall{ + CallPullerMethod(dma_pusher, MethodCall{ method, base_start[i], subchannel, @@ -298,13 +279,9 @@ void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, } } -void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { - rasterizer = rasterizer_; -} - /// Determines where the method should be executed. 
-[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) { - const auto buffer_method = static_cast(method); +[[nodiscard]] bool Puller::ExecuteMethodOnEngine(DmaPusher& dma_pusher, u32 method) { + const auto buffer_method = BufferMethods(method); return buffer_method >= BufferMethods::NonPullerMethods; } diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h index fe5102e3ed..e8994f5640 100644 --- a/src/video_core/engines/puller.h +++ b/src/video_core/engines/puller.h @@ -70,32 +70,12 @@ public: BitField<8, 24, u32> syncpoint_id; }; - explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher, - Control::ChannelState& channel_state); - ~Puller(); - - void CallMethod(const MethodCall& method_call); - - void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending); - - void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); - - void CallPullerMethod(const MethodCall& method_call); - - void CallEngineMethod(const MethodCall& method_call); - - void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount, - u32 methods_pending); - + void CallMethod(DmaPusher& dma_pusher, const MethodCall& method_call); + void CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending); + void CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_call); + void CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_call); + void CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending); private: - Tegra::GPU& gpu; - - MemoryManager& memory_manager; - DmaPusher& dma_pusher; - Control::ChannelState& channel_state; - VideoCore::RasterizerInterface* rasterizer = nullptr; - static constexpr std::size_t NUM_REGS = 0x800;
struct Regs { static constexpr size_t NUM_REGS = 0x40; @@ -139,12 +119,12 @@ private: }; } regs{}; - void ProcessBindMethod(const MethodCall& method_call); - void ProcessFenceActionMethod(); - void ProcessSemaphoreAcquire(); - void ProcessSemaphoreRelease(); - void ProcessSemaphoreTriggerMethod(); - [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); + void ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_call); + void ProcessFenceActionMethod(DmaPusher& dma_pusher); + void ProcessSemaphoreAcquire(DmaPusher& dma_pusher); + void ProcessSemaphoreRelease(DmaPusher& dma_pusher); + void ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher); + [[nodiscard]] bool ExecuteMethodOnEngine(DmaPusher& dma_pusher, u32 method); /// Mapping of command subchannels to their bound engine ids std::array bound_engines{}; @@ -157,8 +137,7 @@ private: }; #define ASSERT_REG_POSITION(field_name, position) \ - static_assert(offsetof(Regs, field_name) == position * 4, \ - "Field " #field_name " has invalid position") + static_assert(offsetof(Regs, field_name) == position * 4, "Field " #field_name " has invalid position") ASSERT_REG_POSITION(semaphore_address, 0x4); ASSERT_REG_POSITION(semaphore_sequence, 0x6); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 5f4054212f..44dbb93c3d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -40,30 +40,32 @@ namespace Tegra { struct GPU::Impl { - explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) - : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, - shader_notify{std::make_unique()}, is_async{is_async_}, - gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} + explicit Impl(Core::System& system_, bool is_async_, bool use_nvdec_) + : system{system_} + , use_nvdec{use_nvdec_} + , shader_notify() + , is_async{is_async_} + , gpu_thread{system_, is_async_} + , scheduler(system_.GPU()) + {} ~Impl() = default; std::shared_ptr 
CreateChannel(s32 channel_id) { auto channel_state = std::make_shared(channel_id); channels.emplace(channel_id, channel_state); - scheduler->DeclareChannel(channel_state); + scheduler.DeclareChannel(channel_state); return channel_state; } void BindChannel(s32 channel_id) { - if (bound_channel == channel_id) { - return; + if (bound_channel != channel_id) { + auto it = channels.find(channel_id); + ASSERT(it != channels.end()); + bound_channel = channel_id; + current_channel = it->second.get(); + renderer->ReadRasterizer()->BindChannel(*current_channel); } - auto it = channels.find(channel_id); - ASSERT(it != channels.end()); - bound_channel = channel_id; - current_channel = it->second.get(); - - rasterizer->BindChannel(*current_channel); } std::shared_ptr AllocateChannel() { @@ -71,13 +73,13 @@ struct GPU::Impl { } void InitChannel(Control::ChannelState& to_init, u64 program_id) { - to_init.Init(system, gpu, program_id); - to_init.BindRasterizer(rasterizer); - rasterizer->InitializeChannel(to_init); + to_init.Init(system, program_id); + to_init.BindRasterizer(renderer->ReadRasterizer()); + renderer->ReadRasterizer()->InitializeChannel(to_init); } void InitAddressSpace(Tegra::MemoryManager& memory_manager) { - memory_manager.BindRasterizer(rasterizer); + memory_manager.BindRasterizer(renderer->ReadRasterizer()); } void ReleaseChannel(Control::ChannelState& to_release) { @@ -87,26 +89,26 @@ struct GPU::Impl { /// Binds a renderer to the GPU. void BindRenderer(std::unique_ptr renderer_) { renderer = std::move(renderer_); - rasterizer = renderer->ReadRasterizer(); - host1x.MemoryManager().BindInterface(rasterizer); - host1x.gmmu_manager.BindRasterizer(rasterizer); + system.Host1x().MemoryManager().BindInterface(renderer->ReadRasterizer()); + system.Host1x().GMMU().BindRasterizer(renderer->ReadRasterizer()); } /// Flush all current written commands into the host GPU for execution. 
void FlushCommands() { - rasterizer->FlushCommands(); + renderer->ReadRasterizer()->FlushCommands(); } /// Synchronizes CPU writes with Host GPU memory. void InvalidateGPUCache() { - std::function callback_writes( - [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); + std::function callback_writes([this](PAddr address, size_t size) { + renderer->ReadRasterizer()->OnCacheInvalidation(address, size); + }); system.GatherGPUDirtyMemory(callback_writes); } /// Signal the ending of command list. void OnCommandListEnd() { - rasterizer->ReleaseFences(false); + renderer->ReadRasterizer()->ReleaseFences(false); Settings::UpdateGPUAccuracy(); } @@ -143,62 +145,6 @@ struct GPU::Impl { } } - /// Returns a reference to the Maxwell3D GPU engine. - [[nodiscard]] Engines::Maxwell3D& Maxwell3D() { - ASSERT(current_channel); - return *current_channel->maxwell_3d; - } - - /// Returns a const reference to the Maxwell3D GPU engine. - [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const { - ASSERT(current_channel); - return *current_channel->maxwell_3d; - } - - /// Returns a reference to the KeplerCompute GPU engine. - [[nodiscard]] Engines::KeplerCompute& KeplerCompute() { - ASSERT(current_channel); - return *current_channel->kepler_compute; - } - - /// Returns a reference to the KeplerCompute GPU engine. - [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const { - ASSERT(current_channel); - return *current_channel->kepler_compute; - } - - /// Returns a reference to the GPU DMA pusher. - [[nodiscard]] Tegra::DmaPusher& DmaPusher() { - ASSERT(current_channel); - return *current_channel->dma_pusher; - } - - /// Returns a const reference to the GPU DMA pusher. - [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const { - ASSERT(current_channel); - return *current_channel->dma_pusher; - } - - /// Returns a reference to the underlying renderer. 
- [[nodiscard]] VideoCore::RendererBase& Renderer() { - return *renderer; - } - - /// Returns a const reference to the underlying renderer. - [[nodiscard]] const VideoCore::RendererBase& Renderer() const { - return *renderer; - } - - /// Returns a reference to the shader notifier. - [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() { - return *shader_notify; - } - - /// Returns a const reference to the shader notifier. - [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const { - return *shader_notify; - } - [[nodiscard]] u64 GetTicks() const { u64 gpu_tick = system.CoreTiming().GetGPUTicks(); Settings::GpuOverclock overclock = Settings::values.fast_gpu_time.GetValue(); @@ -210,14 +156,6 @@ struct GPU::Impl { return gpu_tick; } - [[nodiscard]] bool IsAsync() const { - return is_async; - } - - [[nodiscard]] bool UseNvdec() const { - return use_nvdec; - } - void RendererFrameEndNotify() { system.GetPerfStats().EndGameFrame(); } @@ -227,7 +165,7 @@ struct GPU::Impl { /// core timing events. 
void Start() { Settings::UpdateGPUAccuracy(); - gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler); + gpu_thread.StartThread(*renderer, renderer->Context(), scheduler); } void NotifyShutdown() { @@ -260,14 +198,13 @@ struct GPU::Impl { } VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { - auto raster_area = rasterizer->GetFlushArea(addr, size); + auto raster_area = renderer->ReadRasterizer()->GetFlushArea(addr, size); if (raster_area.preemtive) { return raster_area; } raster_area.preemtive = true; const u64 fence = RequestSyncOperation([this, &raster_area]() { - rasterizer->FlushRegion(raster_area.start_address, - raster_area.end_address - raster_area.start_address); + renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address); }); gpu_thread.TickGPU(); WaitForSyncOperation(fence); @@ -280,7 +217,7 @@ struct GPU::Impl { } bool OnCPUWrite(DAddr addr, u64 size) { - return rasterizer->OnCPUWrite(addr, size); + return renderer->ReadRasterizer()->OnCPUWrite(addr, size); } /// Notify rasterizer that any caches of the specified region should be flushed and invalidated @@ -305,7 +242,7 @@ struct GPU::Impl { } const auto wait_fence = RequestSyncOperation([this, current_request_counter, &layers, &fences, num_fences] { - auto& syncpoint_manager = host1x.GetSyncpointManager(); + auto& syncpoint_manager = system.Host1x().GetSyncpointManager(); if (num_fences == 0) { renderer->Composite(layers); } @@ -338,17 +275,14 @@ struct GPU::Impl { return out; } - GPU& gpu; Core::System& system; - Host1x::Host1x& host1x; std::unique_ptr renderer; - VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; s32 new_channel_id{1}; /// Shader build notifier - std::unique_ptr shader_notify; + VideoCore::ShaderNotify shader_notify; /// When true, we are about to shut down emulation session, so terminate outstanding tasks std::atomic_bool shutting_down{}; @@ -372,7 +306,7 @@ struct 
GPU::Impl { VideoCommon::GPUThread::ThreadManager gpu_thread; std::unique_ptr cpu_context; - std::unique_ptr scheduler; + Tegra::Control::Scheduler scheduler; ankerl::unordered_dense::map> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; @@ -383,7 +317,8 @@ struct GPU::Impl { }; GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) - : impl{std::make_unique(*this, system, is_async, use_nvdec)} {} + : impl{std::make_unique(system, is_async, use_nvdec)} +{} GPU::~GPU() = default; @@ -424,8 +359,9 @@ void GPU::OnCommandListEnd() { } u64 GPU::RequestFlush(DAddr addr, std::size_t size) { - return impl->RequestSyncOperation( - [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); + return impl->RequestSyncOperation([this, addr, size]() { + impl->renderer->ReadRasterizer()->FlushRegion(addr, size); + }); } u64 GPU::CurrentSyncRequestFence() const { @@ -442,52 +378,52 @@ void GPU::TickWork() { /// Gets a mutable reference to the Host1x interface Host1x::Host1x& GPU::Host1x() { - return impl->host1x; + return impl->system.Host1x(); } /// Gets an immutable reference to the Host1x interface. 
const Host1x::Host1x& GPU::Host1x() const { - return impl->host1x; + return impl->system.Host1x(); } Engines::Maxwell3D& GPU::Maxwell3D() { - return impl->Maxwell3D(); + return *impl->current_channel->maxwell_3d; } const Engines::Maxwell3D& GPU::Maxwell3D() const { - return impl->Maxwell3D(); + return *impl->current_channel->maxwell_3d; } Engines::KeplerCompute& GPU::KeplerCompute() { - return impl->KeplerCompute(); + return *impl->current_channel->kepler_compute; } const Engines::KeplerCompute& GPU::KeplerCompute() const { - return impl->KeplerCompute(); + return *impl->current_channel->kepler_compute; } Tegra::DmaPusher& GPU::DmaPusher() { - return impl->DmaPusher(); + return *impl->current_channel->dma_pusher; } const Tegra::DmaPusher& GPU::DmaPusher() const { - return impl->DmaPusher(); + return *impl->current_channel->dma_pusher; } VideoCore::RendererBase& GPU::Renderer() { - return impl->Renderer(); + return *impl->renderer; } const VideoCore::RendererBase& GPU::Renderer() const { - return impl->Renderer(); + return *impl->renderer; } VideoCore::ShaderNotify& GPU::ShaderNotify() { - return impl->ShaderNotify(); + return impl->shader_notify; } const VideoCore::ShaderNotify& GPU::ShaderNotify() const { - return impl->ShaderNotify(); + return impl->shader_notify; } void GPU::RequestComposite(std::vector&& layers, @@ -504,11 +440,11 @@ u64 GPU::GetTicks() const { } bool GPU::IsAsync() const { - return impl->IsAsync(); + return impl->is_async; } bool GPU::UseNvdec() const { - return impl->UseNvdec(); + return impl->use_nvdec; } void GPU::RendererFrameEndNotify() {