From edd26bdb29cdb43c7163ee358549f0f54c41b4fe Mon Sep 17 00:00:00 2001
From: lizzie <lizzie@eden-emu.dev>
Date: Thu, 30 Apr 2026 16:30:19 +0000
Subject: [PATCH] [video_core] Remove redundant references in GPU engine
 structs

Signed-off-by: lizzie <lizzie@eden-emu.dev>
---
 src/video_core/control/channel_state.cpp |   4 +-
 src/video_core/control/channel_state.h   |   2 +-
 src/video_core/dma_pusher.cpp            |  32 ++---
 src/video_core/dma_pusher.h              |  53 ++++---
 src/video_core/engines/puller.cpp        | 139 ++++++++-----------
 src/video_core/engines/puller.h          |  46 ++-----
 src/video_core/gpu.cpp                   | 168 +++++++----------------
 7 files changed, 163 insertions(+), 281 deletions(-)

diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
index d07c7e2a83..49e2e270b1 100644
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -19,10 +19,10 @@ namespace Tegra::Control {
 
 ChannelState::ChannelState(s32 bind_id_) : bind_id{bind_id_}, initialized{} {}
 
-void ChannelState::Init(Core::System& system, GPU& gpu, u64 program_id_) {
+void ChannelState::Init(Core::System& system, u64 program_id_) {
     ASSERT(memory_manager);
     program_id = program_id_;
-    dma_pusher.emplace(system, gpu, *memory_manager, *this);
+    dma_pusher.emplace(system, *memory_manager, *this);
     maxwell_3d.emplace(system, *memory_manager);
     fermi_2d.emplace(*memory_manager);
     kepler_compute.emplace(system, *memory_manager);
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
index 2984d2e09e..c72e1446e7 100644
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -34,7 +34,7 @@ namespace Control {
 struct ChannelState {
     explicit ChannelState(s32 bind_id);
 
-    void Init(Core::System& system, GPU& gpu, u64 program_id);
+    void Init(Core::System& system, u64 program_id);
 
     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
 
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3844a8e2f9..5f6b34836c 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,37 +12,32 @@
 #include "video_core/guest_memory.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
-#include "video_core/texture_cache/util.h"
-
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
 
 namespace Tegra {
 
 constexpr u32 MacroRegistersStart = 0xE00;
 [[maybe_unused]] constexpr u32 ComputeInline = 0x6D;
 
-DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
-                     Control::ChannelState& channel_state_)
-    : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
-                                                                          *this, channel_state_}, signal_sync{false}, synced{false} {}
+/// Builds a pusher for one channel. Only references are stored here: the GPU is looked up
+/// through `system` when it is needed, and the puller member is default-constructed.
+DmaPusher::DmaPusher(Core::System& system_, MemoryManager& memory_manager_,
+                     Control::ChannelState& channel_state_)
+    : system{system_}, memory_manager{memory_manager_}, channel_state{channel_state_},
+      signal_sync{false}, synced{false} {
+}
 
 DmaPusher::~DmaPusher() = default;
 
 void DmaPusher::DispatchCalls() {
-
     dma_pushbuffer_subindex = 0;
-
     dma_state.is_last_call = true;
-
     while (system.IsPoweredOn()) {
         if (!Step()) {
             break;
         }
     }
-    gpu.FlushCommands();
-    gpu.OnCommandListEnd();
+    system.GPU().FlushCommands(); // the GPU is reached through System; no GPU& member is kept anymore
+    system.GPU().OnCommandListEnd(); // issued once per dispatch, after the Step() loop has exited
 }
 
 bool DmaPusher::Step() {
@@ -171,9 +166,9 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
     dma_state.method_count = command_header.method_count;
 }
 
-void DmaPusher::CallMethod(u32 argument) const {
+void DmaPusher::CallMethod(u32 argument) {
     if (dma_state.method < non_puller_methods) {
-        puller.CallPullerMethod(Engines::Puller::MethodCall{
+        puller.CallPullerMethod(*this, Engines::Puller::MethodCall{
             dma_state.method,
             argument,
             dma_state.subchannel,
@@ -191,9 +186,9 @@ void DmaPusher::CallMethod(u32 argument) const {
     }
 }
 
-void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
+void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) {
     if (dma_state.method < non_puller_methods) {
-        puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count);
+        puller.CallMultiMethod(*this, dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count);
     } else {
         auto subchannel = subchannels[dma_state.subchannel];
         subchannel->ConsumeSink();
@@ -204,7 +199,6 @@ void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
 
 void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
     rasterizer = rasterizer_;
-    puller.BindRasterizer(rasterizer);
 }
 
 } // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index f850513603..d8ae737a46 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -109,25 +109,21 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
 struct CommandList final {
     CommandList() = default;
     explicit CommandList(std::size_t size) : command_lists(size) {}
-    explicit CommandList(
-        boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
+    explicit CommandList(boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
         : prefetch_command_list{std::move(prefetch_command_list_)} {}
 
     boost::container::small_vector<CommandListHeader, 512> command_lists;
     boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
 };
 
-/**
- * The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
- * emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
- * into a "command stream" consisting of 32-bit words that make up "commands".
- * See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
- * details on this implementation.
- */
+/// @brief The DmaPusher class implements DMA submission to FIFOs, providing an area of memory that the
+/// emulated app fills with commands and tells PFIFO to process. The pushbuffers are then assembled
+/// into a "command stream" consisting of 32-bit words that make up "commands".
+/// See https://envytools.readthedocs.io/en/latest/hw/fifo/dma-pusher.html#fifo-dma-pusher for
+/// details on this implementation.
 class DmaPusher final {
 public:
-    explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
-                       Control::ChannelState& channel_state_);
+    explicit DmaPusher(Core::System& system_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_);
     ~DmaPusher();
 
     void Push(CommandList&& entries) {
@@ -136,8 +132,7 @@ public:
 
     void DispatchCalls();
 
-    void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id,
-                        Engines::EngineTypes engine_type) {
+    void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, Engines::EngineTypes engine_type) {
         subchannels[subchannel_id] = engine;
         subchannel_type[subchannel_id] = engine_type;
     }
@@ -152,11 +147,11 @@ private:
 
     void SetState(const CommandHeader& command_header);
 
-    void CallMethod(u32 argument) const;
-    void CallMultiMethod(const u32* base_start, u32 num_methods) const;
+    void CallMethod(u32 argument); // non-const: passes *this to the puller as DmaPusher&
+    void CallMultiMethod(const u32* base_start, u32 num_methods); // non-const for the same reason
 
-    Common::ScratchBuffer<CommandHeader>
-        command_headers; ///< Buffer for list of commands fetched at once
+public: // NOTE(review): state below is public so Engines::Puller can read channel_state, memory_manager and rasterizer
+    Common::ScratchBuffer<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
 
     std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
     std::size_t dma_pushbuffer_subindex{};  ///< Index within a command list within the pushbuffer
@@ -172,24 +167,24 @@ private:
         bool is_last_call;
     };
 
+    Core::System& system;
+    MemoryManager& memory_manager;
+    Control::ChannelState& channel_state;
+
     DmaState dma_state{};
-    bool dma_increment_once{};
-
-    const bool ib_enable{true}; ///< IB mode enabled
-
     std::array<Engines::EngineInterface*, max_subchannels> subchannels{};
     std::array<Engines::EngineTypes, max_subchannels> subchannel_type;
 
-    GPU& gpu;
-    Core::System& system;
-    MemoryManager& memory_manager;
-    mutable Engines::Puller puller;
-
-    VideoCore::RasterizerInterface* rasterizer;
-    bool signal_sync;
-    bool synced;
+    Engines::Puller puller;
     std::mutex sync_mutex;
     std::condition_variable sync_cv;
+
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+    const bool ib_enable{true}; ///< IB mode enabled
+    bool dma_increment_once{}; // plain bools below, not ": 1" bit-fields: adjacent bit-fields share
+    bool signal_sync{};        // one memory location, so writing one flag would race with a write
+    bool synced{};             // to its neighbour from another thread
 };
 
 } // namespace Tegra
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 4030f93d49..b153619c16 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -22,37 +22,29 @@
 
 namespace Tegra::Engines {
 
-Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
-               Control::ChannelState& channel_state_)
-    : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
-                                                                               channel_state_} {}
-
-Puller::~Puller() = default;
-
-void Puller::ProcessBindMethod(const MethodCall& method_call) {
+void Puller::ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_call) {
     // Bind the current subchannel to the desired engine id.
-    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-              method_call.argument);
+    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel, method_call.argument);
     const auto engine_id = static_cast<EngineID>(method_call.argument);
     bound_engines[method_call.subchannel] = engine_id;
     switch (engine_id) {
     case EngineID::FERMI_TWOD_A:
-        dma_pusher.BindSubchannel(&*channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D);
+        dma_pusher.BindSubchannel(&*dma_pusher.channel_state.fermi_2d, method_call.subchannel, EngineTypes::Fermi2D);
         break;
     case EngineID::MAXWELL_B:
-        dma_pusher.BindSubchannel(&*channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D);
+        dma_pusher.BindSubchannel(&*dma_pusher.channel_state.maxwell_3d, method_call.subchannel, EngineTypes::Maxwell3D);
         break;
     case EngineID::KEPLER_COMPUTE_B:
-        dma_pusher.BindSubchannel(&*channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute);
+        dma_pusher.BindSubchannel(&*dma_pusher.channel_state.kepler_compute, method_call.subchannel, EngineTypes::KeplerCompute);
         break;
     case EngineID::MAXWELL_DMA_COPY_A:
-        dma_pusher.BindSubchannel(&*channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA);
+        dma_pusher.BindSubchannel(&*dma_pusher.channel_state.maxwell_dma, method_call.subchannel, EngineTypes::MaxwellDMA);
         break;
     case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        dma_pusher.BindSubchannel(&*channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory);
+        dma_pusher.BindSubchannel(&*dma_pusher.channel_state.kepler_memory, method_call.subchannel, EngineTypes::KeplerMemory);
         break;
     case EngineID::NV01_TIMER:
-        dma_pusher.BindSubchannel(&*channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer);
+        dma_pusher.BindSubchannel(&*dma_pusher.channel_state.nv01_timer, method_call.subchannel, EngineTypes::Nv01Timer);
         break;
     default:
         UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
@@ -60,15 +52,15 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
     }
 }
 
-void Puller::ProcessFenceActionMethod() {
+void Puller::ProcessFenceActionMethod(DmaPusher& dma_pusher) {
     switch (regs.fence_action.op) {
     case Puller::FenceOperation::Acquire:
         // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
         // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-        rasterizer->ReleaseFences();
+        dma_pusher.rasterizer->ReleaseFences();
         break;
     case Puller::FenceOperation::Increment:
-        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
+        dma_pusher.rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
         break;
     default:
         UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
@@ -76,37 +68,35 @@ void Puller::ProcessFenceActionMethod() {
     }
 }
 
-void Puller::ProcessSemaphoreTriggerMethod() {
+void Puller::ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher) {
     const auto semaphoreOperationMask = 0xF;
-    const auto op =
-        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
+    const auto op = static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); // low 4 bits select the operation
     if (op == GpuSemaphoreOperation::WriteLong) {
         const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
         const u32 payload = regs.semaphore_sequence;
-        rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
-                          VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
+        dma_pusher.rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
     } else {
         do {
-            const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+            const u32 word = dma_pusher.memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
             regs.acquire_source = true;
             regs.acquire_value = regs.semaphore_sequence;
             if (op == GpuSemaphoreOperation::AcquireEqual) {
                 regs.acquire_active = true;
                 regs.acquire_mode = false;
                 if (word != regs.acquire_value) {
-                    rasterizer->ReleaseFences();
+                    dma_pusher.rasterizer->ReleaseFences();
                     continue;
                 }
             } else if (op == GpuSemaphoreOperation::AcquireGequal) {
                 regs.acquire_active = true;
                 regs.acquire_mode = true;
                 if (word < regs.acquire_value) {
-                    rasterizer->ReleaseFences();
+                    dma_pusher.rasterizer->ReleaseFences();
                     continue;
                 }
             } else if (op == GpuSemaphoreOperation::AcquireMask) {
                 if (word && regs.semaphore_sequence == 0) {
-                    rasterizer->ReleaseFences();
+                    dma_pusher.rasterizer->ReleaseFences();
                     continue;
                 }
             } else {
@@ -116,21 +106,20 @@ void Puller::ProcessSemaphoreTriggerMethod() {
     }
 }
 
-void Puller::ProcessSemaphoreRelease() {
+void Puller::ProcessSemaphoreRelease(DmaPusher& dma_pusher) {
     const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
     const u32 payload = regs.semaphore_release;
-    rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
-                      VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
+    dma_pusher.rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
 }
 
-void Puller::ProcessSemaphoreAcquire() {
-    u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+void Puller::ProcessSemaphoreAcquire(DmaPusher& dma_pusher) {
+    u32 word = dma_pusher.memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
     const auto value = regs.semaphore_acquire;
     while (word != value) {
         regs.acquire_active = true;
         regs.acquire_value = value;
-        rasterizer->ReleaseFences();
-        word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+        dma_pusher.rasterizer->ReleaseFences();
+        word = dma_pusher.memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
         // TODO(kemathe73) figure out how to do the acquire_timeout
         regs.acquire_mode = false;
         regs.acquire_source = false;
@@ -138,13 +127,13 @@ void Puller::ProcessSemaphoreAcquire() {
 }
 
 /// Calls a GPU puller method.
-void Puller::CallPullerMethod(const MethodCall& method_call) {
+void Puller::CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_call) {
     regs.reg_array[method_call.method] = method_call.argument;
     const auto method = static_cast<BufferMethods>(method_call.method);
 
     switch (method) {
     case BufferMethods::BindObject: {
-        ProcessBindMethod(method_call);
+        ProcessBindMethod(dma_pusher, method_call);
         break;
     }
     case BufferMethods::Nop:
@@ -155,16 +144,16 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
     case BufferMethods::WrcacheFlush:
         break;
     case BufferMethods::RefCnt:
-        rasterizer->SignalReference();
+        dma_pusher.rasterizer->SignalReference();
         break;
     case BufferMethods::SyncpointOperation:
-        ProcessFenceActionMethod();
+        ProcessFenceActionMethod(dma_pusher);
         break;
     case BufferMethods::WaitForIdle:
-        rasterizer->WaitForIdle();
+        dma_pusher.rasterizer->WaitForIdle();
         break;
     case BufferMethods::SemaphoreOperation: {
-        ProcessSemaphoreTriggerMethod();
+        ProcessSemaphoreTriggerMethod(dma_pusher);
         break;
     }
     case BufferMethods::NonStallInterrupt: {
@@ -177,7 +166,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
     }
     case BufferMethods::MemOpB: {
         // Implement this better.
-        rasterizer->InvalidateGPUCache();
+        dma_pusher.rasterizer->InvalidateGPUCache();
         break;
     }
     case BufferMethods::MemOpC:
@@ -186,11 +175,11 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
         break;
     }
     case BufferMethods::SemaphoreAcquire: {
-        ProcessSemaphoreAcquire();
+        ProcessSemaphoreAcquire(dma_pusher);
         break;
     }
     case BufferMethods::SemaphoreRelease: {
-        ProcessSemaphoreRelease();
+        ProcessSemaphoreRelease(dma_pusher);
         break;
     }
     case BufferMethods::Yield: {
@@ -205,27 +194,26 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
 }
 
 /// Calls a GPU engine method.
-void Puller::CallEngineMethod(const MethodCall& method_call) {
+void Puller::CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_call) {
     const EngineID engine = bound_engines[method_call.subchannel];
-
     switch (engine) {
     case EngineID::FERMI_TWOD_A:
-        channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+        dma_pusher.channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
         break;
     case EngineID::MAXWELL_B:
-        channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+        dma_pusher.channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
         break;
     case EngineID::KEPLER_COMPUTE_B:
-        channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+        dma_pusher.channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
         break;
     case EngineID::MAXWELL_DMA_COPY_A:
-        channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+        dma_pusher.channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
         break;
     case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+        dma_pusher.channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
         break;
     case EngineID::NV01_TIMER:
-        channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
+        dma_pusher.channel_state.nv01_timer->CallMethod(method_call.method, method_call.argument, method_call.IsLastCall());
         break;
     default:
         UNIMPLEMENTED_MSG("Unimplemented engine");
@@ -234,28 +222,26 @@ void Puller::CallEngineMethod(const MethodCall& method_call) {
 }
 
 /// Calls a GPU engine multivalue method.
-void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                                   u32 methods_pending) {
+void Puller::CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending) {
     const EngineID engine = bound_engines[subchannel];
-
     switch (engine) {
     case EngineID::FERMI_TWOD_A:
-        channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+        dma_pusher.channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
         break;
     case EngineID::MAXWELL_B:
-        channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+        dma_pusher.channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
         break;
     case EngineID::KEPLER_COMPUTE_B:
-        channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+        dma_pusher.channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
         break;
     case EngineID::MAXWELL_DMA_COPY_A:
-        channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+        dma_pusher.channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
         break;
     case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-        channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+        dma_pusher.channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
         break;
     case EngineID::NV01_TIMER:
-        channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending);
+        dma_pusher.channel_state.nv01_timer->CallMultiMethod(method, base_start, amount, methods_pending);
         break;
     default:
         UNIMPLEMENTED_MSG("Unimplemented engine");
@@ -264,31 +250,26 @@ void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_s
 }
 
 /// Calls a GPU method.
-void Puller::CallMethod(const MethodCall& method_call) {
-    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
-              method_call.subchannel);
-
+void Puller::CallMethod(DmaPusher& dma_pusher, const MethodCall& method_call) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method, method_call.subchannel);
     ASSERT(method_call.subchannel < bound_engines.size());
 
-    if (ExecuteMethodOnEngine(method_call.method)) {
-        CallEngineMethod(method_call);
+    if (ExecuteMethodOnEngine(dma_pusher, method_call.method)) {
+        CallEngineMethod(dma_pusher, method_call);
     } else {
-        CallPullerMethod(method_call);
+        CallPullerMethod(dma_pusher, method_call);
     }
 }
 
 /// Calls a GPU multivalue method.
-void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                             u32 methods_pending) {
+void Puller::CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending) {
     LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
-
     ASSERT(subchannel < bound_engines.size());
-
-    if (ExecuteMethodOnEngine(method)) {
-        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+    if (ExecuteMethodOnEngine(dma_pusher, method)) {
+        CallEngineMultiMethod(dma_pusher, method, subchannel, base_start, amount, methods_pending);
     } else {
         for (u32 i = 0; i < amount; i++) {
-            CallPullerMethod(MethodCall{
+            CallPullerMethod(dma_pusher, MethodCall{
                 method,
                 base_start[i],
                 subchannel,
@@ -298,13 +279,9 @@ void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start,
     }
 }
 
-void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
-    rasterizer = rasterizer_;
-}
-
 /// Determines where the method should be executed.
-[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
-    const auto buffer_method = static_cast<BufferMethods>(method);
+[[nodiscard]] bool Puller::ExecuteMethodOnEngine([[maybe_unused]] DmaPusher& dma_pusher, u32 method) {
+    const auto buffer_method = static_cast<BufferMethods>(method); // NonPullerMethods and above go to the bound engine
     return buffer_method >= BufferMethods::NonPullerMethods;
 }
 
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
index fe5102e3ed..e8994f5640 100644
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -70,32 +70,13 @@ public:
         BitField<8, 24, u32> syncpoint_id;
     };
 
-    explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
-                    Control::ChannelState& channel_state);
-    ~Puller();
-
-    void CallMethod(const MethodCall& method_call);
-
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending);
-
-    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
-
-    void CallPullerMethod(const MethodCall& method_call);
-
-    void CallEngineMethod(const MethodCall& method_call);
-
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending);
-
+    // The puller stores no references: channel state, memory manager and rasterizer are read from dma_pusher.
+    void CallMethod(DmaPusher& dma_pusher, const MethodCall& method_call);
+    void CallMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending);
+    void CallPullerMethod(DmaPusher& dma_pusher, const MethodCall& method_call);
+    void CallEngineMethod(DmaPusher& dma_pusher, const MethodCall& method_call);
+    void CallEngineMultiMethod(DmaPusher& dma_pusher, u32 method, u32 subchannel, const u32* base_start, u32 amount, u32 methods_pending);
 private:
-    Tegra::GPU& gpu;
-
-    MemoryManager& memory_manager;
-    DmaPusher& dma_pusher;
-    Control::ChannelState& channel_state;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
-
     static constexpr std::size_t NUM_REGS = 0x800;
     struct Regs {
         static constexpr size_t NUM_REGS = 0x40;
@@ -139,12 +120,12 @@ private:
         };
     } regs{};
 
-    void ProcessBindMethod(const MethodCall& method_call);
-    void ProcessFenceActionMethod();
-    void ProcessSemaphoreAcquire();
-    void ProcessSemaphoreRelease();
-    void ProcessSemaphoreTriggerMethod();
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
+    void ProcessBindMethod(DmaPusher& dma_pusher, const MethodCall& method_call);
+    void ProcessFenceActionMethod(DmaPusher& dma_pusher);
+    void ProcessSemaphoreAcquire(DmaPusher& dma_pusher);
+    void ProcessSemaphoreRelease(DmaPusher& dma_pusher);
+    void ProcessSemaphoreTriggerMethod(DmaPusher& dma_pusher);
+    [[nodiscard]] bool ExecuteMethodOnEngine(DmaPusher& dma_pusher, u32 method);
 
     /// Mapping of command subchannels to their bound engine ids
     std::array<EngineID, 8> bound_engines{};
@@ -157,8 +138,7 @@ private:
     };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
-                  "Field " #field_name " has invalid position")
+    static_assert(offsetof(Regs, field_name) == position * 4, "Field " #field_name " has invalid position")
 
     ASSERT_REG_POSITION(semaphore_address, 0x4);
     ASSERT_REG_POSITION(semaphore_sequence, 0x6);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 5f4054212f..44dbb93c3d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -40,30 +40,32 @@
 namespace Tegra {
 
 struct GPU::Impl {
-    explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
-          shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
-          gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
+    explicit Impl(Core::System& system_, bool is_async_, bool use_nvdec_)
+        : system{system_}
+        , use_nvdec{use_nvdec_}
+        , shader_notify()
+        , is_async{is_async_}
+        , gpu_thread{system_, is_async_}
+        , scheduler(system_.GPU()) // NOTE(review): the GPU& used to be passed in directly; presumably Impl is built inside GPU's constructor, so confirm System::GPU() is already valid here
+    {}
 
     ~Impl() = default;
 
     std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
         auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
         channels.emplace(channel_id, channel_state);
-        scheduler->DeclareChannel(channel_state);
+        scheduler.DeclareChannel(channel_state);
         return channel_state;
     }
 
     void BindChannel(s32 channel_id) {
-        if (bound_channel == channel_id) {
-            return;
+        if (bound_channel != channel_id) {
+            auto it = channels.find(channel_id);
+            ASSERT(it != channels.end());
+            bound_channel = channel_id;
+            current_channel = it->second.get();
+            renderer->ReadRasterizer()->BindChannel(*current_channel);
         }
-        auto it = channels.find(channel_id);
-        ASSERT(it != channels.end());
-        bound_channel = channel_id;
-        current_channel = it->second.get();
-
-        rasterizer->BindChannel(*current_channel);
     }
 
     std::shared_ptr<Control::ChannelState> AllocateChannel() {
@@ -71,13 +73,13 @@ struct GPU::Impl {
     }
 
     void InitChannel(Control::ChannelState& to_init, u64 program_id) {
-        to_init.Init(system, gpu, program_id);
-        to_init.BindRasterizer(rasterizer);
-        rasterizer->InitializeChannel(to_init);
+        to_init.Init(system, program_id);
+        to_init.BindRasterizer(renderer->ReadRasterizer());
+        renderer->ReadRasterizer()->InitializeChannel(to_init);
     }
 
     void InitAddressSpace(Tegra::MemoryManager& memory_manager) {
-        memory_manager.BindRasterizer(rasterizer);
+        memory_manager.BindRasterizer(renderer->ReadRasterizer());
     }
 
     void ReleaseChannel(Control::ChannelState& to_release) {
@@ -87,26 +89,26 @@ struct GPU::Impl {
     /// Binds a renderer to the GPU.
     void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
         renderer = std::move(renderer_);
-        rasterizer = renderer->ReadRasterizer();
-        host1x.MemoryManager().BindInterface(rasterizer);
-        host1x.gmmu_manager.BindRasterizer(rasterizer);
+        system.Host1x().MemoryManager().BindInterface(renderer->ReadRasterizer());
+        system.Host1x().GMMU().BindRasterizer(renderer->ReadRasterizer());
     }
 
     /// Flush all current written commands into the host GPU for execution.
     void FlushCommands() {
-        rasterizer->FlushCommands();
+        renderer->ReadRasterizer()->FlushCommands();
     }
 
     /// Synchronizes CPU writes with Host GPU memory.
     void InvalidateGPUCache() {
-        std::function<void(PAddr, size_t)> callback_writes(
-            [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
+        std::function<void(PAddr, size_t)> callback_writes([this](PAddr address, size_t size) {
+            renderer->ReadRasterizer()->OnCacheInvalidation(address, size);
+        });
         system.GatherGPUDirtyMemory(callback_writes);
     }
 
     /// Signal the ending of command list.
     void OnCommandListEnd() {
-        rasterizer->ReleaseFences(false);
+        renderer->ReadRasterizer()->ReleaseFences(false);
         Settings::UpdateGPUAccuracy();
     }
 
@@ -143,62 +145,6 @@ struct GPU::Impl {
         }
     }
 
-    /// Returns a reference to the Maxwell3D GPU engine.
-    [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
-        ASSERT(current_channel);
-        return *current_channel->maxwell_3d;
-    }
-
-    /// Returns a const reference to the Maxwell3D GPU engine.
-    [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
-        ASSERT(current_channel);
-        return *current_channel->maxwell_3d;
-    }
-
-    /// Returns a reference to the KeplerCompute GPU engine.
-    [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
-        ASSERT(current_channel);
-        return *current_channel->kepler_compute;
-    }
-
-    /// Returns a reference to the KeplerCompute GPU engine.
-    [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
-        ASSERT(current_channel);
-        return *current_channel->kepler_compute;
-    }
-
-    /// Returns a reference to the GPU DMA pusher.
-    [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
-        ASSERT(current_channel);
-        return *current_channel->dma_pusher;
-    }
-
-    /// Returns a const reference to the GPU DMA pusher.
-    [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
-        ASSERT(current_channel);
-        return *current_channel->dma_pusher;
-    }
-
-    /// Returns a reference to the underlying renderer.
-    [[nodiscard]] VideoCore::RendererBase& Renderer() {
-        return *renderer;
-    }
-
-    /// Returns a const reference to the underlying renderer.
-    [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
-        return *renderer;
-    }
-
-    /// Returns a reference to the shader notifier.
-    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
-        return *shader_notify;
-    }
-
-    /// Returns a const reference to the shader notifier.
-    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
-        return *shader_notify;
-    }
-
     [[nodiscard]] u64 GetTicks() const {
         u64 gpu_tick = system.CoreTiming().GetGPUTicks();
         Settings::GpuOverclock overclock = Settings::values.fast_gpu_time.GetValue();
@@ -210,14 +156,6 @@ struct GPU::Impl {
         return gpu_tick;
     }
 
-    [[nodiscard]] bool IsAsync() const {
-        return is_async;
-    }
-
-    [[nodiscard]] bool UseNvdec() const {
-        return use_nvdec;
-    }
-
     void RendererFrameEndNotify() {
         system.GetPerfStats().EndGameFrame();
     }
@@ -227,7 +165,7 @@ struct GPU::Impl {
     /// core timing events.
     void Start() {
         Settings::UpdateGPUAccuracy();
-        gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
+        gpu_thread.StartThread(*renderer, renderer->Context(), scheduler);
     }
 
     void NotifyShutdown() {
@@ -260,14 +198,13 @@ struct GPU::Impl {
     }
 
     VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
-        auto raster_area = rasterizer->GetFlushArea(addr, size);
+        auto raster_area = renderer->ReadRasterizer()->GetFlushArea(addr, size);
         if (raster_area.preemtive) {
             return raster_area;
         }
         raster_area.preemtive = true;
         const u64 fence = RequestSyncOperation([this, &raster_area]() {
-            rasterizer->FlushRegion(raster_area.start_address,
-                                    raster_area.end_address - raster_area.start_address);
+            renderer->ReadRasterizer()->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
         });
         gpu_thread.TickGPU();
         WaitForSyncOperation(fence);
@@ -280,7 +217,7 @@ struct GPU::Impl {
     }
 
     bool OnCPUWrite(DAddr addr, u64 size) {
-        return rasterizer->OnCPUWrite(addr, size);
+        return renderer->ReadRasterizer()->OnCPUWrite(addr, size);
     }
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
@@ -305,7 +242,7 @@ struct GPU::Impl {
         }
         const auto wait_fence =
             RequestSyncOperation([this, current_request_counter, &layers, &fences, num_fences] {
-                auto& syncpoint_manager = host1x.GetSyncpointManager();
+                auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
                 if (num_fences == 0) {
                     renderer->Composite(layers);
                 }
@@ -338,17 +275,14 @@ struct GPU::Impl {
         return out;
     }
 
-    GPU& gpu;
     Core::System& system;
-    Host1x::Host1x& host1x;
 
     std::unique_ptr<VideoCore::RendererBase> renderer;
-    VideoCore::RasterizerInterface* rasterizer = nullptr;
     const bool use_nvdec;
 
     s32 new_channel_id{1};
     /// Shader build notifier
-    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
+    VideoCore::ShaderNotify shader_notify;
     /// When true, we are about to shut down emulation session, so terminate outstanding tasks
     std::atomic_bool shutting_down{};
 
@@ -372,7 +306,7 @@ struct GPU::Impl {
     VideoCommon::GPUThread::ThreadManager gpu_thread;
     std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
 
-    std::unique_ptr<Tegra::Control::Scheduler> scheduler;
+    Tegra::Control::Scheduler scheduler;
     ankerl::unordered_dense::map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
     Tegra::Control::ChannelState* current_channel;
     s32 bound_channel{-1};
@@ -383,7 +317,8 @@ struct GPU::Impl {
 };
 
 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
-    : impl{std::make_unique<Impl>(*this, system, is_async, use_nvdec)} {}
+    : impl{std::make_unique<Impl>(system, is_async, use_nvdec)}
+{}
 
 GPU::~GPU() = default;
 
@@ -424,8 +359,9 @@ void GPU::OnCommandListEnd() {
 }
 
 u64 GPU::RequestFlush(DAddr addr, std::size_t size) {
-    return impl->RequestSyncOperation(
-        [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); });
+    return impl->RequestSyncOperation([this, addr, size]() {
+        impl->renderer->ReadRasterizer()->FlushRegion(addr, size);
+    });
 }
 
 u64 GPU::CurrentSyncRequestFence() const {
@@ -442,52 +378,52 @@ void GPU::TickWork() {
 
 /// Gets a mutable reference to the Host1x interface
 Host1x::Host1x& GPU::Host1x() {
-    return impl->host1x;
+    return impl->system.Host1x();
 }
 
 /// Gets an immutable reference to the Host1x interface.
 const Host1x::Host1x& GPU::Host1x() const {
-    return impl->host1x;
+    return impl->system.Host1x();
 }
 
 Engines::Maxwell3D& GPU::Maxwell3D() {
-    return impl->Maxwell3D();
+    return *impl->current_channel->maxwell_3d;
 }
 
 const Engines::Maxwell3D& GPU::Maxwell3D() const {
-    return impl->Maxwell3D();
+    return *impl->current_channel->maxwell_3d;
 }
 
 Engines::KeplerCompute& GPU::KeplerCompute() {
-    return impl->KeplerCompute();
+    return *impl->current_channel->kepler_compute;
 }
 
 const Engines::KeplerCompute& GPU::KeplerCompute() const {
-    return impl->KeplerCompute();
+    return *impl->current_channel->kepler_compute;
 }
 
 Tegra::DmaPusher& GPU::DmaPusher() {
-    return impl->DmaPusher();
+    return *impl->current_channel->dma_pusher;
 }
 
 const Tegra::DmaPusher& GPU::DmaPusher() const {
-    return impl->DmaPusher();
+    return *impl->current_channel->dma_pusher;
 }
 
 VideoCore::RendererBase& GPU::Renderer() {
-    return impl->Renderer();
+    return *impl->renderer;
 }
 
 const VideoCore::RendererBase& GPU::Renderer() const {
-    return impl->Renderer();
+    return *impl->renderer;
 }
 
 VideoCore::ShaderNotify& GPU::ShaderNotify() {
-    return impl->ShaderNotify();
+    return impl->shader_notify;
 }
 
 const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
-    return impl->ShaderNotify();
+    return impl->shader_notify;
 }
 
 void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
@@ -504,11 +440,11 @@ u64 GPU::GetTicks() const {
 }
 
 bool GPU::IsAsync() const {
-    return impl->IsAsync();
+    return impl->is_async;
 }
 
 bool GPU::UseNvdec() const {
-    return impl->UseNvdec();
+    return impl->use_nvdec;
 }
 
 void GPU::RendererFrameEndNotify() {