From 4f2d46a25f293a0df40b07dfc9040bc7f5b270bf Mon Sep 17 00:00:00 2001
From: lizzie
Date: Fri, 17 Apr 2026 09:38:35 +0000
Subject: [PATCH] [dynarmic] use constant resolution instead of clobbering a
 register when doing spinlocks

Signed-off-by: lizzie
---
 .../dynarmic/dynarmic_exclusive_monitor.cpp   | 24 ++---
 src/dynarmic/src/dynarmic/CMakeLists.txt      |  4 -
 .../backend/arm64/a64_address_space.cpp       | 18 ++--
 .../backend/arm64/exclusive_monitor.cpp       | 61 -------------
 .../backend/riscv64/exclusive_monitor.cpp     | 54 ------------
 .../backend/x64/a32_emit_x64_memory.cpp       | 50 ++---------
 .../backend/x64/a64_emit_x64_memory.cpp       | 62 +++----------
 .../backend/x64/emit_x64_memory.cpp.inc       | 44 +++++-----
 .../dynarmic/backend/x64/emit_x64_memory.h    | 49 +++++------
 .../src/dynarmic/common/spin_lock_x64.cpp     | 50 ++++++-----
 .../src/dynarmic/common/spin_lock_x64.h       |  4 +-
 .../dynarmic/interface/exclusive_monitor.h    | 88 ++++++++++---------
 12 files changed, 161 insertions(+), 347 deletions(-)
 delete mode 100644 src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp
 delete mode 100644 src/dynarmic/src/dynarmic/backend/riscv64/exclusive_monitor.cpp

diff --git a/src/core/arm/dynarmic/dynarmic_exclusive_monitor.cpp b/src/core/arm/dynarmic/dynarmic_exclusive_monitor.cpp
index b5c9c43c42..40a837e457 100644
--- a/src/core/arm/dynarmic/dynarmic_exclusive_monitor.cpp
+++ b/src/core/arm/dynarmic/dynarmic_exclusive_monitor.cpp
@@ -7,28 +7,30 @@ namespace Core {
 
 DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count_)
-    : monitor{core_count_}, memory{memory_} {}
+    : monitor{}
+    , memory{memory_}
+{}
 
 DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
 
 u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
-    return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
+    return monitor.ReadAndMark<u8>(core_index, addr, [=]() -> u8 { return memory.Read8(addr); });
 }
 
 u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
-    return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
+    return monitor.ReadAndMark<u16>(core_index, addr, [=]() -> u16 { return memory.Read16(addr); });
 }
 
 u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
-    return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
+    return monitor.ReadAndMark<u32>(core_index, addr, [=]() -> u32 { return memory.Read32(addr); });
 }
 
 u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
-    return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
+    return monitor.ReadAndMark<u64>(core_index, addr, [=]() -> u64 { return memory.Read64(addr); });
 }
 
 u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) {
-    return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 {
+    return monitor.ReadAndMark<u128>(core_index, addr, [=]() -> u128 {
         u128 result;
         result[0] = memory.Read64(addr);
         result[1] = memory.Read64(addr + 8);
@@ -41,31 +43,31 @@ void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
-    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
+    return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [=](u8 expected) -> bool {
         return memory.WriteExclusive8(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
-    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
+    return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [=](u16 expected) -> bool {
         return memory.WriteExclusive16(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
-    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
+    return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [=](u32 expected) -> bool {
         return memory.WriteExclusive32(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
-    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
+    return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [=](u64 expected) -> bool {
         return memory.WriteExclusive64(vaddr, value, expected);
     });
 }
 
 bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
-    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool {
+    return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [=](u128 expected) -> bool {
         return memory.WriteExclusive128(vaddr, value, expected);
     });
 }
diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt
index 45f4d3fdce..484e41b60a 100644
--- a/src/dynarmic/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/src/dynarmic/CMakeLists.txt
@@ -169,8 +169,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
         backend/x64/emit_x64_vector.cpp
         backend/x64/emit_x64_vector_floating_point.cpp
         backend/x64/emit_x64_vector_saturation.cpp
-        backend/x64/exclusive_monitor.cpp
-        backend/x64/exclusive_monitor_friend.h
         backend/x64/host_feature.h
         backend/x64/hostloc.h
         backend/x64/jitstate_info.h
@@ -231,7 +229,6 @@ if ("arm64" IN_LIST ARCHITECTURE)
         backend/arm64/emit_arm64_vector_floating_point.cpp
         backend/arm64/emit_arm64_vector_saturation.cpp
         backend/arm64/emit_context.h
-        backend/arm64/exclusive_monitor.cpp
         backend/arm64/fastmem.h
         backend/arm64/fpsr_manager.cpp
         backend/arm64/fpsr_manager.h
@@ -278,7 +275,6 @@ if ("riscv64" IN_LIST ARCHITECTURE)
         backend/riscv64/emit_riscv64_vector.cpp
         backend/riscv64/emit_riscv64.cpp
         backend/riscv64/emit_riscv64.h
-        backend/riscv64/exclusive_monitor.cpp
         backend/riscv64/reg_alloc.cpp
         backend/riscv64/reg_alloc.h
         backend/riscv64/stack_layout.h
diff --git a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp
index 2c71ffe282..74451f19a8 100644
--- a/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp
+++ b/src/dynarmic/src/dynarmic/backend/arm64/a64_address_space.cpp
@@ -135,12 +135,9 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const
     oaknut::Label l_addr, l_this;
 
     auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
-        return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
-                                                            [&](T expected) -> bool {
-                                                                return (conf.callbacks->*callback)(vaddr, value, expected);
-                                                            })
-                   ? 0
-                   : 1;
+        return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
+            return (conf.callbacks->*callback)(vaddr, value, expected);
+        }) ? 0 : 1;
     };
 
     void* target = code.xptr<void*>();
@@ -300,12 +297,9 @@ static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, co
     oaknut::Label l_addr, l_this;
 
     auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 {
-        return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
-                                                                 [&](Vector expected) -> bool {
-                                                                     return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
-                                                                 })
-                   ? 0
-                   : 1;
+        return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
+            return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
+        }) ? 0 : 1;
     };
 
     void* target = code.xptr<void*>();
diff --git a/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp b/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp
deleted file mode 100644
index d9bc9b2395..0000000000
--- a/src/dynarmic/src/dynarmic/backend/arm64/exclusive_monitor.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-/* This file is part of the dynarmic project.
- * Copyright (c) 2022 MerryMage
- * SPDX-License-Identifier: 0BSD
- */
-
-#include "dynarmic/interface/exclusive_monitor.h"
-
-#include <algorithm>
-
-#include "common/assert.h"
-
-namespace Dynarmic {
-
-ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
-    : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}
-
-size_t ExclusiveMonitor::GetProcessorCount() const {
-    return exclusive_addresses.size();
-}
-
-void ExclusiveMonitor::Lock() {
-    lock.Lock();
-}
-
-void ExclusiveMonitor::Unlock() {
-    lock.Unlock();
-}
-
-bool ExclusiveMonitor::CheckAndClear(std::size_t processor_id, VAddr address) {
-    const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
-
-    Lock();
-    if (exclusive_addresses[processor_id] != masked_address) {
-        Unlock();
-        return false;
-    }
-
-    for (VAddr& other_address : exclusive_addresses) {
-        if (other_address == masked_address) {
-            other_address = INVALID_EXCLUSIVE_ADDRESS;
-        }
-    }
-    return true;
-}
-
-void ExclusiveMonitor::Clear() {
-    Lock();
-    std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
-    Unlock();
-}
-
-void ExclusiveMonitor::ClearProcessor(std::size_t processor_id) {
-    Lock();
-    exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
-    Unlock();
-}
-
-} // namespace Dynarmic
diff --git a/src/dynarmic/src/dynarmic/backend/riscv64/exclusive_monitor.cpp b/src/dynarmic/src/dynarmic/backend/riscv64/exclusive_monitor.cpp
deleted file mode 100644
index b3585c64ea..0000000000
--- a/src/dynarmic/src/dynarmic/backend/riscv64/exclusive_monitor.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "dynarmic/interface/exclusive_monitor.h"
-
-#include <algorithm>
-
-namespace Dynarmic {
-
-ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
-    : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}
-
-size_t ExclusiveMonitor::GetProcessorCount() const {
-    return exclusive_addresses.size();
-}
-
-void ExclusiveMonitor::Lock() {
-    lock.Lock();
-}
-
-void ExclusiveMonitor::Unlock() {
-    lock.Unlock();
-}
-
-bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
-    const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
-
-    Lock();
-    if (exclusive_addresses[processor_id] != masked_address) {
-        Unlock();
-        return false;
-    }
-
-    for (VAddr& other_address : exclusive_addresses) {
-        if (other_address == masked_address) {
-            other_address = INVALID_EXCLUSIVE_ADDRESS;
-        }
-    }
-    return true;
-}
-
-void ExclusiveMonitor::Clear() {
-    Lock();
-    std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
-    Unlock();
-}
-
-void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
-    Lock();
-    exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
-    Unlock();
-}
-
-} // namespace Dynarmic
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
index f138b5f137..c72d669b5b 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64_memory.cpp
@@ -20,7 +20,7 @@
 #include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/devirtualize.h"
 #include "dynarmic/backend/x64/emit_x64_memory.h"
-#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
+#include "dynarmic/interface/exclusive_monitor.h"
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/interface/exclusive_monitor.h"
 
@@ -174,67 +174,35 @@ void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
 }
 
 void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
 }
 
 void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
 }
 
 void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end) {
diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp
index 4b7054b4b2..1070ee8d7e 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64_memory.cpp
@@ -20,7 +20,7 @@
 #include "dynarmic/backend/x64/abi.h"
 #include "dynarmic/backend/x64/devirtualize.h"
 #include "dynarmic/backend/x64/emit_x64_memory.h"
-#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
+#include "dynarmic/interface/exclusive_monitor.h"
 #include "dynarmic/backend/x64/perf_map.h"
 #include "dynarmic/common/spin_lock_x64.h"
 #include "dynarmic/interface/exclusive_monitor.h"
 
@@ -330,83 +330,43 @@ void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
 }
 
 void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveReadMemoryInline<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
-    } else {
-        EmitExclusiveReadMemory<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
-    }
+    EmitExclusiveReadMemoryInline<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
 }
 
 void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
-    if (conf.fastmem_exclusive_access) {
-        EmitExclusiveWriteMemoryInline<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst);
-    } else {
-        EmitExclusiveWriteMemory<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst);
-    }
+    EmitExclusiveWriteMemoryInline<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst);
 }
 
 void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::Label* end) {
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
index 4fa14d504b..4cf21948ba 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc
@@ -230,12 +230,11 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
         if (ordered) {
            code.mfence();
         }
-        code.CallLambda(
-            [](AxxUserConfig& conf, Axx::VAddr vaddr) -> T {
-                return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
-                    return (conf.callbacks->*callback)(vaddr);
-                });
+        code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr) -> T {
+            return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
+                return (conf.callbacks->*callback)(vaddr);
             });
+        });
         code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
     } else {
         const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
@@ -250,12 +249,11 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
         if (ordered) {
            code.mfence();
         }
-        code.CallLambda(
-            [](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& ret) {
-                ret = conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
-                    return (conf.callbacks->*callback)(vaddr);
-                });
+        code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& ret) {
+            ret = conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
+                return (conf.callbacks->*callback)(vaddr);
             });
+        });
 
         code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
         ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
@@ -320,11 +318,12 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
 
 template<std::size_t bitsize, auto callback>
 void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* inst) {
-    ASSERT(conf.global_monitor && conf.fastmem_pointer);
-    if (!exception_handler.SupportsFastmem()) {
+    ASSERT(conf.global_monitor);
+    if (!conf.fastmem_exclusive_access || !exception_handler.SupportsFastmem()) {
         EmitExclusiveReadMemory<bitsize, callback>(ctx, inst);
         return;
     }
+    ASSERT(conf.fastmem_pointer);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     constexpr bool ordered = true;
@@ -344,10 +343,10 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
 
     const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
 
-    EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
+    EmitExclusiveLock(code, conf, tmp2.cvt32());
 
     code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
-    code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
+    code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + conf.processor_id));
     code.mov(qword[tmp], vaddr);
 
     const auto fastmem_marker = ShouldFastmem(ctx, inst);
@@ -381,10 +380,10 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
         code.call(wrapped_fn);
     }
 
-    code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
+    code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_values.data() + conf.processor_id));
     EmitWriteMemoryMov(code, tmp, value_idx, false);
 
-    EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32());
+    EmitExclusiveUnlock(code, conf, tmp2.cvt32());
 
     if constexpr (bitsize == 128) {
         ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
@@ -397,11 +396,12 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
 
 template<std::size_t bitsize, auto callback>
 void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* inst) {
-    ASSERT(conf.global_monitor && conf.fastmem_pointer);
-    if (!exception_handler.SupportsFastmem()) {
+    ASSERT(conf.global_monitor);
+    if (!conf.fastmem_exclusive_access || !exception_handler.SupportsFastmem()) {
         EmitExclusiveWriteMemory<bitsize, callback>(ctx, inst);
         return;
     }
+    ASSERT(conf.fastmem_pointer);
 
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     constexpr bool ordered = true;
@@ -425,7 +425,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
 
     const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
 
-    EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
+    EmitExclusiveLock(code, conf, tmp2.cvt32());
 
     SharedLabel end = ctx.GenSharedLabel();
 
@@ -433,14 +433,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
     code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
     code.test(tmp.cvt8(), tmp.cvt8());
     code.je(*end, code.T_NEAR);
-    code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
+    code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + conf.processor_id));
     code.cmp(qword[tmp], vaddr);
     code.jne(*end, code.T_NEAR);
 
     EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);
 
     code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
-    code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
+    code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_values.data() + conf.processor_id));
 
     if constexpr (bitsize == 128) {
         code.mov(rax, qword[tmp + 0]);
@@ -519,7 +519,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
     }
 
     code.L(*end);
-    EmitExclusiveUnlock(code, conf, tmp, eax);
+    EmitExclusiveUnlock(code, conf, eax);
 
     ctx.reg_alloc.DefineValue(code, inst, status);
     EmitCheckMemoryAbort(ctx, inst);
 }
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
index 3ac078f1d7..4581f9d7c2 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
@@ -13,7 +13,7 @@
 
 #include "dynarmic/backend/x64/a32_emit_x64.h"
 #include "dynarmic/backend/x64/a64_emit_x64.h"
-#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
+#include "dynarmic/interface/exclusive_monitor.h"
 #include "dynarmic/common/spin_lock_x64.h"
 #include "dynarmic/interface/exclusive_monitor.h"
 #include "dynarmic/ir/acc_type.h"
@@ -344,43 +344,36 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
 }
 
 template<typename UserConfig>
-void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 pointer, Xbyak::Reg32 tmp) {
-    if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
-        return;
+void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg32 tmp) {
+    if (!conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
+        u64 const slp = std::bit_cast<u64>(std::addressof(conf.global_monitor->lock.storage));
+        EmitSpinLockLock(code, dword[slp], tmp, code.HasHostFeature(HostFeature::WAITPKG));
     }
-
-    code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
-    EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
 }
 
 template<typename UserConfig>
-void EmitExclusiveUnlock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 pointer, Xbyak::Reg32 tmp) {
-    if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
-        return;
+void EmitExclusiveUnlock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg32 tmp) {
+    if (!conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
+        u64 const slp = std::bit_cast<u64>(std::addressof(conf.global_monitor->lock.storage));
+        EmitSpinLockUnlock(code, dword[slp], tmp);
     }
-
-    code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
-    EmitSpinLockUnlock(code, pointer, tmp);
 }
 
 template<typename UserConfig>
 void EmitExclusiveTestAndClear(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 vaddr, Xbyak::Reg64 pointer, Xbyak::Reg64 tmp) {
-    if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
-        return;
-    }
-
-    code.mov(tmp, 0xDEAD'DEAD'DEAD'DEAD);
-    const size_t processor_count = GetExclusiveMonitorProcessorCount(conf.global_monitor);
-    for (size_t processor_index = 0; processor_index < processor_count; processor_index++) {
-        if (processor_index == conf.processor_id) {
-            continue;
+    if (!conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
+        code.mov(tmp, 0xDEAD'DEAD'DEAD'DEAD);
+        static_assert(ExclusiveMonitor::MAX_NUM_CPU_CORES == 4);
+        for (size_t i = 0; i < ExclusiveMonitor::MAX_NUM_CPU_CORES; i++) {
+            if (i != conf.processor_id) {
+                Xbyak::Label ok;
+                code.mov(pointer, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + i));
+                code.cmp(qword[pointer], vaddr);
+                code.jne(ok, code.T_NEAR);
+                code.mov(qword[pointer], tmp);
+                code.L(ok);
+            }
         }
-        Xbyak::Label ok;
-        code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, processor_index)));
-        code.cmp(qword[pointer], vaddr);
-        code.jne(ok, code.T_NEAR);
-        code.mov(qword[pointer], tmp);
-        code.L(ok);
     }
 }
diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
index 7607bbaafc..969632364c 100644
--- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
+++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp
@@ -22,20 +22,22 @@ static const auto default_cg_mode = nullptr; //Allow RWE
 
 namespace Dynarmic {
 
-void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
+/// @brief Emits a lock path for a given spinlock
+/// @param ptr Memory operand; must be a dword[ptr]
+/// @param waitpkg Whether or not the "UMWAIT" instruction can be used
+void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp, bool waitpkg) {
     // TODO: this is because we lack regalloc - so better to be safe :(
     if (waitpkg) {
         code.push(Xbyak::util::eax);
         code.push(Xbyak::util::ebx);
         code.push(Xbyak::util::edx);
-    }
-    Xbyak::Label start, loop;
-    code.jmp(start, code.T_NEAR);
-    code.L(loop);
-    if (waitpkg) {
+        Xbyak::Label start, loop;
+        code.jmp(start, code.T_NEAR);
+        code.L(loop);
        // TODO: This clobbers EAX and EDX; did we tell the regalloc?
        // ARM ptr for address-monitoring
-        code.umonitor(ptr);
+        code.lea(Xbyak::util::rax, ptr);
+        code.umonitor(Xbyak::util::rax);
        // tmp.bit[0] = 0: C0.1 | Slow Wakeup | Better Savings
        // tmp.bit[0] = 1: C0.2 | Fast Wakeup | Lesser Savings
        // edx:eax is implicitly used as a 64-bit deadline timestamp
@@ -49,24 +51,31 @@ void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32
         code.umwait(Xbyak::util::ebx);
        // CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
        // CF == 0 if we exited the wait for any other reason
-    } else {
-        code.pause();
-    }
-    code.L(start);
-    code.mov(tmp, 1);
-    /*code.lock();*/ code.xchg(code.dword[ptr], tmp);
-    code.test(tmp, tmp);
-    code.jnz(loop, code.T_NEAR);
-    if (waitpkg) {
+        code.L(start);
+        code.mov(tmp, 1);
+        /*code.lock();*/ code.xchg(ptr, tmp);
+        code.test(tmp, tmp);
+        code.jnz(loop, code.T_NEAR);
         code.pop(Xbyak::util::edx);
         code.pop(Xbyak::util::ebx);
         code.pop(Xbyak::util::eax);
+    } else {
+        Xbyak::Label start, loop;
+        code.jmp(start, code.T_NEAR);
+        code.L(loop);
+        code.pause();
+        code.L(start);
+        code.mov(tmp, 1);
+        /*code.lock();*/ code.xchg(ptr, tmp);
+        code.test(tmp, tmp);
+        code.jnz(loop, code.T_NEAR);
     }
 }
 
-void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
+// ptr operand must be a dword[ptr]
+void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp) {
     code.xor_(tmp, tmp);
-    code.xchg(code.dword[ptr], tmp);
+    code.xchg(ptr, tmp);
     code.mfence();
 }
 
@@ -89,11 +98,12 @@ void SpinLockImpl::Initialize() noexcept {
     Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1);
     code.align();
     lock = code.getCurr<void (*)(volatile int*)>();
-    EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
+    EmitSpinLockLock(code, code.dword[ABI_PARAM1], code.eax, false);
     code.ret();
+    code.align();
     unlock = code.getCurr<void (*)(volatile int*)>();
-    EmitSpinLockUnlock(code, ABI_PARAM1, code.eax);
+    EmitSpinLockUnlock(code, code.dword[ABI_PARAM1], code.eax);
     code.ret();
 }
diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h
index b17d4f67f8..f2b01895cb 100644
--- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h
+++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h
@@ -12,7 +12,7 @@
 
 namespace Dynarmic {
 
-void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
-void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
+void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp, bool waitpkg);
+void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp);
 
 } // namespace Dynarmic
diff --git a/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h b/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h
index 566743c767..cf036bd270 100644
--- a/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h
+++ b/src/dynarmic/src/dynarmic/interface/exclusive_monitor.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
  * Copyright (c) 2018 MerryMage
  * SPDX-License-Identifier: 0BSD
@@ -20,68 +23,71 @@ using Vector = std::array<u64, 2>;
 
 class ExclusiveMonitor {
 public:
-    /// @param processor_count Maximum number of processors using this global
-    ///                        exclusive monitor. Each processor must have a
-    ///                        unique id.
-    explicit ExclusiveMonitor(size_t processor_count);
-
-    size_t GetProcessorCount() const;
+    explicit ExclusiveMonitor() noexcept {
+        std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
+    }
 
     /// Marks a region containing [address, address+size) to be exclusive to
-    /// processor processor_id.
-    template<typename T, typename Function>
-    T ReadAndMark(size_t processor_id, VAddr address, Function op) {
+    /// processor index.
+    template<typename T, typename F>
+    [[nodiscard]] inline T ReadAndMark(std::size_t index, VAddr address, F f) {
         static_assert(std::is_trivially_copyable_v<T>);
         const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
-
-        Lock();
-        exclusive_addresses[processor_id] = masked_address;
-        const T value = op();
-        std::memcpy(exclusive_values[processor_id].data(), &value, sizeof(T));
-        Unlock();
+        lock.Lock();
+        exclusive_addresses[index] = masked_address;
+        T const value = f();
+        std::memcpy(exclusive_values[index].data(), std::addressof(value), sizeof(T));
+        lock.Unlock();
         return value;
     }
 
-    /// Checks to see if processor processor_id has exclusive access to the
+    [[nodiscard]] inline bool CheckAndClear(std::size_t index, VAddr address) {
+        const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
+        if (exclusive_addresses[index] != masked_address)
+            return false;
+        for (VAddr& other_address : exclusive_addresses)
+            if (other_address == masked_address)
+                other_address = INVALID_EXCLUSIVE_ADDRESS;
+        return true;
+    }
+
+    /// Checks to see if processor index has exclusive access to the
     /// specified region. If it does, executes the operation then clears
     /// the exclusive state for processors if their exclusive region(s)
     /// contain [address, address+size).
-    template<typename T, typename Function>
-    bool DoExclusiveOperation(size_t processor_id, VAddr address, Function op) {
+    template<typename T, typename F>
+    [[nodiscard]] inline bool DoExclusiveOperation(std::size_t index, VAddr address, F&& f) {
         static_assert(std::is_trivially_copyable_v<T>);
-        if (!CheckAndClear(processor_id, address)) {
-            return false;
+        bool result = false;
+        lock.Lock();
+        if (CheckAndClear(index, address)) {
+            T saved_value{};
+            std::memcpy(std::addressof(saved_value), exclusive_values[index].data(), sizeof(T));
+            result = f(saved_value);
         }
-
-        T saved_value;
-        std::memcpy(&saved_value, exclusive_values[processor_id].data(), sizeof(T));
-        const bool result = op(saved_value);
-
-        Unlock();
+        lock.Unlock();
         return result;
     }
 
     /// Unmark everything.
-    void Clear();
+    inline void Clear() {
+        lock.Lock();
+        std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
+        lock.Unlock();
+    }
+
     /// Unmark processor id
-    void ClearProcessor(size_t processor_id);
-
-private:
-    bool CheckAndClear(size_t processor_id, VAddr address);
-
-    void Lock();
-    void Unlock();
-
-    friend volatile int* GetExclusiveMonitorLockPointer(ExclusiveMonitor*);
-    friend size_t GetExclusiveMonitorProcessorCount(ExclusiveMonitor*);
-    friend VAddr* GetExclusiveMonitorAddressPointer(ExclusiveMonitor*, size_t index);
-    friend Vector* GetExclusiveMonitorValuePointer(ExclusiveMonitor*, size_t index);
+    inline void ClearProcessor(size_t index) {
+        lock.Lock();
+        exclusive_addresses[index] = INVALID_EXCLUSIVE_ADDRESS;
+        lock.Unlock();
+    }
 
     static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFF0ull;
     static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull;
    static constexpr size_t MAX_NUM_CPU_CORES = 4; // Sync with src/core/hardware_properties
-    boost::container::static_vector<VAddr, MAX_NUM_CPU_CORES> exclusive_addresses;
-    boost::container::static_vector<Vector, MAX_NUM_CPU_CORES> exclusive_values;
+    std::array<VAddr, MAX_NUM_CPU_CORES> exclusive_addresses;
+    std::array<Vector, MAX_NUM_CPU_CORES> exclusive_values;
 
     SpinLock lock;
 };
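
--

Notes:

The emitted lock is a plain test-and-set spinlock; what the patch changes is only how the lock word is addressed: an absolute dword[addr] operand folded into the instruction ("constant resolution") instead of first materializing the pointer in a scratch register, which is what used to clobber it. A minimal stand-alone model of the protocol the JIT emits, written with std::atomic rather than Eden's classes (a sketch, not the emitted code itself):

    #include <atomic>
    #include <emmintrin.h> // _mm_pause

    struct SpinLockModel {
        std::atomic<int> storage{0}; // models the dword the JIT now addresses directly

        void Lock() noexcept {
            // mov tmp, 1 ; xchg [addr], tmp ; test tmp, tmp ; jnz loop
            while (storage.exchange(1, std::memory_order_acquire) != 0)
                _mm_pause(); // the non-WAITPKG wait hint emitted by EmitSpinLockLock
        }

        void Unlock() noexcept {
            // xor tmp, tmp ; xchg [addr], tmp ; mfence
            storage.store(0, std::memory_order_seq_cst);
        }
    };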
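When WAITPKG is available, the wait loop parks the core with UMONITOR/UMWAIT instead of PAUSE. A hypothetical user-space equivalent using the real intrinsics (compile with -mwaitpkg; GCC/Clang may also need <x86intrin.h> for __rdtsc, and the deadline value below is illustrative, not Eden's):

    #include <immintrin.h> // _umonitor, _umwait

    inline void wait_until_unlocked(volatile int* lock_word) {
        while (*lock_word != 0) {
            _umonitor(const_cast<int*>(lock_word)); // arm the monitor on the lock's cache line
            if (*lock_word == 0)
                break; // re-check after arming so a release is not slept through
            // Control bit 0 = 0 requests the deeper sleep state; _umwait returns
            // with CF = 1 if the TSC deadline expired before any write occurred.
            _umwait(0, __rdtsc() + 100000);
        }
    }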
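Since the header now inlines the whole monitor (and the JIT pokes at exclusive_addresses directly), its contract is worth keeping in view: ReadAndMark records a granule-masked address for a core, and a later exclusive write succeeds only if that mark is still intact, clearing every core holding the granule. A toy model of that contract with hypothetical names (std::mutex standing in for the JIT-visible spinlock, 16-byte granule as in RESERVATION_GRANULE_MASK):

    #include <array>
    #include <cstdint>
    #include <mutex>

    struct ToyMonitor {
        static constexpr std::uint64_t kInvalid = 0xDEAD'DEAD'DEAD'DEADull;
        static constexpr std::uint64_t kGranuleMask = ~std::uint64_t{0xF};
        std::array<std::uint64_t, 4> addresses{kInvalid, kInvalid, kInvalid, kInvalid};
        std::mutex lock;

        template<typename ReadFn>
        std::uint32_t ReadAndMark(std::size_t core, std::uint64_t vaddr, ReadFn read) {
            std::scoped_lock guard{lock};
            addresses[core] = vaddr & kGranuleMask; // take the reservation
            return read();
        }

        template<typename WriteFn>
        bool TryExclusiveWrite(std::size_t core, std::uint64_t vaddr, WriteFn write) {
            std::scoped_lock guard{lock};
            if (addresses[core] != (vaddr & kGranuleMask))
                return false; // reservation lost: guest must retry its LDXR/STXR loop
            for (auto& a : addresses)
                if (a == (vaddr & kGranuleMask))
                    a = kInvalid; // clear everyone holding this granule
            return write();
        }
    };

The real DoExclusiveOperation additionally hands the value saved at ReadAndMark time to the callback, so the memory backend can compare-and-swap against emulated memory.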