[dynarmic] use constant resolution instead of clobbering a register when doing spinlocks

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-04-17 09:38:35 +00:00
parent 3f746b2cbe
commit 4f2d46a25f
12 changed files with 161 additions and 347 deletions

View file

@ -7,28 +7,30 @@
namespace Core {
DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::Memory& memory_, std::size_t core_count_)
: monitor{core_count_}, memory{memory_} {}
: monitor{}
, memory{memory_}
{}
DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
return monitor.ReadAndMark<u8>(core_index, addr, [=]() -> u8 { return memory.Read8(addr); });
}
u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
return monitor.ReadAndMark<u16>(core_index, addr, [=]() -> u16 { return memory.Read16(addr); });
}
u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
return monitor.ReadAndMark<u32>(core_index, addr, [=]() -> u32 { return memory.Read32(addr); });
}
u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
return monitor.ReadAndMark<u64>(core_index, addr, [=]() -> u64 { return memory.Read64(addr); });
}
u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u128>(core_index, addr, [&]() -> u128 {
return monitor.ReadAndMark<u128>(core_index, addr, [=]() -> u128 {
u128 result;
result[0] = memory.Read64(addr);
result[1] = memory.Read64(addr + 8);
@ -41,31 +43,31 @@ void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
}
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [=](u8 expected) -> bool {
return memory.WriteExclusive8(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [=](u16 expected) -> bool {
return memory.WriteExclusive16(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [=](u32 expected) -> bool {
return memory.WriteExclusive32(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [=](u64 expected) -> bool {
return memory.WriteExclusive64(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite128(std::size_t core_index, VAddr vaddr, u128 value) {
return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [&](u128 expected) -> bool {
return monitor.DoExclusiveOperation<u128>(core_index, vaddr, [=](u128 expected) -> bool {
return memory.WriteExclusive128(vaddr, value, expected);
});
}

View file

@ -169,8 +169,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
backend/x64/emit_x64_vector.cpp
backend/x64/emit_x64_vector_floating_point.cpp
backend/x64/emit_x64_vector_saturation.cpp
backend/x64/exclusive_monitor.cpp
backend/x64/exclusive_monitor_friend.h
backend/x64/host_feature.h
backend/x64/hostloc.h
backend/x64/jitstate_info.h
@ -231,7 +229,6 @@ if ("arm64" IN_LIST ARCHITECTURE)
backend/arm64/emit_arm64_vector_floating_point.cpp
backend/arm64/emit_arm64_vector_saturation.cpp
backend/arm64/emit_context.h
backend/arm64/exclusive_monitor.cpp
backend/arm64/fastmem.h
backend/arm64/fpsr_manager.cpp
backend/arm64/fpsr_manager.h
@ -278,7 +275,6 @@ if ("riscv64" IN_LIST ARCHITECTURE)
backend/riscv64/emit_riscv64_vector.cpp
backend/riscv64/emit_riscv64.cpp
backend/riscv64/emit_riscv64.h
backend/riscv64/exclusive_monitor.cpp
backend/riscv64/reg_alloc.cpp
backend/riscv64/reg_alloc.h
backend/riscv64/stack_layout.h

View file

@ -135,12 +135,9 @@ static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr, [&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
}) ? 0 : 1;
};
void* target = code.xptr<void*>();
@ -300,12 +297,9 @@ static void* EmitExclusiveWrite128CallTrampoline(oaknut::CodeGenerator& code, co
oaknut::Label l_addr, l_this;
auto fn = [](const A64::UserConfig& conf, A64::VAddr vaddr, Vector value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr,
[&](Vector expected) -> bool {
return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
})
? 0
: 1;
return conf.global_monitor->DoExclusiveOperation<Vector>(conf.processor_id, vaddr, [&](Vector expected) -> bool {
return conf.callbacks->MemoryWriteExclusive128(vaddr, value, expected);
}) ? 0 : 1;
};
void* target = code.xptr<void*>();

View file

@ -1,61 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/interface/exclusive_monitor.h"
#include <algorithm>
#include "common/assert.h"
namespace Dynarmic {
ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
: exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}
size_t ExclusiveMonitor::GetProcessorCount() const {
return exclusive_addresses.size();
}
void ExclusiveMonitor::Lock() {
lock.Lock();
}
void ExclusiveMonitor::Unlock() {
lock.Unlock();
}
bool ExclusiveMonitor::CheckAndClear(std::size_t processor_id, VAddr address) {
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
Lock();
if (exclusive_addresses[processor_id] != masked_address) {
Unlock();
return false;
}
for (VAddr& other_address : exclusive_addresses) {
if (other_address == masked_address) {
other_address = INVALID_EXCLUSIVE_ADDRESS;
}
}
return true;
}
void ExclusiveMonitor::Clear() {
Lock();
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
Unlock();
}
void ExclusiveMonitor::ClearProcessor(std::size_t processor_id) {
Lock();
exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
Unlock();
}
} // namespace Dynarmic

View file

@ -1,54 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#include "dynarmic/interface/exclusive_monitor.h"
#include <algorithm>
namespace Dynarmic {
ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
: exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}
size_t ExclusiveMonitor::GetProcessorCount() const {
return exclusive_addresses.size();
}
void ExclusiveMonitor::Lock() {
lock.Lock();
}
void ExclusiveMonitor::Unlock() {
lock.Unlock();
}
bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
Lock();
if (exclusive_addresses[processor_id] != masked_address) {
Unlock();
return false;
}
for (VAddr& other_address : exclusive_addresses) {
if (other_address == masked_address) {
other_address = INVALID_EXCLUSIVE_ADDRESS;
}
}
return true;
}
void ExclusiveMonitor::Clear() {
Lock();
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
Unlock();
}
void ExclusiveMonitor::ClearProcessor(size_t processor_id) {
Lock();
exclusive_addresses[processor_id] = INVALID_EXCLUSIVE_ADDRESS;
Unlock();
}
} // namespace Dynarmic

View file

@ -20,7 +20,7 @@
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/devirtualize.h"
#include "dynarmic/backend/x64/emit_x64_memory.h"
#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/interface/exclusive_monitor.h"
@ -174,67 +174,35 @@ void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
}
void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
} else {
EmitExclusiveReadMemory<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
}
EmitExclusiveReadMemoryInline<8, &A32::UserCallbacks::MemoryRead8>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveReadMemory16(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
} else {
EmitExclusiveReadMemory<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
}
EmitExclusiveReadMemoryInline<16, &A32::UserCallbacks::MemoryRead16>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveReadMemory32(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
} else {
EmitExclusiveReadMemory<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
}
EmitExclusiveReadMemoryInline<32, &A32::UserCallbacks::MemoryRead32>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveReadMemory64(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
} else {
EmitExclusiveReadMemory<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
}
EmitExclusiveReadMemoryInline<64, &A32::UserCallbacks::MemoryRead64>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory8(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
} else {
EmitExclusiveWriteMemory<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<8, &A32::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory16(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
} else {
EmitExclusiveWriteMemory<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<16, &A32::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory32(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
} else {
EmitExclusiveWriteMemory<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<32, &A32::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
}
void A32EmitX64::EmitA32ExclusiveWriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
} else {
EmitExclusiveWriteMemory<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<64, &A32::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
}
void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak::Label* end) {

View file

@ -20,7 +20,7 @@
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/devirtualize.h"
#include "dynarmic/backend/x64/emit_x64_memory.h"
#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/backend/x64/perf_map.h"
#include "dynarmic/common/spin_lock_x64.h"
#include "dynarmic/interface/exclusive_monitor.h"
@ -330,83 +330,43 @@ void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
}
void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
} else {
EmitExclusiveReadMemory<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
}
EmitExclusiveReadMemoryInline<8, &A64::UserCallbacks::MemoryRead8>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveReadMemory16(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
} else {
EmitExclusiveReadMemory<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
}
EmitExclusiveReadMemoryInline<16, &A64::UserCallbacks::MemoryRead16>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveReadMemory32(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
} else {
EmitExclusiveReadMemory<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
}
EmitExclusiveReadMemoryInline<32, &A64::UserCallbacks::MemoryRead32>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveReadMemory64(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
} else {
EmitExclusiveReadMemory<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
}
EmitExclusiveReadMemoryInline<64, &A64::UserCallbacks::MemoryRead64>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveReadMemory128(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveReadMemoryInline<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
} else {
EmitExclusiveReadMemory<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
}
EmitExclusiveReadMemoryInline<128, &A64::UserCallbacks::MemoryRead128>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveWriteMemory8(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
} else {
EmitExclusiveWriteMemory<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<8, &A64::UserCallbacks::MemoryWriteExclusive8>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveWriteMemory16(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
} else {
EmitExclusiveWriteMemory<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<16, &A64::UserCallbacks::MemoryWriteExclusive16>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveWriteMemory32(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
} else {
EmitExclusiveWriteMemory<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<32, &A64::UserCallbacks::MemoryWriteExclusive32>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveWriteMemory64(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
} else {
EmitExclusiveWriteMemory<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<64, &A64::UserCallbacks::MemoryWriteExclusive64>(ctx, inst);
}
void A64EmitX64::EmitA64ExclusiveWriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
if (conf.fastmem_exclusive_access) {
EmitExclusiveWriteMemoryInline<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst);
} else {
EmitExclusiveWriteMemory<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst);
}
EmitExclusiveWriteMemoryInline<128, &A64::UserCallbacks::MemoryWriteExclusive128>(ctx, inst);
}
void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::Label* end) {

View file

@ -230,12 +230,11 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered) {
code.mfence();
}
code.CallLambda(
[](AxxUserConfig& conf, Axx::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
});
code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
} else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
@ -250,12 +249,11 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered) {
code.mfence();
}
code.CallLambda(
[](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& ret) {
ret = conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
return (conf.callbacks->*callback)(vaddr);
});
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& ret) {
ret = conf.global_monitor->ReadAndMark<Vector>(conf.processor_id, vaddr, [&]() -> Vector {
return (conf.callbacks->*callback)(vaddr);
});
});
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
@ -320,11 +318,12 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
template<std::size_t bitsize, auto callback>
void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.global_monitor && conf.fastmem_pointer);
if (!exception_handler.SupportsFastmem()) {
ASSERT(conf.global_monitor);
if (!conf.fastmem_exclusive_access || !exception_handler.SupportsFastmem()) {
EmitExclusiveReadMemory<bitsize, callback>(ctx, inst);
return;
}
ASSERT(conf.fastmem_pointer);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
constexpr bool ordered = true;
@ -344,10 +343,10 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
EmitExclusiveLock(code, conf, tmp2.cvt32());
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + conf.processor_id));
code.mov(qword[tmp], vaddr);
const auto fastmem_marker = ShouldFastmem(ctx, inst);
@ -381,10 +380,10 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
code.call(wrapped_fn);
}
code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + conf.processor_id));
EmitWriteMemoryMov<bitsize>(code, tmp, value_idx, false);
EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32());
EmitExclusiveUnlock(code, conf, tmp2.cvt32());
if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
@ -397,11 +396,12 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
template<std::size_t bitsize, auto callback>
void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* inst) {
ASSERT(conf.global_monitor && conf.fastmem_pointer);
if (!exception_handler.SupportsFastmem()) {
ASSERT(conf.global_monitor);
if (!conf.fastmem_exclusive_access || !exception_handler.SupportsFastmem()) {
EmitExclusiveWriteMemory<bitsize, callback>(ctx, inst);
return;
}
ASSERT(conf.fastmem_pointer);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
constexpr bool ordered = true;
@ -425,7 +425,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
EmitExclusiveLock(code, conf, tmp2.cvt32());
SharedLabel end = ctx.GenSharedLabel();
@ -433,14 +433,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
code.test(tmp.cvt8(), tmp.cvt8());
code.je(*end, code.T_NEAR);
code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + conf.processor_id));
code.cmp(qword[tmp], vaddr);
code.jne(*end, code.T_NEAR);
EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
code.mov(tmp, std::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
code.mov(tmp, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + conf.processor_id));
if constexpr (bitsize == 128) {
code.mov(rax, qword[tmp + 0]);
@ -519,7 +519,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
}
code.L(*end);
EmitExclusiveUnlock(code, conf, tmp, eax);
EmitExclusiveUnlock(code, conf, eax);
ctx.reg_alloc.DefineValue(code, inst, status);
EmitCheckMemoryAbort(ctx, inst);
}

View file

@ -13,7 +13,7 @@
#include "dynarmic/backend/x64/a32_emit_x64.h"
#include "dynarmic/backend/x64/a64_emit_x64.h"
#include "dynarmic/backend/x64/exclusive_monitor_friend.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/common/spin_lock_x64.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/acc_type.h"
@ -344,43 +344,36 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
}
template<typename UserConfig>
void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 pointer, Xbyak::Reg32 tmp) {
if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
return;
void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg32 tmp) {
if (!conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
u64 const slp = std::bit_cast<u64>(std::addressof(conf.global_monitor->lock.storage));
EmitSpinLockLock(code, dword[slp], tmp, code.HasHostFeature(HostFeature::WAITPKG));
}
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
}
template<typename UserConfig>
void EmitExclusiveUnlock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 pointer, Xbyak::Reg32 tmp) {
if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
return;
void EmitExclusiveUnlock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg32 tmp) {
if (!conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
u64 const slp = std::bit_cast<u64>(std::addressof(conf.global_monitor->lock.storage));
EmitSpinLockUnlock(code, dword[slp], tmp);
}
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
EmitSpinLockUnlock(code, pointer, tmp);
}
template<typename UserConfig>
void EmitExclusiveTestAndClear(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 vaddr, Xbyak::Reg64 pointer, Xbyak::Reg64 tmp) {
if (conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
return;
}
code.mov(tmp, 0xDEAD'DEAD'DEAD'DEAD);
const size_t processor_count = GetExclusiveMonitorProcessorCount(conf.global_monitor);
for (size_t processor_index = 0; processor_index < processor_count; processor_index++) {
if (processor_index == conf.processor_id) {
continue;
if (!conf.HasOptimization(OptimizationFlag::Unsafe_IgnoreGlobalMonitor)) {
code.mov(tmp, 0xDEAD'DEAD'DEAD'DEAD);
static_assert(ExclusiveMonitor::MAX_NUM_CPU_CORES == 4);
for (size_t i = 0; i < ExclusiveMonitor::MAX_NUM_CPU_CORES; i++) {
if (i != conf.processor_id) {
Xbyak::Label ok;
code.mov(pointer, std::bit_cast<u64>(conf.global_monitor->exclusive_addresses.data() + i));
code.cmp(qword[pointer], vaddr);
code.jne(ok, code.T_NEAR);
code.mov(qword[pointer], tmp);
code.L(ok);
}
}
Xbyak::Label ok;
code.mov(pointer, std::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, processor_index)));
code.cmp(qword[pointer], vaddr);
code.jne(ok, code.T_NEAR);
code.mov(qword[pointer], tmp);
code.L(ok);
}
}

View file

@ -22,20 +22,22 @@ static const auto default_cg_mode = nullptr; //Allow RWE
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
/// @brief Emits a lock path for a given spinlock
/// @arg ptr Operand must be a dword[ptr]
/// @arg waitpkg Whether or not the "UMWAIT" instruction can be used
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp, bool waitpkg) {
// TODO: this is because we lack regalloc - so better to be safe :(
if (waitpkg) {
code.push(Xbyak::util::eax);
code.push(Xbyak::util::ebx);
code.push(Xbyak::util::edx);
}
Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR);
code.L(loop);
if (waitpkg) {
Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR);
code.L(loop);
// TODO: This clobbers EAX and EDX did we tell the regalloc?
// ARM ptr for address-monitoring
code.umonitor(ptr);
code.mov(Xbyak::util::eax, ptr);
code.umonitor(Xbyak::util::eax);
// tmp.bit[0] = 0: C0.1 | Slow Wakeup | Better Savings
// tmp.bit[0] = 1: C0.2 | Fast Wakeup | Lesser Savings
// edx:eax is implicitly used as a 64-bit deadline timestamp
@ -49,24 +51,31 @@ void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32
code.umwait(Xbyak::util::ebx);
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
// CF == 0 if we exited the wait for any other reason
} else {
code.pause();
}
code.L(start);
code.mov(tmp, 1);
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR);
if (waitpkg) {
code.L(start);
code.mov(tmp, 1);
/*code.lock();*/ code.xchg(ptr, tmp);
code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR);
code.pop(Xbyak::util::edx);
code.pop(Xbyak::util::ebx);
code.pop(Xbyak::util::eax);
} else {
Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR);
code.L(loop);
code.pause();
code.L(start);
code.mov(tmp, 1);
/*code.lock();*/ code.xchg(ptr, tmp);
code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR);
}
}
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
// ptr operand must be a dword[ptr]
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp) {
code.xor_(tmp, tmp);
code.xchg(code.dword[ptr], tmp);
code.xchg(ptr, tmp);
code.mfence();
}
@ -89,11 +98,12 @@ void SpinLockImpl::Initialize() noexcept {
Xbyak::Reg64 const ABI_PARAM1 = Backend::X64::HostLocToReg64(Backend::X64::ABI_PARAM1);
code.align();
lock = code.getCurr<void (*)(volatile int*)>();
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
EmitSpinLockLock(code, code.dword[ABI_PARAM1], code.eax, false);
code.ret();
code.align();
unlock = code.getCurr<void (*)(volatile int*)>();
EmitSpinLockUnlock(code, ABI_PARAM1, code.eax);
EmitSpinLockUnlock(code, code.dword[ABI_PARAM1], code.eax);
code.ret();
}

View file

@ -12,7 +12,7 @@
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp, bool waitpkg);
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Address ptr, Xbyak::Reg32 tmp);
} // namespace Dynarmic

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@ -20,68 +23,71 @@ using Vector = std::array<std::uint64_t, 2>;
class ExclusiveMonitor {
public:
/// @param processor_count Maximum number of processors using this global
/// exclusive monitor. Each processor must have a
/// unique id.
explicit ExclusiveMonitor(size_t processor_count);
size_t GetProcessorCount() const;
explicit ExclusiveMonitor() noexcept {
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
}
/// Marks a region containing [address, address+size) to be exclusive to
/// processor processor_id.
template<typename T, typename Function>
T ReadAndMark(size_t processor_id, VAddr address, Function op) {
/// processor index.
template<typename T, typename F>
[[nodiscard]] inline T ReadAndMark(std::size_t index, VAddr address, F f) {
static_assert(std::is_trivially_copyable_v<T>);
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
Lock();
exclusive_addresses[processor_id] = masked_address;
const T value = op();
std::memcpy(exclusive_values[processor_id].data(), &value, sizeof(T));
Unlock();
lock.Lock();
exclusive_addresses[index] = masked_address;
T const value = f();
std::memcpy(exclusive_values[index].data(), std::addressof(value), sizeof(T));
lock.Unlock();
return value;
}
/// Checks to see if processor processor_id has exclusive access to the
[[nodiscard]] inline bool CheckAndClear(std::size_t index, VAddr address) {
const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
if (exclusive_addresses[index] != masked_address)
return false;
for (VAddr& other_address : exclusive_addresses)
if (other_address == masked_address)
other_address = INVALID_EXCLUSIVE_ADDRESS;
return true;
}
/// Checks to see if processor index has exclusive access to the
/// specified region. If it does, executes the operation then clears
/// the exclusive state for processors if their exclusive region(s)
/// contain [address, address+size).
template<typename T, typename Function>
bool DoExclusiveOperation(size_t processor_id, VAddr address, Function op) {
template<typename T, typename F>
[[nodiscard]] inline bool DoExclusiveOperation(std::size_t index, VAddr address, F&& f) {
static_assert(std::is_trivially_copyable_v<T>);
if (!CheckAndClear(processor_id, address)) {
return false;
bool result = false;
lock.Lock();
if (CheckAndClear(index, address)) {
T saved_value{};
std::memcpy(std::addressof(saved_value), exclusive_values[index].data(), sizeof(T));
result = f(saved_value);
}
T saved_value;
std::memcpy(&saved_value, exclusive_values[processor_id].data(), sizeof(T));
const bool result = op(saved_value);
Unlock();
lock.Unlock();
return result;
}
/// Unmark everything.
void Clear();
inline void Clear() {
lock.Lock();
std::fill(exclusive_addresses.begin(), exclusive_addresses.end(), INVALID_EXCLUSIVE_ADDRESS);
lock.Unlock();
}
/// Unmark processor id
void ClearProcessor(size_t processor_id);
private:
bool CheckAndClear(size_t processor_id, VAddr address);
void Lock();
void Unlock();
friend volatile int* GetExclusiveMonitorLockPointer(ExclusiveMonitor*);
friend size_t GetExclusiveMonitorProcessorCount(ExclusiveMonitor*);
friend VAddr* GetExclusiveMonitorAddressPointer(ExclusiveMonitor*, size_t index);
friend Vector* GetExclusiveMonitorValuePointer(ExclusiveMonitor*, size_t index);
inline void ClearProcessor(size_t index) {
lock.Lock();
exclusive_addresses[index] = INVALID_EXCLUSIVE_ADDRESS;
lock.Unlock();
}
static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFFFull;
static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull;
static constexpr size_t MAX_NUM_CPU_CORES = 4; // Sync with src/core/hardware_properties
boost::container::static_vector<VAddr, MAX_NUM_CPU_CORES> exclusive_addresses;
boost::container::static_vector<Vector, MAX_NUM_CPU_CORES> exclusive_values;
std::array<VAddr, MAX_NUM_CPU_CORES> exclusive_addresses;
std::array<Vector, MAX_NUM_CPU_CORES> exclusive_values;
SpinLock lock;
};