// Mirror of https://github.com/4jcraft/4jcraft.git
// Synced 2026-04-23 19:13:36 +00:00 (261 lines, 8.1 KiB, C++)
#include "../Platform/stdafx.h"
|
|
#include "FrameProfiler.h"
|
|
|
|
#ifdef ENABLE_FRAME_PROFILER
|
|
|
|
#include <array>
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <string_view>
|
|
|
|
// FRAME_PROFILER_NOINLINE expands to the compiler-specific "never inline"
// function marker, or to nothing when the compiler is not recognised.
#ifdef _MSC_VER
#  define FRAME_PROFILER_NOINLINE __declspec(noinline)
#elif defined(__clang__) || defined(__GNUC__)
#  define FRAME_PROFILER_NOINLINE __attribute__((noinline))
#else
#  define FRAME_PROFILER_NOINLINE
#endif
|
|
|
|
namespace {
|
|
|
|
using FrameProfilerClock = std::chrono::steady_clock;
|
|
using Bucket = FrameProfiler::Bucket;
|
|
constexpr std::uint64_t kNsPerMs = 1000ULL * 1000ULL;
|
|
constexpr std::uint64_t kReportIntervalNs = 1000ULL * 1000ULL * 1000ULL;
|
|
constexpr std::size_t kBucketCount = FrameProfiler::BucketCount();
|
|
// Exact (case-sensitive) spellings that envSaysDisabled() treats as
// "profiler off". One row per word; keep the element count in sync.
constexpr std::array<std::string_view, 10> kFalseTokens{{
    "0",
    "false", "False", "FALSE",
    "no",    "No",    "NO",
    "off",   "Off",   "OFF",
}};
|
|
constexpr std::array<FrameProfiler::BucketDescriptor, kBucketCount>
|
|
kBucketDescriptors = {{
|
|
{Bucket::Frame, "frame"},
|
|
{Bucket::World, "world"},
|
|
{Bucket::Terrain, "terrain"},
|
|
{Bucket::ChunkCull, "chunkCull"},
|
|
{Bucket::ChunkCollect, "chunkCollect"},
|
|
{Bucket::ChunkPlayback, "chunkPlayback"},
|
|
{Bucket::ChunkDirtyScan, "chunkDirtyScan"},
|
|
{Bucket::ChunkRebuildSchedule, "chunkRebuildSchedule"},
|
|
{Bucket::ChunkRebuildBody, "chunkRebuildBody"},
|
|
{Bucket::Entity, "entities"},
|
|
{Bucket::Particle, "particles"},
|
|
{Bucket::WeatherSky, "weather"},
|
|
{Bucket::UIHud, "ui"},
|
|
{Bucket::Lightmap, "lightmap"},
|
|
}};
|
|
|
|
// Plain (non-atomic) accumulator for one bucket: the summed duration, the
// longest single sample and the number of samples, all starting at zero.
struct BucketTotals {
    std::uint64_t totalNs{};  // Sum of every recorded duration (ns).
    std::uint64_t maxNs{};    // Largest single recorded duration (ns).
    std::uint64_t calls{};    // Number of samples folded in so far.

    // Folds a single sample of `elapsedNs` nanoseconds into the totals.
    void Record(std::uint64_t elapsedNs) noexcept {
        ++calls;
        totalNs += elapsedNs;
        maxNs = (maxNs < elapsedNs) ? elapsedNs : maxNs;
    }

    // Folds every sample already accumulated in `other` into this object:
    // sums and call counts add up, the maximum is the larger of the two.
    void Merge(const BucketTotals& other) noexcept {
        calls += other.calls;
        totalNs += other.totalNs;
        maxNs = (maxNs < other.maxNs) ? other.maxNs : maxNs;
    }
};
|
|
|
|
// Atomic counterpart of BucketTotals: the same three counters, each held in
// its own std::atomic so they can be updated without a lock.
struct AtomicBucketTotals {
    using Counter = std::atomic<std::uint64_t>;

    Counter totalNs{0};  // Sum of every recorded duration (ns).
    Counter maxNs{0};    // Largest single recorded duration (ns).
    Counter calls{0};    // Number of samples recorded.
};
|
|
|
|
struct ProfilerState {
|
|
std::array<AtomicBucketTotals, kBucketCount> workerBuckets{};
|
|
};
|
|
|
|
struct ThreadState {
|
|
std::uint32_t frameScopeDepth{};
|
|
std::uint64_t windowStartNs{};
|
|
std::array<BucketTotals, kBucketCount> localBuckets{};
|
|
};
|
|
|
|
constinit ProfilerState g_profilerState{};
|
|
constinit thread_local ThreadState t_threadState{};
|
|
|
|
static_assert(kBucketDescriptors.size() == kBucketCount);
|
|
|
|
[[nodiscard]] inline std::uint64_t nowNs() noexcept {
|
|
return static_cast<std::uint64_t>(
|
|
std::chrono::duration_cast<std::chrono::nanoseconds>(
|
|
FrameProfilerClock::now().time_since_epoch())
|
|
.count());
|
|
}
|
|
|
|
[[nodiscard]] constexpr double nsToMs(std::uint64_t ns) noexcept {
|
|
return static_cast<double>(ns) / static_cast<double>(kNsPerMs);
|
|
}
|
|
|
|
[[nodiscard]] constexpr bool envSaysDisabled(
|
|
std::string_view value) noexcept {
|
|
if (value.empty()) return false;
|
|
|
|
for (std::string_view falseToken : kFalseTokens) {
|
|
if (value == falseToken) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Raises `value` to `candidate` if `candidate` is larger; otherwise leaves it
// untouched. Uses a relaxed compare-exchange loop so that a concurrent writer
// storing an even larger value is never overwritten.
inline void updateAtomicMax(std::atomic<std::uint64_t>& value,
                            std::uint64_t candidate) noexcept {
    std::uint64_t observed = value.load(std::memory_order_relaxed);
    for (;;) {
        // Stored value is already at least as large: nothing to do.
        if (observed >= candidate) return;
        // On failure `observed` is refreshed with the current stored value
        // and the comparison above is repeated.
        if (value.compare_exchange_weak(observed, candidate,
                                        std::memory_order_relaxed,
                                        std::memory_order_relaxed)) {
            return;
        }
    }
}
|
|
|
|
// Adds one sample of `elapsedNs` nanoseconds to the shared atomic accumulator
// for `bucket`: total and call count are incremented, the maximum is raised
// if this sample is the largest seen. All updates use relaxed ordering.
inline void recordWorkerBucket(Bucket bucket,
                               std::uint64_t elapsedNs) noexcept {
    const auto slot = FrameProfiler::BucketIndex(bucket);
    AtomicBucketTotals& totals = g_profilerState.workerBuckets[slot];

    totals.totalNs.fetch_add(elapsedNs, std::memory_order_relaxed);
    totals.calls.fetch_add(1, std::memory_order_relaxed);
    updateAtomicMax(totals.maxNs, elapsedNs);
}
|
|
|
|
[[nodiscard]] inline bool isFrameThread() noexcept {
|
|
return t_threadState.frameScopeDepth != 0;
|
|
}
|
|
|
|
// Decides whether the profiler is enabled by reading the C4J_FRAME_PROFILER
// environment variable:
//   * variable not set                      -> enabled
//   * value matches one of kFalseTokens     -> disabled
//   * any other value (including empty)     -> enabled
//
// The standard attribute is written first on purpose: a `[[...]]` attribute
// that follows a vendor specifier (`__declspec(...)` / `__attribute__(...)`)
// is not at the start of the declaration, so some compilers ignore it
// (MSVC warning C5240) or reject the declaration.
[[nodiscard]] FRAME_PROFILER_NOINLINE bool computeEnabled() noexcept {
    const char* const envValue = std::getenv("C4J_FRAME_PROFILER");
    if (envValue == nullptr) return true;  // Unset: profiler stays on.
    return !envSaysDisabled(envValue);
}
|
|
|
|
// Writes a two-line summary of one reporting window to stderr and flushes it.
//   line 1: number of frames and, per bucket, total time divided by the
//           number of frames (ms);
//   line 2: per bucket, the longest single sample (ms) and the call count.
// Nothing is printed when the Frame bucket recorded no calls.
FRAME_PROFILER_NOINLINE void emitWindowReport(
    const std::array<BucketTotals, kBucketCount>& buckets) noexcept {
    const std::uint64_t frameCount =
        buckets[FrameProfiler::BucketIndex(Bucket::Frame)].calls;
    if (frameCount == 0) return;

    // Line 1: per-frame averages.
    std::fprintf(stderr, "[frame-prof] avg/frame(ms) frames=%llu",
                 static_cast<unsigned long long>(frameCount));
    for (std::size_t i = 0; i < kBucketDescriptors.size(); ++i) {
        const auto& entry = kBucketDescriptors[i];
        const std::string_view name = entry.label;
        const BucketTotals& totals =
            buckets[FrameProfiler::BucketIndex(entry.bucket)];
        const double averageMs = nsToMs(totals.totalNs) / frameCount;
        // Labels are printed with an explicit length (%.*s), so they do not
        // need to be NUL-terminated.
        std::fprintf(stderr, " %.*s=%.2f", static_cast<int>(name.size()),
                     name.data(), averageMs);
    }
    std::fputc('\n', stderr);

    // Line 2: worst single sample and number of calls.
    std::fputs("[frame-prof] max(ms)/calls", stderr);
    for (std::size_t i = 0; i < kBucketDescriptors.size(); ++i) {
        const auto& entry = kBucketDescriptors[i];
        const std::string_view name = entry.label;
        const BucketTotals& totals =
            buckets[FrameProfiler::BucketIndex(entry.bucket)];
        const double worstMs = nsToMs(totals.maxNs);
        std::fprintf(stderr, " %.*s=%.2f/%llu",
                     static_cast<int>(name.size()), name.data(), worstMs,
                     static_cast<unsigned long long>(totals.calls));
    }
    std::fputc('\n', stderr);

    std::fflush(stderr);
}
|
|
|
|
[[nodiscard]] std::array<BucketTotals, kBucketCount> snapshotAndResetWorkerBuckets()
|
|
noexcept {
|
|
std::array<BucketTotals, kBucketCount> snapshot = {};
|
|
for (std::size_t i = 0; i < kBucketCount; ++i) {
|
|
AtomicBucketTotals& workerBucket = g_profilerState.workerBuckets[i];
|
|
snapshot[i].totalNs =
|
|
workerBucket.totalNs.exchange(0, std::memory_order_relaxed);
|
|
snapshot[i].maxNs =
|
|
workerBucket.maxNs.exchange(0, std::memory_order_relaxed);
|
|
snapshot[i].calls =
|
|
workerBucket.calls.exchange(0, std::memory_order_relaxed);
|
|
}
|
|
return snapshot;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
bool FrameProfiler::IsEnabled() noexcept {
|
|
static const bool enabled = computeEnabled();
|
|
return enabled;
|
|
}
|
|
|
|
// Records one sample of `elapsedNs` nanoseconds for `bucket`.
// A thread that currently has a FrameScope open writes to its own
// thread-local totals; every other thread writes to the shared atomic
// accumulators.
void FrameProfiler::Record(Bucket bucket, std::uint64_t elapsedNs) noexcept {
    if (!isFrameThread()) {
        recordWorkerBucket(bucket, elapsedNs);
    } else {
        BucketTotals& local = t_threadState.localBuckets[BucketIndex(bucket)];
        local.Record(elapsedNs);
    }
}
|
|
|
|
void FrameProfiler::EndFrame(std::uint64_t elapsedNs) noexcept {
|
|
Record(Bucket::Frame, elapsedNs);
|
|
|
|
ThreadState& threadState = t_threadState;
|
|
const std::uint64_t now = nowNs();
|
|
|
|
if (threadState.windowStartNs == 0) {
|
|
threadState.windowStartNs = now;
|
|
return;
|
|
}
|
|
|
|
if ((now - threadState.windowStartNs) < kReportIntervalNs) return;
|
|
|
|
std::array<BucketTotals, kBucketCount> combined = threadState.localBuckets;
|
|
const auto workerSnapshot = snapshotAndResetWorkerBuckets();
|
|
|
|
for (std::size_t i = 0; i < kBucketCount; ++i) {
|
|
combined[i].Merge(workerSnapshot[i]);
|
|
}
|
|
|
|
emitWindowReport(combined);
|
|
|
|
threadState.windowStartNs = now;
|
|
threadState.localBuckets = {};
|
|
}
|
|
|
|
// Starts timing `bucket`. When the profiler is disabled the scope is inert:
// no timestamp is taken and the start time stays zero.
FrameProfiler::Scope::Scope(Bucket bucket) noexcept
    : m_startNs(0), m_bucket(bucket), m_enabled(FrameProfiler::IsEnabled()) {
    if (!m_enabled) return;
    m_startNs = nowNs();
}
|
|
|
|
// Stops timing and records the elapsed nanoseconds for the scope's bucket.
// Does nothing if the scope was created while the profiler was disabled.
FrameProfiler::Scope::~Scope() noexcept {
    if (m_enabled) {
        const std::uint64_t elapsedNs = nowNs() - m_startNs;
        FrameProfiler::Record(m_bucket, elapsedNs);
    }
}
|
|
|
|
// Opens a frame scope on the calling thread. Every scope created while the
// profiler is enabled bumps the thread's nesting depth, but only the
// outermost one (depth was zero) becomes "enabled" and starts the frame timer.
FrameProfiler::FrameScope::FrameScope() noexcept
    : m_startNs(0), m_enabled(false) {
    if (!FrameProfiler::IsEnabled()) return;

    std::uint32_t& depth = t_threadState.frameScopeDepth;
    const bool outermost = (depth == 0);
    ++depth;

    m_enabled = outermost;
    if (outermost) m_startNs = nowNs();
}
|
|
|
|
// Closes a frame scope. The outermost (enabled) scope first reports the frame
// duration through EndFrame(); the depth is still non-zero at that point, so
// the sample is attributed to this thread. Afterwards the nesting depth is
// decremented for enabled and nested scopes alike, never below zero.
FrameProfiler::FrameScope::~FrameScope() noexcept {
    if (m_enabled) {
        const std::uint64_t elapsedNs = nowNs() - m_startNs;
        FrameProfiler::EndFrame(elapsedNs);
    }

    std::uint32_t& depth = t_threadState.frameScopeDepth;
    if (depth != 0) {
        --depth;
    }
}
|
|
|
|
#endif
|