feat(profiling): add low-overhead in-engine frame profiler

This commit is contained in:
MatthewBeshay 2026-03-29 12:10:19 +11:00 committed by Tropical
parent b5b4fc34fe
commit 47b7d90835
6 changed files with 816 additions and 374 deletions

View file

@ -50,6 +50,7 @@
#include "../Textures/Packs/TexturePackRepository.h"
#include "../Textures/Packs/TexturePack.h"
#include "../Textures/TextureAtlas.h"
#include "../Utils/FrameProfiler.h"
bool GameRenderer::anaglyph3d = false;
int GameRenderer::anaglyphPass = 0;
@ -776,6 +777,7 @@ void GameRenderer::renderItemInHand(float a, int eye) {
// 4J - change brought forward from 1.8.2
void GameRenderer::turnOffLightLayer(double alpha) { // 4J - TODO
FRAME_PROFILE_SCOPE(Lightmap);
#ifdef __linux__
if (SharedConstants::TEXTURE_LIGHTING) {
LinuxLogStubLightmapProbe();
@ -807,6 +809,7 @@ void GameRenderer::turnOffLightLayer(double alpha) { // 4J - TODO
void GameRenderer::turnOnLightLayer(
double alpha,
bool scaleLight) { // 4jcraft: added scaleLight for entity lighting
FRAME_PROFILE_SCOPE(Lightmap);
#ifdef __linux__
if (!SharedConstants::TEXTURE_LIGHTING) return;
@ -881,6 +884,7 @@ void GameRenderer::tickLightTexture() {
}
void GameRenderer::updateLightTexture(float a) {
FRAME_PROFILE_SCOPE(Lightmap);
// 4J-JEV: Now doing light textures on PER PLAYER basis.
// 4J - we *had* added separate light textures for all dimensions, and this
// loop to update them all here
@ -1025,6 +1029,8 @@ int GameRenderer::getLightTexture(int iPad, Level* level) {
}
void GameRenderer::render(float a, bool bFirst) {
FRAME_PROFILE_FRAME_SCOPE();
if (_updateLightTexture && bFirst) updateLightTexture(a);
if (Display::isActive()) {
lastActiveTime = System::currentTimeMillis();
@ -1083,6 +1089,7 @@ void GameRenderer::render(float a, bool bFirst) {
lastNsTime = System::nanoTime();
if (!mc->options->hideGui || mc->screen != NULL) {
FRAME_PROFILE_SCOPE(UIHud);
mc->gui->render(a, mc->screen != NULL, xMouse, yMouse);
}
} else {
@ -1097,6 +1104,7 @@ void GameRenderer::render(float a, bool bFirst) {
}
if (mc->screen != NULL) {
FRAME_PROFILE_SCOPE(UIHud);
glClear(GL_DEPTH_BUFFER_BIT);
mc->screen->render(xMouse, yMouse, a);
if (mc->screen != NULL && mc->screen->particles != NULL)
@ -1244,6 +1252,8 @@ void GameRenderer::DisableUpdateThread() {
}
void GameRenderer::renderLevel(float a, int64_t until) {
FRAME_PROFILE_SCOPE(World);
// if (updateLightTexture) updateLightTexture(); // 4J - TODO -
// Java 1.0.1 has this line enabled, should check why - don't want to put it
// in now in case it breaks split-screen
@ -1296,9 +1306,12 @@ void GameRenderer::renderLevel(float a, int64_t until) {
Frustum::getFrustum();
if (mc->options->viewDistance < 2) {
setupFog(-1, a);
levelRenderer->renderSky(a);
if (mc->skins->getSelected()->getId() == 1026)
levelRenderer->renderHaloRing(a);
{
FRAME_PROFILE_SCOPE(WeatherSky);
levelRenderer->renderSky(a);
if (mc->skins->getSelected()->getId() == 1026)
levelRenderer->renderHaloRing(a);
}
}
// 4jcraft: needs to be enabled for proper transparent texturing on low
// render dists this was done in renderSky() for the far and normal
@ -1320,7 +1333,10 @@ void GameRenderer::renderLevel(float a, int64_t until) {
MemSect(0);
frustum->prepare(xOff, yOff, zOff);
mc->levelRenderer->cull(frustum, a);
{
FRAME_PROFILE_SCOPE(ChunkCull);
mc->levelRenderer->cull(frustum, a);
}
PIXEndNamedEvent();
#ifndef MULTITHREAD_ENABLE
@ -1347,6 +1363,7 @@ void GameRenderer::renderLevel(float a, int64_t until) {
#endif
if (cameraEntity->y < Level::genDepth) {
FRAME_PROFILE_SCOPE(WeatherSky);
prepareAndRenderClouds(levelRenderer, a);
}
Frustum::getFrustum(); // 4J added - re-calculate frustum as rendering
@ -1385,7 +1402,10 @@ void GameRenderer::renderLevel(float a, int64_t until) {
cameraPos.x = cameraPosTemp.x;
cameraPos.y = cameraPosTemp.y;
cameraPos.z = cameraPosTemp.z;
levelRenderer->renderEntities(&cameraPos, frustum, a);
{
FRAME_PROFILE_SCOPE(Entity);
levelRenderer->renderEntities(&cameraPos, frustum, a);
}
#ifdef __PSVITA__
// AP - make sure we're using the Alpha cut out effect for particles
glEnable(GL_ALPHA_TEST);
@ -1393,12 +1413,18 @@ void GameRenderer::renderLevel(float a, int64_t until) {
PIXEndNamedEvent();
PIXBeginNamedEvent(0, "Particle render");
turnOnLightLayer(a); // 4J - brought forward from 1.8.2
particleEngine->renderLit(cameraEntity, a,
ParticleEngine::OPAQUE_LIST);
{
FRAME_PROFILE_SCOPE(Particle);
particleEngine->renderLit(cameraEntity, a,
ParticleEngine::OPAQUE_LIST);
}
Lighting::turnOff();
setupFog(0, a);
particleEngine->render(cameraEntity, a,
ParticleEngine::OPAQUE_LIST);
{
FRAME_PROFILE_SCOPE(Particle);
particleEngine->render(cameraEntity, a,
ParticleEngine::OPAQUE_LIST);
}
PIXEndNamedEvent();
turnOffLightLayer(a); // 4J - brought forward from 1.8.2
@ -1467,12 +1493,18 @@ void GameRenderer::renderLevel(float a, int64_t until) {
PIXBeginNamedEvent(0, "Particle render (translucent)");
Lighting::turnOn();
turnOnLightLayer(a); // 4J - brought forward from 1.8.2
particleEngine->renderLit(cameraEntity, a,
ParticleEngine::TRANSLUCENT_LIST);
{
FRAME_PROFILE_SCOPE(Particle);
particleEngine->renderLit(cameraEntity, a,
ParticleEngine::TRANSLUCENT_LIST);
}
Lighting::turnOff();
setupFog(0, a);
particleEngine->render(cameraEntity, a,
ParticleEngine::TRANSLUCENT_LIST);
{
FRAME_PROFILE_SCOPE(Particle);
particleEngine->render(cameraEntity, a,
ParticleEngine::TRANSLUCENT_LIST);
}
PIXEndNamedEvent();
turnOffLightLayer(a); // 4J - brought forward from 1.8.2
////////////////////////// End of 4J added section
@ -1503,12 +1535,16 @@ void GameRenderer::renderLevel(float a, int64_t until) {
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE);
levelRenderer->renderDestroyAnimation(
Tesselator::getInstance(),
std::dynamic_pointer_cast<Player>(cameraEntity), a);
{
FRAME_PROFILE_SCOPE(WeatherSky);
levelRenderer->renderDestroyAnimation(
Tesselator::getInstance(),
std::dynamic_pointer_cast<Player>(cameraEntity), a);
}
glDisable(GL_BLEND);
if (cameraEntity->y >= Level::genDepth) {
FRAME_PROFILE_SCOPE(WeatherSky);
prepareAndRenderClouds(levelRenderer, a);
}
@ -1517,7 +1553,10 @@ void GameRenderer::renderLevel(float a, int64_t until) {
setupFog(0, a);
glEnable(GL_FOG);
PIXBeginNamedEvent(0, "Rendering snow and rain");
renderSnowAndRain(a);
{
FRAME_PROFILE_SCOPE(WeatherSky);
renderSnowAndRain(a);
}
PIXEndNamedEvent();
glDisable(GL_FOG);

View file

@ -63,6 +63,7 @@
#include "../Level/MultiPlayerLevel.h"
#include "../../Minecraft.World/Util/SoundTypes.h"
#include "FrustumCuller.h"
#include "../Utils/FrameProfiler.h"
// #define DISABLE_SPU_CODE
@ -736,6 +737,8 @@ void LevelRenderer::resortChunks(int xc, int yc, int zc) {
int LevelRenderer::render(std::shared_ptr<LivingEntity> player, int layer,
double alpha, bool updateChunks) {
FRAME_PROFILE_SCOPE(Terrain);
int playerIndex = mc->player->GetXboxPad();
// 4J - added - if the number of players has changed, we need to rebuild
@ -851,49 +854,55 @@ int LevelRenderer::renderChunks(int from, int to, int layer, double alpha) {
if (sortList.capacity() < (size_t)chunks[playerIndex].length) {
sortList.reserve(chunks[playerIndex].length);
}
for (int i = 0; i < chunks[playerIndex].length; i++, pClipChunk++) {
if (!pClipChunk->visible)
continue; // This will be set if the chunk isn't visible, or isn't
// compiled, or has both empty flags set
if (pClipChunk->globalIdx == -1)
continue; // Not sure if we should ever encounter this... TODO
// check
if ((globalChunkFlags[pClipChunk->globalIdx] & emptyFlag) == emptyFlag)
continue;
{
FRAME_PROFILE_SCOPE(ChunkCollect);
for (int i = 0; i < chunks[playerIndex].length; i++, pClipChunk++) {
if (!pClipChunk->visible)
continue; // This will be set if the chunk isn't visible, or
// isn't compiled, or has both empty flags set
if (pClipChunk->globalIdx == -1)
continue; // Not sure if we should ever encounter this...
// TODO check
if ((globalChunkFlags[pClipChunk->globalIdx] & emptyFlag) ==
emptyFlag)
continue;
sortList.push_back(pClipChunk);
}
// he sorts me till i
std::sort(sortList.begin(), sortList.end(),
[xOff, yOff, zOff, layer](ClipChunk* a, ClipChunk* b) {
float dxA = (float)((a->chunk->x + 8.0f) - xOff);
float dyA = (float)((a->chunk->y + 8.0f) - yOff);
float dzA = (float)((a->chunk->z + 8.0f) - zOff);
float distSqA = dxA * dxA + dyA * dyA + dzA * dzA;
float dxB = (float)((b->chunk->x + 8.0f) - xOff);
float dyB = (float)((b->chunk->y + 8.0f) - yOff);
float dzB = (float)((b->chunk->z + 8.0f) - zOff);
float distSqB = dxB * dxB + dyB * dyB + dzB * dzB;
if (layer == 0)
return distSqA < distSqB; // Opaque: Closest first
return distSqA > distSqB; // Transparent: Furthest first
});
for (ClipChunk* chunk : sortList) {
// ugly occluder
float dx = (chunk->chunk->x + 8.0f) - (float)xOff;
float dy = (chunk->chunk->y + 8.0f) - (float)yOff;
float dz = (chunk->chunk->z + 8.0f) - (float)zOff;
bool isVeryNear = (dx * dx + dy * dy + dz * dz) < (16.0f * 16.0f);
if (!isVeryNear && layer == 0) {
// todo: occlusion flag
sortList.push_back(pClipChunk);
}
// he sorts me till i
std::sort(sortList.begin(), sortList.end(),
[xOff, yOff, zOff, layer](ClipChunk* a, ClipChunk* b) {
float dxA = (float)((a->chunk->x + 8.0f) - xOff);
float dyA = (float)((a->chunk->y + 8.0f) - yOff);
float dzA = (float)((a->chunk->z + 8.0f) - zOff);
float distSqA = dxA * dxA + dyA * dyA + dzA * dzA;
int list = chunk->globalIdx * 2 + layer;
list += chunkLists;
float dxB = (float)((b->chunk->x + 8.0f) - xOff);
float dyB = (float)((b->chunk->y + 8.0f) - yOff);
float dzB = (float)((b->chunk->z + 8.0f) - zOff);
float distSqB = dxB * dxB + dyB * dyB + dzB * dzB;
if (layer == 0)
return distSqA < distSqB; // Opaque: Closest first
return distSqA > distSqB; // Transparent: Furthest
// first
});
}
{
FRAME_PROFILE_SCOPE(ChunkPlayback);
for (ClipChunk* chunk : sortList) {
// ugly occluder
float dx = (chunk->chunk->x + 8.0f) - (float)xOff;
float dy = (chunk->chunk->y + 8.0f) - (float)yOff;
float dz = (chunk->chunk->z + 8.0f) - (float)zOff;
bool isVeryNear = (dx * dx + dy * dy + dz * dz) < (16.0f * 16.0f);
if (!isVeryNear && layer == 0) {
// todo: occlusion flag
}
int list = chunk->globalIdx * 2 + layer;
list += chunkLists;
// 4jcraft: replaced glPushMatrix/glTranslatef/glPopMatrix per chunk
// no more full MVP upload per chunk, can also be backwards compat
@ -901,12 +910,13 @@ int LevelRenderer::renderChunks(int from, int to, int layer, double alpha) {
(float)chunk->chunk->y,
(float)chunk->chunk->z);
if (RenderManager.CBuffCall(list, first)) {
first = false;
if (RenderManager.CBuffCall(list, first)) {
first = false;
}
count++;
}
count++;
RenderManager.SetChunkOffset(0.f, 0.f, 0.f);
}
RenderManager.SetChunkOffset(0.f, 0.f, 0.f);
#ifdef __PSVITA__
// AP - alpha cut out is expensive on vita. Now we render all the alpha cut
@ -917,25 +927,29 @@ int LevelRenderer::renderChunks(int from, int to, int layer, double alpha) {
first = true;
pClipChunk = chunks[playerIndex].data;
emptyFlag = LevelRenderer::CHUNK_FLAG_EMPTY0 << layer;
for (int i = 0; i < chunks[playerIndex].length; i++, pClipChunk++) {
if (!pClipChunk->visible)
continue; // This will be set if the chunk isn't visible, or isn't
// compiled, or has both empty flags set
if (pClipChunk->globalIdx == -1)
continue; // Not sure if we should ever encounter this... TODO
// check
if ((globalChunkFlags[pClipChunk->globalIdx] & emptyFlag) == emptyFlag)
continue; // Check that this particular layer isn't empty
if (!(globalChunkFlags[pClipChunk->globalIdx] &
LevelRenderer::CHUNK_FLAG_CUT_OUT))
continue; // Does this chunk contain any cut out geometry
{
FRAME_PROFILE_SCOPE(ChunkPlayback);
for (int i = 0; i < chunks[playerIndex].length; i++, pClipChunk++) {
if (!pClipChunk->visible)
continue; // This will be set if the chunk isn't visible, or
// isn't compiled, or has both empty flags set
if (pClipChunk->globalIdx == -1)
continue; // Not sure if we should ever encounter this... TODO
// check
if ((globalChunkFlags[pClipChunk->globalIdx] & emptyFlag) ==
emptyFlag)
continue; // Check that this particular layer isn't empty
if (!(globalChunkFlags[pClipChunk->globalIdx] &
LevelRenderer::CHUNK_FLAG_CUT_OUT))
continue; // Does this chunk contain any cut out geometry
// List can be calculated directly from the chunk's global index
int list = pClipChunk->globalIdx * 2 + layer;
list += chunkLists;
// List can be calculated directly from the chunk's global index
int list = pClipChunk->globalIdx * 2 + layer;
list += chunkLists;
if (RenderManager.CBuffCallCutOut(list, first)) {
first = false;
if (RenderManager.CBuffCallCutOut(list, first)) {
first = false;
}
}
}
RenderManager.StateSetForceLOD(-1); // AP - back to normal mipmapping
@ -1959,312 +1973,319 @@ bool LevelRenderer::updateDirtyChunks() {
// Set a flag if we should only rebuild existing chunks, not create anything
// new
unsigned int memAlloc = RenderManager.CBuffSize(-1);
/*
static int throttle = 0;
if( ( throttle % 100 ) == 0 )
{
app.DebugPrintf("CBuffSize: %d\n",memAlloc/(1024*1024));
}
throttle++;
*/
PIXAddNamedCounter(((float)memAlloc) / (1024.0f * 1024.0f),
"Command buffer allocations");
bool onlyRebuild = (memAlloc >= MAX_COMMANDBUFFER_ALLOCATIONS);
EnterCriticalSection(&m_csDirtyChunks);
FRAME_PROFILE_SCOPE(ChunkDirtyScan);
// Move any dirty chunks stored in the lock free stack into global flags
int index = 0;
do {
// See comment on dirtyChunksLockFreeStack.Push() regarding details of
// this casting/subtracting -2.
index = (size_t)dirtyChunksLockFreeStack.Pop();
#ifdef _CRITICAL_CHUNKS
int oldIndex = index;
index &= 0x0fffffff; // remove the top bit that marked the chunk as
// non-critical
#endif
if (index == 1)
dirtyChunkPresent =
true; // 1 is a special value passed to let this thread know
// that a chunk which isn't on this stack has been set to
// dirty
else if (index > 1) {
int i2 = index - 2;
if (i2 >= DIMENSION_OFFSETS[2]) {
i2 -= DIMENSION_OFFSETS[2];
int y2 = i2 & (CHUNK_Y_COUNT - 1);
i2 /= CHUNK_Y_COUNT;
int z2 = i2 / MAX_LEVEL_RENDER_SIZE[2];
int x2 = i2 - z2 * MAX_LEVEL_RENDER_SIZE[2];
x2 -= MAX_LEVEL_RENDER_SIZE[2] / 2;
z2 -= MAX_LEVEL_RENDER_SIZE[2] / 2;
}
setGlobalChunkFlag(index - 2, CHUNK_FLAG_DIRTY);
#ifdef _CRITICAL_CHUNKS
if (!(oldIndex & 0x10000000)) // was this chunk not marked as
// non-critical. Ugh double negatives
{
setGlobalChunkFlag(index - 2, CHUNK_FLAG_CRITICAL);
}
#endif
dirtyChunkPresent = true;
unsigned int memAlloc = RenderManager.CBuffSize(-1);
/*
static int throttle = 0;
if( ( throttle % 100 ) == 0 )
{
app.DebugPrintf("CBuffSize: %d\n",memAlloc/(1024*1024));
}
} while (index);
throttle++;
*/
PIXAddNamedCounter(((float)memAlloc) / (1024.0f * 1024.0f),
"Command buffer allocations");
bool onlyRebuild = (memAlloc >= MAX_COMMANDBUFFER_ALLOCATIONS);
EnterCriticalSection(&m_csDirtyChunks);
// Only bother searching round all the chunks if we have some dirty chunk(s)
if (dirtyChunkPresent) {
lastDirtyChunkFound = System::currentTimeMillis();
PIXBeginNamedEvent(0, "Finding nearest chunk\n");
// Move any dirty chunks stored in the lock free stack into global flags
int index = 0;
do {
// See comment on dirtyChunksLockFreeStack.Push() regarding details of
// this casting/subtracting -2.
index = (size_t)dirtyChunksLockFreeStack.Pop();
#ifdef _CRITICAL_CHUNKS
int oldIndex = index;
index &= 0x0fffffff; // remove the top bit that marked the chunk as
// non-critical
#endif
if (index == 1)
dirtyChunkPresent =
true; // 1 is a special value passed to let this thread know
// that a chunk which isn't on this stack has been set to
// dirty
else if (index > 1) {
int i2 = index - 2;
if (i2 >= DIMENSION_OFFSETS[2]) {
i2 -= DIMENSION_OFFSETS[2];
int y2 = i2 & (CHUNK_Y_COUNT - 1);
i2 /= CHUNK_Y_COUNT;
int z2 = i2 / MAX_LEVEL_RENDER_SIZE[2];
int x2 = i2 - z2 * MAX_LEVEL_RENDER_SIZE[2];
x2 -= MAX_LEVEL_RENDER_SIZE[2] / 2;
z2 -= MAX_LEVEL_RENDER_SIZE[2] / 2;
}
setGlobalChunkFlag(index - 2, CHUNK_FLAG_DIRTY);
#ifdef _CRITICAL_CHUNKS
if (!(oldIndex & 0x10000000)) // was this chunk not marked as
// non-critical. Ugh double negatives
{
setGlobalChunkFlag(index - 2, CHUNK_FLAG_CRITICAL);
}
#endif
dirtyChunkPresent = true;
}
} while (index);
// Only bother searching round all the chunks if we have some dirty chunk(s)
if (dirtyChunkPresent) {
lastDirtyChunkFound = System::currentTimeMillis();
PIXBeginNamedEvent(0, "Finding nearest chunk\n");
#if defined __PS3__ && !defined DISABLE_SPU_CODE
// find the nearest chunk with a spu task, copy all the data over here
// for uploading to SPU
g_findNearestChunkDataIn.numGlobalChunks = getGlobalChunkCount();
g_findNearestChunkDataIn.pGlobalChunkFlags = globalChunkFlags;
g_findNearestChunkDataIn.onlyRebuild = onlyRebuild;
g_findNearestChunkDataIn.lowerOffset =
(int)&((LevelChunk*)0)
->lowerBlocks; // dodgy bit of class structure poking, as we
// don't want to try and get the whole of
// LevelChunk compiling on SPU
g_findNearestChunkDataIn.upperOffset =
(int)&((LevelChunk*)0)->upperBlocks;
g_findNearestChunkDataIn.xChunks = xChunks;
g_findNearestChunkDataIn.yChunks = yChunks;
g_findNearestChunkDataIn.zChunks = zChunks;
// find the nearest chunk with a spu task, copy all the data over here
// for uploading to SPU
g_findNearestChunkDataIn.numGlobalChunks = getGlobalChunkCount();
g_findNearestChunkDataIn.pGlobalChunkFlags = globalChunkFlags;
g_findNearestChunkDataIn.onlyRebuild = onlyRebuild;
g_findNearestChunkDataIn.lowerOffset =
(int)&((LevelChunk*)0)
->lowerBlocks; // dodgy bit of class structure poking, as we
// don't want to try and get the whole of
// LevelChunk compiling on SPU
g_findNearestChunkDataIn.upperOffset =
(int)&((LevelChunk*)0)->upperBlocks;
g_findNearestChunkDataIn.xChunks = xChunks;
g_findNearestChunkDataIn.yChunks = yChunks;
g_findNearestChunkDataIn.zChunks = zChunks;
for (int i = 0; i < 4; i++) {
g_findNearestChunkDataIn.chunks[i] =
(LevelRenderer_FindNearestChunk_DataIn::ClipChunk*)chunks[i]
.data;
g_findNearestChunkDataIn.chunkLengths[i] = chunks[i].length;
g_findNearestChunkDataIn.level[i] = level[i];
g_findNearestChunkDataIn.playerData[i].bValid =
mc->localplayers[i] != NULL;
if (mc->localplayers[i] != NULL) {
g_findNearestChunkDataIn.playerData[i].x =
mc->localplayers[i]->x;
g_findNearestChunkDataIn.playerData[i].y =
mc->localplayers[i]->y;
g_findNearestChunkDataIn.playerData[i].z =
mc->localplayers[i]->z;
for (int i = 0; i < 4; i++) {
g_findNearestChunkDataIn.chunks[i] =
(LevelRenderer_FindNearestChunk_DataIn::ClipChunk*)chunks[i]
.data;
g_findNearestChunkDataIn.chunkLengths[i] = chunks[i].length;
g_findNearestChunkDataIn.level[i] = level[i];
g_findNearestChunkDataIn.playerData[i].bValid =
mc->localplayers[i] != NULL;
if (mc->localplayers[i] != NULL) {
g_findNearestChunkDataIn.playerData[i].x =
mc->localplayers[i]->x;
g_findNearestChunkDataIn.playerData[i].y =
mc->localplayers[i]->y;
g_findNearestChunkDataIn.playerData[i].z =
mc->localplayers[i]->z;
}
if (level[i] != NULL) {
g_findNearestChunkDataIn.multiplayerChunkCache[i].XZOFFSET =
((MultiPlayerChunkCache*)(level[i]->chunkSource))->XZOFFSET;
g_findNearestChunkDataIn.multiplayerChunkCache[i].XZSIZE =
((MultiPlayerChunkCache*)(level[i]->chunkSource))->XZSIZE;
g_findNearestChunkDataIn.multiplayerChunkCache[i].cache =
(void**)((MultiPlayerChunkCache*)(level[i]->chunkSource))
->cache;
}
}
if (level[i] != NULL) {
g_findNearestChunkDataIn.multiplayerChunkCache[i].XZOFFSET =
((MultiPlayerChunkCache*)(level[i]->chunkSource))->XZOFFSET;
g_findNearestChunkDataIn.multiplayerChunkCache[i].XZSIZE =
((MultiPlayerChunkCache*)(level[i]->chunkSource))->XZSIZE;
g_findNearestChunkDataIn.multiplayerChunkCache[i].cache =
(void**)((MultiPlayerChunkCache*)(level[i]->chunkSource))
->cache;
}
}
// assert(sizeof(LevelRenderer_FindNearestChunk_DataIn::Chunk)
// == sizeof(Chunk));
C4JSpursJob_LevelRenderer_FindNearestChunk findJob(
&g_findNearestChunkDataIn);
m_jobPort_FindNearestChunk->submitJob(&findJob);
m_jobPort_FindNearestChunk->waitForCompletion();
nearChunk = (ClipChunk*)g_findNearestChunkDataIn.nearChunk;
veryNearCount = g_findNearestChunkDataIn.veryNearCount;
// assert(sizeof(LevelRenderer_FindNearestChunk_DataIn::Chunk)
// == sizeof(Chunk));
C4JSpursJob_LevelRenderer_FindNearestChunk findJob(
&g_findNearestChunkDataIn);
m_jobPort_FindNearestChunk->submitJob(&findJob);
m_jobPort_FindNearestChunk->waitForCompletion();
nearChunk = (ClipChunk*)g_findNearestChunkDataIn.nearChunk;
veryNearCount = g_findNearestChunkDataIn.veryNearCount;
#else // __PS3__
#ifdef _LARGE_WORLDS
int maxNearestChunks = MAX_CONCURRENT_CHUNK_REBUILDS;
// 4J Stu - On XboxOne we should cut this down if in a constrained state
// so the saving threads get more time
int maxNearestChunks = MAX_CONCURRENT_CHUNK_REBUILDS;
// 4J Stu - On XboxOne we should cut this down if in a constrained state
// so the saving threads get more time
#endif
// Find nearest chunk that is dirty
for (int p = 0; p < XUSER_MAX_COUNT; p++) {
// It's possible that the localplayers member can be set to NULL on
// the main thread when a player chooses to exit the game So take a
// reference to the player object now. As it is a shared_ptr it
// should live as long as we need it
std::shared_ptr<LocalPlayer> player = mc->localplayers[p];
if (player == NULL) continue;
if (chunks[p].data == NULL) continue;
if (level[p] == NULL) continue;
if (chunks[p].length != xChunks * zChunks * CHUNK_Y_COUNT) continue;
int px = (int)player->x;
int py = (int)player->y;
int pz = (int)player->z;
// Find nearest chunk that is dirty
for (int p = 0; p < XUSER_MAX_COUNT; p++) {
// It's possible that the localplayers member can be set to NULL on
// the main thread when a player chooses to exit the game So take a
// reference to the player object now. As it is a shared_ptr it
// should live as long as we need it
std::shared_ptr<LocalPlayer> player = mc->localplayers[p];
if (player == NULL) continue;
if (chunks[p].data == NULL) continue;
if (level[p] == NULL) continue;
if (chunks[p].length != xChunks * zChunks * CHUNK_Y_COUNT) continue;
int px = (int)player->x;
int py = (int)player->y;
int pz = (int)player->z;
// app.DebugPrintf("!! %d %d %d, %d %d %d {%d,%d}
//",px,py,pz,stackChunkDirty,nonStackChunkDirty,onlyRebuild,
// xChunks, zChunks);
// app.DebugPrintf("!! %d %d %d, %d %d %d {%d,%d}
//",px,py,pz,stackChunkDirty,nonStackChunkDirty,onlyRebuild,
// xChunks, zChunks);
int considered = 0;
int wouldBeNearButEmpty = 0;
for (int x = 0; x < xChunks; x++) {
for (int z = 0; z < zChunks; z++) {
for (int y = 0; y < CHUNK_Y_COUNT; y++) {
ClipChunk* pClipChunk =
&chunks[p][(z * yChunks + y) * xChunks + x];
// Get distance to this chunk - deliberately not calling
// the chunk's method of doing this to avoid overheads
// (passing entities, type conversion etc.) that this
// involves
int xd = pClipChunk->xm - px;
int yd = pClipChunk->ym - py;
int zd = pClipChunk->zm - pz;
int distSq = xd * xd + yd * yd + zd * zd;
int distSqWeighted =
xd * xd + yd * yd * 4 +
zd *
zd; // Weighting against y to prioritise things
// in same x/z plane as player first
int considered = 0;
int wouldBeNearButEmpty = 0;
for (int x = 0; x < xChunks; x++) {
for (int z = 0; z < zChunks; z++) {
for (int y = 0; y < CHUNK_Y_COUNT; y++) {
ClipChunk* pClipChunk =
&chunks[p][(z * yChunks + y) * xChunks + x];
// Get distance to this chunk - deliberately not calling
// the chunk's method of doing this to avoid overheads
// (passing entities, type conversion etc.) that this
// involves
int xd = pClipChunk->xm - px;
int yd = pClipChunk->ym - py;
int zd = pClipChunk->zm - pz;
int distSq = xd * xd + yd * yd + zd * zd;
int distSqWeighted =
xd * xd + yd * yd * 4 +
zd *
zd; // Weighting against y to prioritise things
// in same x/z plane as player first
if (globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_DIRTY) {
if ((!onlyRebuild) ||
globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_COMPILED ||
(distSq <
20 * 20)) // Always rebuild really near things
// or else building (say) at tower
// up into empty blocks when we are
// low on memory will not create
// render data
{
considered++;
// Is this chunk nearer than our nearest?
#ifdef _LARGE_WORLDS
bool isNearer = nearestClipChunks.empty();
AUTO_VAR(itNearest, nearestClipChunks.begin());
for (; itNearest != nearestClipChunks.end();
++itNearest) {
isNearer =
distSqWeighted < itNearest->second;
if (isNearer) break;
}
isNearer =
isNearer || (nearestClipChunks.size() <
maxNearestChunks);
#else
bool isNearer = distSqWeighted < minDistSq;
#endif
#ifdef _CRITICAL_CHUNKS
// AP - this will make sure that if a deferred
// grouping has started, only critical chunks go
// into that grouping, even if a non-critical
// chunk is closer.
if ((!veryNearCount && isNearer) ||
(distSq < 20 * 20 &&
(globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_CRITICAL)))
#else
if (isNearer)
#endif
if (globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_DIRTY) {
if ((!onlyRebuild) ||
globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_COMPILED ||
(distSq <
20 * 20)) // Always rebuild really near things
// or else building (say) at tower
// up into empty blocks when we are
// low on memory will not create
// render data
{
// At this point we've got a chunk that we
// would like to consider for rendering, at
// least based on its proximity to the
// player(s). Its *quite* quick to generate
// empty render data for render chunks, but
// if we let the rebuilding do that then the
// after rebuilding we will have to start
// searching for the next nearest chunk from
// scratch again. Instead, its better to
// detect empty chunks at this stage, flag
// them up as not dirty (and empty), and
// carry on. The levelchunk's
// isRenderChunkEmpty method can be quite
// optimal as it can make use of the chunk's
// data compression to detect emptiness
// without actually testing as many data
// items as uncompressed data would.
Chunk* chunk = pClipChunk->chunk;
LevelChunk* lc = level[p]->getChunkAt(
chunk->x, chunk->z);
if (!lc->isRenderChunkEmpty(y * 16)) {
nearChunk = pClipChunk;
minDistSq = distSqWeighted;
considered++;
// Is this chunk nearer than our nearest?
#ifdef _LARGE_WORLDS
nearestClipChunks.insert(
itNearest,
std::pair<ClipChunk*, int>(
nearChunk, minDistSq));
if (nearestClipChunks.size() >
maxNearestChunks) {
nearestClipChunks.pop_back();
}
#endif
} else {
chunk->clearDirty();
globalChunkFlags[pClipChunk
->globalIdx] |=
CHUNK_FLAG_EMPTYBOTH;
wouldBeNearButEmpty++;
bool isNearer = nearestClipChunks.empty();
AUTO_VAR(itNearest, nearestClipChunks.begin());
for (; itNearest != nearestClipChunks.end();
++itNearest) {
isNearer =
distSqWeighted < itNearest->second;
if (isNearer) break;
}
}
isNearer =
isNearer || (nearestClipChunks.size() <
maxNearestChunks);
#else
bool isNearer = distSqWeighted < minDistSq;
#endif
#ifdef _CRITICAL_CHUNKS
// AP - is the chunk near and also critical
if (distSq < 20 * 20 &&
((globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_CRITICAL)))
// AP - this will make sure that if a deferred
// grouping has started, only critical chunks go
// into that grouping, even if a non-critical
// chunk is closer.
if ((!veryNearCount && isNearer) ||
(distSq < 20 * 20 &&
(globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_CRITICAL)))
#else
if (distSq < 20 * 20)
if (isNearer)
#endif
{
veryNearCount++;
{
// At this point we've got a chunk that we
// would like to consider for rendering, at
// least based on its proximity to the
// player(s). Its *quite* quick to generate
// empty render data for render chunks, but
// if we let the rebuilding do that then the
// after rebuilding we will have to start
// searching for the next nearest chunk from
// scratch again. Instead, its better to
// detect empty chunks at this stage, flag
// them up as not dirty (and empty), and
// carry on. The levelchunk's
// isRenderChunkEmpty method can be quite
// optimal as it can make use of the chunk's
// data compression to detect emptiness
// without actually testing as many data
// items as uncompressed data would.
Chunk* chunk = pClipChunk->chunk;
LevelChunk* lc = level[p]->getChunkAt(
chunk->x, chunk->z);
if (!lc->isRenderChunkEmpty(y * 16)) {
nearChunk = pClipChunk;
minDistSq = distSqWeighted;
#ifdef _LARGE_WORLDS
nearestClipChunks.insert(
itNearest,
std::pair<ClipChunk*, int>(
nearChunk, minDistSq));
if (nearestClipChunks.size() >
maxNearestChunks) {
nearestClipChunks.pop_back();
}
#endif
} else {
chunk->clearDirty();
globalChunkFlags[pClipChunk
->globalIdx] |=
CHUNK_FLAG_EMPTYBOTH;
wouldBeNearButEmpty++;
}
}
#ifdef _CRITICAL_CHUNKS
// AP - is the chunk near and also critical
if (distSq < 20 * 20 &&
((globalChunkFlags[pClipChunk->globalIdx] &
CHUNK_FLAG_CRITICAL)))
#else
if (distSq < 20 * 20)
#endif
{
veryNearCount++;
}
}
}
}
}
}
// app.DebugPrintf("[%d,%d,%d]\n",nearestClipChunks.empty(),considered,wouldBeNearButEmpty);
}
// app.DebugPrintf("[%d,%d,%d]\n",nearestClipChunks.empty(),considered,wouldBeNearButEmpty);
}
#endif // __PS3__
PIXEndNamedEvent();
PIXEndNamedEvent();
}
}
Chunk* chunk = NULL;
#ifdef _LARGE_WORLDS
if (!nearestClipChunks.empty()) {
int index = 0;
for (AUTO_VAR(it, nearestClipChunks.begin());
it != nearestClipChunks.end(); ++it) {
chunk = it->first->chunk;
// If this chunk is very near, then move the renderer into a
// deferred mode. This won't commit any command buffers for
// rendering until we call CBuffDeferredModeEnd(), allowing us to
// group any near changes into an atomic unit. This is essential so
// we don't temporarily create any holes in the environment whilst
// updating one chunk and not the neighbours. The "very near" aspect
// of this is just a cosmetic nicety - exactly the same thing would
// happen further away, but we just don't care about it so much from
// terms of visual impact.
if (veryNearCount > 0) {
RenderManager.CBuffDeferredModeStart();
{
FRAME_PROFILE_SCOPE(ChunkRebuildSchedule);
for (AUTO_VAR(it, nearestClipChunks.begin());
it != nearestClipChunks.end(); ++it) {
chunk = it->first->chunk;
// If this chunk is very near, then move the renderer into a
// deferred mode. This won't commit any command buffers for
// rendering until we call CBuffDeferredModeEnd(), allowing us to
// group any near changes into an atomic unit. This is essential so
// we don't temporarily create any holes in the environment whilst
// updating one chunk and not the neighbours. The "very near" aspect
// of this is just a cosmetic nicety - exactly the same thing would
// happen further away, but we just don't care about it so much from
// terms of visual impact.
if (veryNearCount > 0) {
RenderManager.CBuffDeferredModeStart();
}
// Build this chunk & return false to continue processing
chunk->clearDirty();
// Take a copy of the details that are required for chunk
// rebuilding, and rebuild That instead of the original chunk data.
// This is done within the m_csDirtyChunks critical section, which
// means that any chunks can't be repositioned whilst we are doing
// this copy. The copy will then be guaranteed to be consistent
// whilst rebuilding takes place outside of that critical section.
permaChunk[index].makeCopyForRebuild(chunk);
++index;
}
// Build this chunk & return false to continue processing
chunk->clearDirty();
// Take a copy of the details that are required for chunk
// rebuilding, and rebuild That instead of the original chunk data.
// This is done within the m_csDirtyChunks critical section, which
// means that any chunks can't be repositioned whilst we are doing
// this copy. The copy will then be guaranteed to be consistent
// whilst rebuilding takes place outside of that critical section.
permaChunk[index].makeCopyForRebuild(chunk);
++index;
}
LeaveCriticalSection(&m_csDirtyChunks);
LeaveCriticalSection(&m_csDirtyChunks);
--index; // Bring it back into 0 counted range
--index; // Bring it back into 0 counted range
for (int i = MAX_CHUNK_REBUILD_THREADS - 1; i >= 0; --i) {
// Set the events that won't run
if ((i + 1) > index)
s_rebuildCompleteEvents->Set(i);
else
break;
for (int i = MAX_CHUNK_REBUILD_THREADS - 1; i >= 0; --i) {
// Set the events that won't run
if ((i + 1) > index)
s_rebuildCompleteEvents->Set(i);
else
break;
}
}
for (; index >= 0; --index) {
@ -2283,13 +2304,18 @@ bool LevelRenderer::updateDirtyChunks() {
// app.DebugPrintf("Rebuilding permaChunk %d\n", index);
permaChunk[index].rebuild();
{
FRAME_PROFILE_SCOPE(ChunkRebuildBody);
permaChunk[index].rebuild();
}
if (index != 0)
if (index != 0) {
FRAME_PROFILE_SCOPE(ChunkRebuildSchedule);
s_rebuildCompleteEvents->Set(
index - 1); // MGH - this rebuild happening on the main
// thread instead, mark the thread it
// should have been running on as complete
}
// int64_t endTime = System::currentTimeMillis();
// totalTime += (endTime - startTime);
@ -2300,44 +2326,54 @@ bool LevelRenderer::updateDirtyChunks() {
// 4J Stu - Ignore this path when in constrained mode on Xbox One
else {
// Activate thread to rebuild this chunk
FRAME_PROFILE_SCOPE(ChunkRebuildSchedule);
s_activationEventA[index - 1]->Set();
}
}
// Wait for the other threads to be done as well
s_rebuildCompleteEvents->WaitForAll(INFINITE);
{
FRAME_PROFILE_SCOPE(ChunkRebuildSchedule);
s_rebuildCompleteEvents->WaitForAll(INFINITE);
}
}
#else
if (nearChunk) {
chunk = nearChunk->chunk;
PIXBeginNamedEvent(0, "Rebuilding near chunk %d %d %d", chunk->x,
chunk->y, chunk->z);
// If this chunk is very near, then move the renderer into a deferred
// mode. This won't commit any command buffers for rendering until we
// call CBuffDeferredModeEnd(), allowing us to group any near changes
// into an atomic unit. This is essential so we don't temporarily create
// any holes in the environment whilst updating one chunk and not the
// neighbours. The "very near" aspect of this is just a cosmetic nicety -
// exactly the same thing would happen further away, but we just don't
// care about it so much in terms of visual impact.
if (veryNearCount > 0) {
RenderManager.CBuffDeferredModeStart();
}
// Build this chunk & return false to continue processing
chunk->clearDirty();
// Take a copy of the details that are required for chunk rebuilding,
// and rebuild That instead of the original chunk data. This is done
// within the m_csDirtyChunks critical section, which means that any
// chunks can't be repositioned whilst we are doing this copy. The copy
// will then be guaranteed to be consistent whilst rebuilding takes
// place outside of that critical section.
static Chunk permaChunk;
permaChunk.makeCopyForRebuild(chunk);
LeaveCriticalSection(&m_csDirtyChunks);
{
FRAME_PROFILE_SCOPE(ChunkRebuildSchedule);
// If this chunk is very near, then move the renderer into a deferred
// mode. This won't commit any command buffers for rendering until we
// call CBuffDeferredModeEnd(), allowing us to group any near changes
// into an atomic unit. This is essential so we don't temporarily create
// any holes in the environment whilst updating one chunk and not the
// neighbours. The "very near" aspect of this is just a cosmetic nicety -
// exactly the same thing would happen further away, but we just don't
// care about it so much in terms of visual impact.
if (veryNearCount > 0) {
RenderManager.CBuffDeferredModeStart();
}
// Build this chunk & return false to continue processing
chunk->clearDirty();
// Take a copy of the details that are required for chunk rebuilding,
// and rebuild That instead of the original chunk data. This is done
// within the m_csDirtyChunks critical section, which means that any
// chunks can't be repositioned whilst we are doing this copy. The copy
// will then be guaranteed to be consistent whilst rebuilding takes
// place outside of that critical section.
permaChunk.makeCopyForRebuild(chunk);
LeaveCriticalSection(&m_csDirtyChunks);
}
// static int64_t totalTime = 0;
// static int64_t countTime = 0;
// int64_t startTime = System::currentTimeMillis();
permaChunk.rebuild();
{
FRAME_PROFILE_SCOPE(ChunkRebuildBody);
permaChunk.rebuild();
}
// int64_t endTime = System::currentTimeMillis();
// totalTime += (endTime - startTime);
// countTime++;
@ -4102,7 +4138,10 @@ int LevelRenderer::rebuildChunkThreadProc(void* lpParam) {
s_activationEventA[index]->WaitForSignal(INFINITE);
// app.DebugPrintf("Rebuilding permaChunk %d\n", index + 1);
permaChunk[index + 1].rebuild();
{
FRAME_PROFILE_SCOPE(ChunkRebuildBody);
permaChunk[index + 1].rebuild();
}
// Inform the producer thread that we are done with this chunk
s_rebuildCompleteEvents->Set(index);

View file

@ -0,0 +1,260 @@
#include "../Platform/stdafx.h"
#include "FrameProfiler.h"
#ifdef ENABLE_FRAME_PROFILER
#include <array>
#include <atomic>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <string_view>
#if defined(_MSC_VER)
#define FRAME_PROFILER_NOINLINE __declspec(noinline)
#elif defined(__GNUC__) || defined(__clang__)
#define FRAME_PROFILER_NOINLINE __attribute__((noinline))
#else
#define FRAME_PROFILER_NOINLINE
#endif
namespace {
// Internal aliases and compile-time configuration for the frame profiler.
using FrameProfilerClock = std::chrono::steady_clock;
using Bucket = FrameProfiler::Bucket;
// Nanoseconds per millisecond, used when converting totals for display.
constexpr std::uint64_t kNsPerMs = 1000ULL * 1000ULL;
// A summary report is emitted roughly once per second of wall-clock time.
constexpr std::uint64_t kReportIntervalNs = 1000ULL * 1000ULL * 1000ULL;
constexpr std::size_t kBucketCount = FrameProfiler::BucketCount();
// C4J_FRAME_PROFILER values that turn the profiler off. Matching is exact
// (no case folding); any other non-empty value leaves profiling enabled.
constexpr auto kFalseTokens = std::to_array<std::string_view>({
    "0", "false", "False", "FALSE", "no", "No", "NO", "off", "Off", "OFF",
});
// Human-readable label for each bucket, in the order columns appear in the
// emitted report lines.
constexpr std::array<FrameProfiler::BucketDescriptor, kBucketCount>
    kBucketDescriptors = {{
        {Bucket::Frame, "frame"},
        {Bucket::World, "world"},
        {Bucket::Terrain, "terrain"},
        {Bucket::ChunkCull, "chunkCull"},
        {Bucket::ChunkCollect, "chunkCollect"},
        {Bucket::ChunkPlayback, "chunkPlayback"},
        {Bucket::ChunkDirtyScan, "chunkDirtyScan"},
        {Bucket::ChunkRebuildSchedule, "chunkRebuildSchedule"},
        {Bucket::ChunkRebuildBody, "chunkRebuildBody"},
        {Bucket::Entity, "entities"},
        {Bucket::Particle, "particles"},
        {Bucket::WeatherSky, "weather"},
        {Bucket::UIHud, "ui"},
        {Bucket::Lightmap, "lightmap"},
    }};
// Accumulated timing statistics for one profiling bucket, as seen by a
// single thread (no synchronisation; the atomic mirror handles workers).
struct BucketTotals {
    std::uint64_t totalNs{};  // summed elapsed time across all calls
    std::uint64_t maxNs{};    // worst single observation in the window
    std::uint64_t calls{};    // number of observations in the window
    // Fold one timed scope into the running totals.
    void Record(std::uint64_t elapsedNs) noexcept {
        ++calls;
        totalNs += elapsedNs;
        maxNs = (maxNs < elapsedNs) ? elapsedNs : maxNs;
    }
    // Combine totals gathered by another accumulator into this one.
    void Merge(const BucketTotals& other) noexcept {
        calls += other.calls;
        totalNs += other.totalNs;
        maxNs = (maxNs < other.maxNs) ? other.maxNs : maxNs;
    }
};
// Lock-free accumulator used by non-frame (worker) threads; mirrors
// BucketTotals with atomic fields so multiple threads can record at once.
struct AtomicBucketTotals {
    std::atomic<std::uint64_t> totalNs{0};
    std::atomic<std::uint64_t> maxNs{0};
    std::atomic<std::uint64_t> calls{0};
};
// Process-wide profiler state shared by every worker thread.
struct ProfilerState {
    std::array<AtomicBucketTotals, kBucketCount> workerBuckets{};
};
// Per-thread state; the frame thread keeps its totals here to avoid any
// atomic traffic on the hot path.
struct ThreadState {
    std::uint32_t frameScopeDepth{};  // nesting depth of live FrameScopes
    std::uint64_t windowStartNs{};    // start of current report window; 0 = unset
    std::array<BucketTotals, kBucketCount> localBuckets{};
};
// constinit guarantees constant (non-dynamic) initialisation of the globals.
constinit ProfilerState g_profilerState{};
constinit thread_local ThreadState t_threadState{};
static_assert(kBucketDescriptors.size() == kBucketCount);
// Current monotonic time in nanoseconds; the epoch is arbitrary but fixed,
// so only differences between two samples are meaningful.
[[nodiscard]] inline std::uint64_t nowNs() noexcept {
    const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
    const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch);
    return static_cast<std::uint64_t>(ns.count());
}
// Convert integral nanoseconds to fractional milliseconds for report output.
[[nodiscard]] constexpr double nsToMs(std::uint64_t ns) noexcept {
    const auto divisor = static_cast<double>(kNsPerMs);
    return static_cast<double>(ns) / divisor;
}
[[nodiscard]] constexpr bool envSaysDisabled(
std::string_view value) noexcept {
if (value.empty()) return false;
for (std::string_view falseToken : kFalseTokens) {
if (value == falseToken) return true;
}
return false;
}
// Atomically raise `value` to `candidate` if the candidate is larger.
// The CAS loop re-reads `observed` on failure (compare_exchange_weak
// stores the current value back into it), so the comparison stays fresh.
inline void updateAtomicMax(std::atomic<std::uint64_t>& value,
                            std::uint64_t candidate) noexcept {
    for (std::uint64_t observed = value.load(std::memory_order_relaxed);
         observed < candidate;) {
        if (value.compare_exchange_weak(observed, candidate,
                                        std::memory_order_relaxed,
                                        std::memory_order_relaxed)) {
            return;
        }
    }
}
// Record a sample into the shared worker-thread buckets. Relaxed ordering is
// sufficient: the counters are statistical and carry no data dependencies.
inline void recordWorkerBucket(Bucket bucket, std::uint64_t elapsedNs) noexcept {
    AtomicBucketTotals& state =
        g_profilerState.workerBuckets[FrameProfiler::BucketIndex(bucket)];
    state.totalNs.fetch_add(elapsedNs, std::memory_order_relaxed);
    state.calls.fetch_add(1, std::memory_order_relaxed);
    updateAtomicMax(state.maxNs, elapsedNs);
}
// True when the calling thread is inside an active FRAME_PROFILE_FRAME_SCOPE,
// i.e. it owns the per-frame local buckets and may skip the atomics.
[[nodiscard]] inline bool isFrameThread() noexcept {
    return t_threadState.frameScopeDepth != 0;
}
// One-time read of C4J_FRAME_PROFILER: in profiler-enabled builds profiling
// defaults to on and is suppressed only by an explicit "off"-style token.
// Kept out of line so the cold getenv path never inlines into hot callers.
FRAME_PROFILER_NOINLINE
[[nodiscard]] bool computeEnabled() noexcept {
    if (const char* const envValue = std::getenv("C4J_FRAME_PROFILER")) {
        return !envSaysDisabled(envValue);
    }
    return true;
}
// Print the per-window summary to stderr: one line of per-frame averages
// followed by one line of worst-case times and call counts, one column per
// bucket in kBucketDescriptors order.
FRAME_PROFILER_NOINLINE void emitWindowReport(
    const std::array<BucketTotals, kBucketCount>& buckets) noexcept {
    const std::uint64_t frames =
        buckets[FrameProfiler::BucketIndex(Bucket::Frame)].calls;
    // No frames completed in this window - nothing meaningful to average.
    if (frames == 0) return;
    std::fprintf(stderr,
                 "[frame-prof] avg/frame(ms) frames=%llu",
                 static_cast<unsigned long long>(frames));
    for (const auto& descriptor : kBucketDescriptors) {
        const BucketTotals& bucket =
            buckets[FrameProfiler::BucketIndex(descriptor.bucket)];
        const std::string_view label = descriptor.label;
        // %.*s bounds the read by the view's length.
        std::fprintf(stderr, " %.*s=%.2f", static_cast<int>(label.size()),
                     label.data(), nsToMs(bucket.totalNs) / frames);
    }
    std::fputc('\n', stderr);
    std::fputs("[frame-prof] max(ms)/calls", stderr);
    for (const auto& descriptor : kBucketDescriptors) {
        const BucketTotals& bucket =
            buckets[FrameProfiler::BucketIndex(descriptor.bucket)];
        const std::string_view label = descriptor.label;
        std::fprintf(stderr, " %.*s=%.2f/%llu",
                     static_cast<int>(label.size()), label.data(),
                     nsToMs(bucket.maxNs),
                     static_cast<unsigned long long>(bucket.calls));
    }
    std::fputc('\n', stderr);
    // Flush so reports appear promptly even if the game later crashes.
    std::fflush(stderr);
}
[[nodiscard]] std::array<BucketTotals, kBucketCount> snapshotAndResetWorkerBuckets()
noexcept {
std::array<BucketTotals, kBucketCount> snapshot = {};
for (std::size_t i = 0; i < kBucketCount; ++i) {
AtomicBucketTotals& workerBucket = g_profilerState.workerBuckets[i];
snapshot[i].totalNs =
workerBucket.totalNs.exchange(0, std::memory_order_relaxed);
snapshot[i].maxNs =
workerBucket.maxNs.exchange(0, std::memory_order_relaxed);
snapshot[i].calls =
workerBucket.calls.exchange(0, std::memory_order_relaxed);
}
return snapshot;
}
} // namespace
// Evaluate the environment override exactly once; the magic-static guard
// makes the first call thread-safe, later calls are a cached bool read.
bool FrameProfiler::IsEnabled() noexcept {
    static const bool enabled = computeEnabled();
    return enabled;
}
// Route a sample to the cheap thread-local buckets when called from the
// frame thread, otherwise to the shared atomic worker buckets.
void FrameProfiler::Record(Bucket bucket, std::uint64_t elapsedNs) noexcept {
    if (!isFrameThread()) {
        recordWorkerBucket(bucket, elapsedNs);
    } else {
        t_threadState.localBuckets[BucketIndex(bucket)].Record(elapsedNs);
    }
}
// Close out one frame: fold the frame time into the stats and, once per
// reporting interval, merge the worker-thread totals and emit a report.
// Called from the FrameScope destructor while frameScopeDepth is still
// non-zero, so the Frame bucket lands in the thread-local buckets.
void FrameProfiler::EndFrame(std::uint64_t elapsedNs) noexcept {
    Record(Bucket::Frame, elapsedNs);
    ThreadState& threadState = t_threadState;
    const std::uint64_t now = nowNs();
    // First frame ever: anchor the reporting window and wait.
    if (threadState.windowStartNs == 0) {
        threadState.windowStartNs = now;
        return;
    }
    if ((now - threadState.windowStartNs) < kReportIntervalNs) return;
    // Combine this thread's totals with a drained snapshot of the workers'.
    std::array<BucketTotals, kBucketCount> combined = threadState.localBuckets;
    const auto workerSnapshot = snapshotAndResetWorkerBuckets();
    for (std::size_t i = 0; i < kBucketCount; ++i) {
        combined[i].Merge(workerSnapshot[i]);
    }
    emitWindowReport(combined);
    // Start the next window from scratch.
    threadState.windowStartNs = now;
    threadState.localBuckets = {};
}
// Begin timing one bucket; when the profiler is disabled the cost is a
// single cached-bool check and no clock read.
FrameProfiler::Scope::Scope(Bucket bucket) noexcept
    : m_startNs(0), m_bucket(bucket), m_enabled(FrameProfiler::IsEnabled()) {
    if (m_enabled) m_startNs = nowNs();
}
// Attribute the elapsed time to the bucket on scope exit.
FrameProfiler::Scope::~Scope() noexcept {
    if (!m_enabled) return;
    FrameProfiler::Record(m_bucket, nowNs() - m_startNs);
}
// Mark this thread as the frame thread. Only the outermost FrameScope
// (depth 0 -> 1) times the frame; nested FrameScopes stay inert.
FrameProfiler::FrameScope::FrameScope() noexcept
    : m_startNs(0), m_enabled(false) {
    if (!FrameProfiler::IsEnabled()) return;
    m_enabled = (t_threadState.frameScopeDepth++ == 0);
    if (m_enabled) m_startNs = nowNs();
}
// Unwind the depth counter, and for the outermost scope close the frame.
// EndFrame runs before the decrement so the frame totals are still
// attributed to the frame thread's local buckets.
FrameProfiler::FrameScope::~FrameScope() noexcept {
    if (!m_enabled) {
        // Nested scopes (profiler enabled) did increment the counter and
        // must undo it; with the profiler disabled nothing was incremented
        // and the depth>0 guard prevents underflow.
        if (t_threadState.frameScopeDepth > 0) {
            --t_threadState.frameScopeDepth;
        }
        return;
    }
    FrameProfiler::EndFrame(nowNs() - m_startNs);
    if (t_threadState.frameScopeDepth > 0) {
        --t_threadState.frameScopeDepth;
    }
}
#endif

View file

@ -0,0 +1,93 @@
#pragma once
#ifdef ENABLE_FRAME_PROFILER
#include <cstddef>
#include <cstdint>
#include <utility>
// Low-overhead frame profiler. Wrap code in FRAME_PROFILE_SCOPE(bucket) to
// time it; wrap the top of the frame in FRAME_PROFILE_FRAME_SCOPE() to mark
// the frame thread and drive periodic reporting. All state lives in the
// implementation file; this header only exposes the RAII handles.
class FrameProfiler {
public:
    // Timing categories. Frame must stay first; Count is a sentinel and is
    // never recorded.
    enum class Bucket : std::uint8_t {
        Frame,
        World,
        Terrain,
        ChunkCull,
        ChunkCollect,
        ChunkPlayback,
        ChunkDirtyScan,
        ChunkRebuildSchedule,
        ChunkRebuildBody,
        Entity,
        Particle,
        WeatherSky,
        UIHud,
        Lightmap,
        Count,
    };
    // Pairs a bucket with the label printed in emitted reports.
    struct BucketDescriptor {
        Bucket bucket;
        const char* label;
    };
    // Index of a bucket within fixed-size per-bucket arrays.
    [[nodiscard]] static constexpr std::size_t BucketIndex(
        Bucket bucket) noexcept {
        return static_cast<std::size_t>(bucket);
    }
    // Number of real buckets (excludes the Count sentinel).
    [[nodiscard]] static constexpr std::size_t BucketCount() noexcept {
        return BucketIndex(Bucket::Count);
    }
    // Whether profiling is active for this process (env-var controlled).
    [[nodiscard]] static bool IsEnabled() noexcept;
    // RAII timer: records the elapsed time of its lexical scope into one
    // bucket. Non-copyable and non-movable so timings can't be duplicated.
    class Scope {
    public:
        explicit Scope(Bucket bucket) noexcept;
        Scope(const Scope&) = delete;
        Scope& operator=(const Scope&) = delete;
        Scope(Scope&&) = delete;
        Scope& operator=(Scope&&) = delete;
        ~Scope() noexcept;
    private:
        std::uint64_t m_startNs;  // scope entry timestamp (when enabled)
        Bucket m_bucket;
        bool m_enabled;
    };
    // RAII frame marker: the outermost instance on a thread times the whole
    // frame and triggers periodic report emission on destruction.
    class FrameScope {
    public:
        FrameScope() noexcept;
        FrameScope(const FrameScope&) = delete;
        FrameScope& operator=(const FrameScope&) = delete;
        FrameScope(FrameScope&&) = delete;
        FrameScope& operator=(FrameScope&&) = delete;
        ~FrameScope() noexcept;
    private:
        std::uint64_t m_startNs;  // frame start timestamp (outermost only)
        bool m_enabled;           // true only for the outermost scope
    };
private:
    static void Record(Bucket bucket, std::uint64_t elapsedNs) noexcept;
    static void EndFrame(std::uint64_t elapsedNs) noexcept;
};
// Token-pasting helpers so every profiling scope gets a unique local name
// (based on __LINE__), allowing several scopes in one function.
#define FRAME_PROFILE_CONCAT_INNER(a, b) a##b
#define FRAME_PROFILE_CONCAT(a, b) FRAME_PROFILE_CONCAT_INNER(a, b)
// Times the remainder of the enclosing scope and attributes it to the given
// bucket, e.g. FRAME_PROFILE_SCOPE(Terrain);
#define FRAME_PROFILE_SCOPE(bucket_name) \
    FrameProfiler::Scope FRAME_PROFILE_CONCAT(frameProfileScope_, __LINE__)( \
        FrameProfiler::Bucket::bucket_name)
// Marks the calling thread's top-level frame; place once per rendered frame.
#define FRAME_PROFILE_FRAME_SCOPE() \
    FrameProfiler::FrameScope FRAME_PROFILE_CONCAT(frameProfileFrameScope_, \
                                                   __LINE__)
#else
// Profiler compiled out: the macros expand to harmless no-ops.
#define FRAME_PROFILE_SCOPE(bucket_name) ((void)0)
#define FRAME_PROFILE_FRAME_SCOPE() ((void)0)
#endif

View file

@ -80,6 +80,10 @@ if get_option('classic_panorama')
global_cpp_defs += '-DCLASSIC_PANORAMA'
endif
if get_option('enable_frame_profiler')
global_cpp_defs += ['-DENABLE_FRAME_PROFILER']
endif
if get_option('ui_backend') == 'shiggy'
shiggy_dep = dependency(
'shiggy',
@ -127,4 +131,4 @@ custom_target(
'@OUTPUT@',
],
build_by_default: true,
)
)

View file

@ -24,4 +24,11 @@ option(
type: 'boolean',
value: true,
description: 'Toggles V-Sync and adds options to unlock maximum in-game framerate.',
)
)
option(
'enable_frame_profiler',
type: 'boolean',
value: false,
description: 'Enable the in-engine frame profiler for render hotspot discovery.',
)