// LCEMP/Minecraft.Client/Platform_Libs/Dev/Render/microprofile/microprofile.cpp
//
// 16191 lines
// 469 KiB
// C++

#include "../Profiler.h"
#ifdef ENABLE_PROFILING
#define MICROPROFILE_IMPL
#include "microprofile.h"
#if MICROPROFILE_ENABLED
#define BREAK_SKIP() __builtin_trap()
#ifdef _WIN32
#if !defined(WIN32_LEAN_AND_MEAN)
#define WIN32_LEAN_AND_MEAN
#endif
#include <malloc.h>
#endif
#ifdef _WIN32
#define MICROPROFILE_MAX_PATH MAX_PATH
#else
#define MICROPROFILE_MAX_PATH 1024
#endif
#include <atomic>
#include <ctype.h>
#include <mutex>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <thread>
#if defined(MICROPROFILE_SYSTEM_STB)
#include <stb_sprintf.h>
#else
#define STB_SPRINTF_IMPLEMENTATION
#include "stb/stb_sprintf.h"
#endif
#if defined(_WIN32) && _MSC_VER == 1700
#define PRIx64 "llx"
#define PRIu64 "llu"
#define PRId64 "lld"
#else
#include <inttypes.h>
#endif
// Internal capacity limits and sentinel values used by the implementation.
#define MICROPROFILE_MAX_COUNTERS 512
#define MICROPROFILE_MAX_COUNTER_NAME_CHARS (MICROPROFILE_MAX_COUNTERS * 16)
// Number of 32-bit words needed to hold one bit per group.
#define MICROPROFILE_MAX_GROUP_INTS (MICROPROFILE_MAX_GROUPS / 32)
#define MICROPROFILE_MAX_CATEGORIES 16
#define MICROPROFILE_MAX_GRAPHS 5
#define MICROPROFILE_GRAPH_HISTORY 128
// Per-thread log capacities expressed in log entries rather than bytes.
#define MICROPROFILE_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_BUFFER_SIZE) / sizeof(MicroProfileLogEntry))
#define MICROPROFILE_GPU_BUFFER_SIZE ((MICROPROFILE_PER_THREAD_GPU_BUFFER_SIZE) / sizeof(MicroProfileLogEntry))
#define MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS 256
#define MICROPROFILE_WEBSOCKET_BUFFER_SIZE (64 << 10)
// Sentinels: all-ones (and all-ones minus one) in the respective unsigned type.
#define MICROPROFILE_INVALID_TICK ((uint64_t) - 1)
#define MICROPROFILE_DROPPED_TICK ((uint64_t) - 2)
#define MICROPROFILE_INVALID_FRAME ((uint32_t) - 1)
#define MICROPROFILE_GROUP_MASK_ALL 0xffffffff
#define MICROPROFILE_MAX_PATCH_ERRORS 32
#define MICROPROFILE_MAX_MODULE_EXEC_REGIONS 16
// Bit masks over a 64-bit MicroProfileLogEntry, as given by the literal values:
//   bits  0-47 : MP_LOG_TICK_MASK
//   bits 48-61 : MP_LOG_INDEX_MASK (14 bits)
//   bits 62-63 : MP_LOG_BEGIN_MASK
//   bit  61    : MP_LOG_CSTR_BIT (the top bit of the index field)
//   bit  63    : MP_LOG_ENTER_LEAVE_MASK
// NOTE(review): the field meanings (tick / timer index / entry type) are taken from the mask
// names; confirm against MicroProfileMakeLogIndex and MicroProfileLogGetType.
#define MP_LOG_TICK_MASK 0x0000ffffffffffff
#define MP_LOG_INDEX_MASK 0x3fff000000000000
#define MP_LOG_BEGIN_MASK 0xc000000000000000
#define MP_LOG_CSTR_MASK 0xe000000000000000
#define MP_LOG_CSTR_BIT 0x2000000000000000
#define MP_LOG_PAYLOAD_PTR_MASK (~(MP_LOG_BEGIN_MASK | MP_LOG_CSTR_BIT))
#define MP_LOG_ENTER_LEAVE_MASK 0x8000000000000000
// Two-bit entry type codes.
#define MP_LOG_LEAVE 0x0
#define MP_LOG_ENTER 0x1
#define MP_LOG_EXTENDED 0x2
#define MP_LOG_EXTENDED_NO_DATA 0x3
// Default settings/preset file names. Each can be overridden by defining the macro before
// this point.
#ifndef MICROPROFILE_SETTINGS_FILE
#define MICROPROFILE_SETTINGS_FILE "mppresets.cfg"
#endif
#ifndef MICROPROFILE_SETTINGS_FILE_BUILTIN
#define MICROPROFILE_SETTINGS_FILE_BUILTIN "mppresets.builtin.cfg"
#endif
// NOTE(review): ".tmp" looks like a suffix rather than a full file name — confirm at the use site.
#ifndef MICROPROFILE_SETTINGS_FILE_TEMP
#define MICROPROFILE_SETTINGS_FILE_TEMP ".tmp"
#endif
// #define MP_LOG_EXTRA_DATA 0x3
static_assert(0 == (MICROPROFILE_MAX_GROUPS % 32), "MICROPROFILE_MAX_GROUPS must be divisible by 32");
// Reserved token values for extended log entries.
// All values fit in 14 bits (0x3fff is the largest), the width of MP_LOG_INDEX_MASK.
// NOTE(review): presumably these occupy the index field of a log entry whose type is
// MP_LOG_EXTENDED / MP_LOG_EXTENDED_NO_DATA — confirm where the entries are decoded.
enum EMicroProfileTokenExtended
{
ETOKEN_GPU_CPU_TIMESTAMP = 0x3fff,
ETOKEN_GPU_CPU_SOURCE_THREAD = 0x3ffe,
ETOKEN_META_MARKER = 0x3ffd,
ETOKEN_CUSTOM_NAME = 0x3ffc,
ETOKEN_CUSTOM_COLOR = 0x3ffb,
ETOKEN_CUSTOM_ID = 0x3ffa,
ETOKEN_CSTR_PTR = 0x2000, // note, matches MP_LOG_CSTR_BIT
// Same value as ETOKEN_CSTR_PTR.
ETOKEN_MAX = 0x2000,
};
// Unnamed enumeration with two websocket "dirty" identifiers (values 0 and 1).
// NOTE(review): presumably indices/bits marking which websocket state must be re-sent —
// confirm at the use sites.
enum
{
MICROPROFILE_WEBSOCKET_DIRTY_MENU,
MICROPROFILE_WEBSOCKET_DIRTY_ENABLED,
};
// Default allocator hooks. A client that overrides MICROPROFILE_ALLOC is expected to
// override all four macros together.
// MICROPROFILE_ALLOC deliberately has no trailing semicolon so that it expands to a plain
// expression and can be used inside casts, initializers and conditions.
#ifndef MICROPROFILE_ALLOC // redefine all if overriding
#define MICROPROFILE_ALLOC(nSize, nAlign) MicroProfileAllocAligned(nSize, nAlign)
#define MICROPROFILE_REALLOC(p, s) realloc(p, s)
#define MICROPROFILE_FREE(p) MicroProfileFreeAligned(p)
#define MICROPROFILE_FREE_NON_ALIGNED(p) free(p)
#endif
#define MP_ALLOC(nSize, nAlign) MicroProfileAllocInternal(nSize, nAlign)
#define MP_REALLOC(p, s) MicroProfileReallocInternal(p, s)
#define MP_FREE(p) MicroProfileFreeInternal(p)
#define MP_ALLOC_OBJECT(T) (T*)MP_ALLOC(sizeof(T), alignof(T))
#define MP_ALLOC_OBJECT_ARRAY(T, Count) (T*)MP_ALLOC(sizeof(T) * Count, alignof(T))
#ifndef MICROPROFILE_DEBUG
#define MICROPROFILE_DEBUG 0
#endif
// A log entry is a single 64-bit word; see the MP_LOG_* masks for its bit fields.
typedef uint64_t MicroProfileLogEntry;
// Forward declarations of internal helpers; their definitions are not in this part of the file.
void MicroProfileSleep(uint32_t nMs);
template <typename T>
T MicroProfileMin(T a, T b);
template <typename T>
T MicroProfileMax(T a, T b);
template <typename T>
T MicroProfileClamp(T a, T min_, T max_);
int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond);
float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond);
uint32_t MicroProfileLogGetType(MicroProfileLogEntry Index);
uint64_t MicroProfileLogGetTimerIndex(MicroProfileLogEntry Index);
MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick);
int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End);
int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick);
uint16_t MicroProfileGetTimerIndex(MicroProfileToken t);
uint32_t MicroProfileGetGroupMask(MicroProfileToken t);
MicroProfileToken MicroProfileMakeToken(uint64_t nGroupMask, uint32_t nGroupIndex, uint16_t nTimer);
bool MicroProfileAnyGroupActive();
void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData);
// defer implementation
// Token pasting in two steps so that arguments such as __LINE__ are expanded before pasting.
#define CONCAT_INTERNAL(x, y) x##y
#define CONCAT(x, y) CONCAT_INTERNAL(x, y)

// Declared but never defined: any code path that really copies a MicroProfileExitScope
// references this symbol and therefore fails at link time.
void IntentionallyNotDefinedFunction__(); // DO NOT DEFINE THIS

// Holds a callable and invokes it exactly once, from the destructor.
template <typename T>
struct MicroProfileExitScope
{
	T lambda;

	// Implicit on purpose: MicroProfileExitScopeHelp::operator+ returns the callable directly.
	MicroProfileExitScope(T fnOnExit)
		: lambda(fnOnExit)
	{
	}

	// Runs the stored callable when the scope object dies.
	~MicroProfileExitScope()
	{
		lambda();
	}

	// Copying would run the callable twice; the undefined call below turns such a copy into a link error.
	MicroProfileExitScope(const MicroProfileExitScope& other)
		: lambda(other.lambda)
	{
		IntentionallyNotDefinedFunction__(); // this is here to ensure the compiler does not create duplicate copies
	}

private:
	// Assignment is declared private and left undefined, i.e. not available.
	MicroProfileExitScope& operator=(const MicroProfileExitScope&);
};

// Helper whose operator+ wraps an arbitrary callable into a MicroProfileExitScope, so the
// `defer` macro can be followed directly by a lambda body.
class MicroProfileExitScopeHelp
{
public:
	template <typename T>
	MicroProfileExitScope<T> operator+(T t)
	{
		return t;
	}
};

// Usage: `defer { cleanup(); };` — the body runs when the enclosing scope is left.
// The scope object is bound to a uniquely named const reference (name derived from __LINE__).
#define defer const auto& CONCAT(defer__, __LINE__) = MicroProfileExitScopeHelp() + [&]()
//////////////////////////////////////////////////////////////////////////
// platform IMPL
// Allocation entry points. The *Internal functions back the MP_ALLOC/MP_REALLOC/MP_FREE
// macros; the *Aligned functions are defined per platform below and back the default
// MICROPROFILE_ALLOC/MICROPROFILE_FREE macros.
void* MicroProfileAllocInternal(size_t nSize, size_t nAlign);
void MicroProfileFreeInternal(void* pPtr);
void* MicroProfileReallocInternal(void* pPtr, size_t nSize);
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign);
void MicroProfileFreeAligned(void* pMem);
#if defined(__APPLE__)
#include <TargetConditionals.h>
#include <float.h>
#include <libkern/OSAtomic.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <unistd.h>
#if TARGET_OS_IPHONE
#define MICROPROFILE_IOS
#endif
#define MP_TICK() mach_absolute_time()
inline int64_t MicroProfileTicksPerSecondCpu_()
{
static int64_t nTicksPerSecond = 0;
if(nTicksPerSecond == 0)
{
mach_timebase_info_data_t sTimebaseInfo;
mach_timebase_info(&sTimebaseInfo);
nTicksPerSecond = 1000000000ll * sTimebaseInfo.denom / sTimebaseInfo.numer;
}
return nTicksPerSecond;
}
int64_t MicroProfileTicksPerSecondCpu()
{
return MicroProfileTicksPerSecondCpu_();
}
#define MicroProfileTicksPerSecondCpu MicroProfileTicksPerSecondCpu_
// Returns the 64-bit id that pthread_threadid_np() reports for the calling thread.
// The return code of pthread_threadid_np() is not inspected.
inline uint64_t MicroProfileGetCurrentThreadId()
{
	uint64_t nThreadId;
	const pthread_t Self = pthread_self();
	pthread_threadid_np(Self, &nThreadId);
	return nThreadId;
}
#include <stdlib.h>
#define MP_BREAK() __builtin_trap()
#define MP_THREAD_LOCAL __thread
#define MP_STRCASECMP strcasecmp
#define MP_GETCURRENTTHREADID() MicroProfileGetCurrentThreadId()
#define MP_STRCASESTR strcasestr
#define MP_THREAD_LOCAL __thread
#define MP_NOINLINE __attribute__((noinline))
// Allocates nSize bytes aligned to at least nAlign bytes.
// Returns nullptr on failure. Release the block with MicroProfileFreeAligned().
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
{
	// posix_memalign() rejects (EINVAL) alignments that are not a multiple of sizeof(void*).
	// Callers may pass small values such as alignof(uint16_t), so round those up; a block
	// aligned to sizeof(void*) is also aligned to any smaller power of two.
	if(nAlign < sizeof(void*))
	{
		nAlign = sizeof(void*);
	}
	void* p = nullptr;
	if(posix_memalign(&p, nAlign, nSize) != 0)
	{
		return nullptr;
	}
	return p;
}

// Releases a block obtained from MicroProfileAllocAligned(). Passing nullptr is a no-op.
void MicroProfileFreeAligned(void* pMem)
{
	free(pMem);
}
#elif defined(_WIN32)
#include <Shlwapi.h>
#include <winsock2.h>
#include <ws2tcpip.h>
int64_t MicroProfileGetTick();
#define MP_TICK() MicroProfileGetTick()
#define MP_BREAK() __debugbreak()
#define MP_THREAD_LOCAL __declspec(thread)
#define MP_STRCASECMP _stricmp
#define MP_GETCURRENTTHREADID() GetCurrentThreadId()
#define MP_STRCASESTR StrStrI
#define MP_THREAD_LOCAL __declspec(thread)
#define MP_NOINLINE __declspec(noinline)
#ifndef MICROPROFILE_WIN32_TRAP_ALLOCATOR
#define MICROPROFILE_WIN32_TRAP_ALLOCATOR 0
#endif
#if MICROPROFILE_WIN32_TRAP_ALLOCATOR
// minimal trap allocator
#define PAGE_SIZE (4096)
// Trap allocator: every allocation gets its own reservation of
//   [ PAGE_SIZE uncommitted | nSize rounded up to whole pages, committed | PAGE_SIZE uncommitted ]
// and the returned block is pushed to the END of the committed pages, so the byte just past
// the block lies in an uncommitted page. nAlign is intentionally ignored.
// Returns nullptr if the reservation or the commit fails.
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
{
	(void)nAlign;
	size_t nAlignedSize = (nSize + PAGE_SIZE - 1) & (~(PAGE_SIZE - 1));
	size_t nDelta = nAlignedSize - nSize;
	size_t nFullSize = nAlignedSize + 2 * PAGE_SIZE;
	void* pReserved = VirtualAlloc(0, nFullSize, MEM_RESERVE, PAGE_READWRITE);
	if(!pReserved)
	{
		return nullptr;
	}
	void* pCommitted = VirtualAlloc((void*)((intptr_t)pReserved + PAGE_SIZE), nAlignedSize, MEM_COMMIT, PAGE_READWRITE);
	if(!pCommitted)
	{
		// Do not leak the address-space reservation when the commit fails.
		VirtualFree(pReserved, 0, MEM_RELEASE);
		return nullptr;
	}
	// Fill the slack in front of the block with 0xf0 and the block itself with 0xfe.
	memset(pCommitted, 0xf0, nAlignedSize);
	intptr_t page = (intptr_t)pCommitted;
	//((char*)page)[-1] = 0x70; //trap test
	page += nDelta;
	memset((void*)page, 0xfe, nSize);
	//((char*)page)[nSize] = 0x70; //trap test
	return (void*)page;
}

// Releases a block returned by the trap allocator above. Passing nullptr is a no-op.
void MicroProfileFreeAligned(void* pMem)
{
	if(!pMem)
	{
		return;
	}
	// Recover the base of the reservation: round down to the first committed page, then step
	// back over the leading guard page. VirtualFree(MEM_RELEASE) only accepts the base address
	// that VirtualAlloc(MEM_RESERVE) returned, so pMem itself must not be passed.
	intptr_t intptr = (intptr_t)pMem;
	intptr = (intptr & (~(PAGE_SIZE - 1))) - PAGE_SIZE;
	VirtualFree((void*)intptr, 0, MEM_RELEASE);
}
#else
// Default Windows allocator: forwards to the CRT aligned allocation function.
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
{
	void* const pBlock = _aligned_malloc(nSize, nAlign);
	return pBlock;
}

// Counterpart of MicroProfileAllocAligned(): forwards to _aligned_free().
void MicroProfileFreeAligned(void* pMem)
{
	_aligned_free(pMem);
	return;
}
#endif
#else
#ifndef MICROPROFILE_CUSTOM_PLATFORM
#include <float.h>
#include <malloc.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
// CPU tick frequency for this platform branch: a fixed 1,000,000,000 ticks per second,
// i.e. one tick per nanosecond.
inline int64_t MicroProfileTicksPerSecondCpu_()
{
	const int64_t nNanosecondsPerSecond = 1000000000ll;
	return nNanosecondsPerSecond;
}

// Out-of-line entry point with external linkage; forwards to the inline implementation.
int64_t MicroProfileTicksPerSecondCpu()
{
	const int64_t nTicksPerSecond = MicroProfileTicksPerSecondCpu_();
	return nTicksPerSecond;
}
#define MicroProfileTicksPerSecondCpu MicroProfileTicksPerSecondCpu_
// Returns the current CLOCK_REALTIME reading as a single nanosecond count.
// The return value of clock_gettime() is not checked.
// NOTE(review): CLOCK_REALTIME can be stepped by wall-clock adjustments; confirm whether a
// monotonic clock would be acceptable to every consumer of these ticks before changing it.
inline int64_t MicroProfileGetTick()
{
	timespec Now;
	clock_gettime(CLOCK_REALTIME, &Now);
	const int64_t nWholeSecondsNs = 1000000000ll * Now.tv_sec;
	return nWholeSecondsNs + Now.tv_nsec;
}
#define MP_TICK() MicroProfileGetTick()
#define MP_BREAK() __builtin_trap()
#define MP_THREAD_LOCAL __thread
#define MP_STRCASECMP strcasecmp
#define MP_GETCURRENTTHREADID() (uint64_t) pthread_self()
#define MP_STRCASESTR strcasestr
#define MP_THREAD_LOCAL __thread
#define MP_NOINLINE __attribute__((noinline))
// Allocates nSize bytes aligned to at least nAlign bytes.
// Returns nullptr on failure. Release the block with MicroProfileFreeAligned().
void* MicroProfileAllocAligned(size_t nSize, size_t nAlign)
{
#if defined(__linux__)
	// posix_memalign() rejects (EINVAL) alignments that are not a multiple of sizeof(void*).
	// Callers may pass small values such as alignof(uint16_t), so round those up; a block
	// aligned to sizeof(void*) is also aligned to any smaller power of two.
	if(nAlign < sizeof(void*))
	{
		nAlign = sizeof(void*);
	}
	void* p = nullptr;
	if(posix_memalign(&p, nAlign, nSize) != 0)
	{
		return nullptr;
	}
	return p;
#else
	return memalign(nAlign, nSize);
#endif
}

// Releases a block obtained from MicroProfileAllocAligned(). Passing nullptr is a no-op.
void MicroProfileFreeAligned(void* pMem)
{
	free(pMem);
}
#endif
#endif
#ifdef MICROPROFILE_PS4
#define MICROPROFILE_PS4_DECL
#include "microprofile_ps4.h"
#endif
#ifdef MICROPROFILE_XBOXONE
#define MICROPROFILE_XBOXONE_DECL
#include "microprofile_xboxone.h"
#else
#ifdef _WIN32
#include <d3d11_1.h>
#endif
#endif
// Integer type used to store thread ids.
// Windows: always 32-bit. Elsewhere: 32-bit only when MICROPROFILE_THREADID_SIZE_4BYTE is
// defined, otherwise 64-bit (the MICROPROFILE_THREADID_SIZE_8BYTE branch and the fallback
// branch both select uint64_t).
#ifdef _WIN32
typedef uint32_t MicroProfileThreadIdType;
#else
#ifdef MICROPROFILE_THREADID_SIZE_4BYTE
typedef uint32_t MicroProfileThreadIdType;
#elif MICROPROFILE_THREADID_SIZE_8BYTE
typedef uint64_t MicroProfileThreadIdType;
#else
typedef uint64_t MicroProfileThreadIdType;
#endif
#endif
// Assertion macro: evaluates `a` once and invokes MP_BREAK() when it is false.
// No build-configuration guard is visible here, so the check is present wherever the macro
// is used. The do/while(0) wrapper makes the expansion behave like a single statement.
#define MP_ASSERT(a) \
do \
{ \
if(!(a)) \
{ \
MP_BREAK(); \
} \
} while(0)
// Socket handle type: UINT_PTR on Windows, int elsewhere.
// NOTE(review): presumably chosen to match WinSock's SOCKET and POSIX file descriptors
// respectively — confirm against the socket code.
#ifdef _WIN32
#include <basetsd.h>
typedef UINT_PTR MpSocket;
#else
typedef int MpSocket;
#endif
// Native thread handle type: pthread_t everywhere except Windows, HANDLE on Windows.
// The earlier three-way form had a trailing `#else` (std::thread*) that could never be
// selected, because `#ifndef _WIN32` / `#elif defined(_WIN32)` already cover every case.
#ifndef _WIN32
typedef pthread_t MicroProfileThread;
#else
// For this one compiler version HANDLE is declared locally.
// NOTE(review): presumably because <windows.h> has not declared it yet at this point — confirm.
#if _MSC_VER == 1900
typedef void* HANDLE;
#endif
typedef HANDLE MicroProfileThread;
#endif
#if MICROPROFILE_DYNAMIC_INSTRUMENT
struct MicroProfileSymbolDesc;
#define MICROPROFILE_SUSPEND_MAX (4 << 10)
// Bookkeeping for suspended threads; only compiled when MICROPROFILE_DYNAMIC_INSTRUMENT is set.
// Both counters start at 0. On Windows the struct also carries two parallel arrays with room
// for MICROPROFILE_SUSPEND_MAX entries.
// NOTE(review): presumably NumSuspended is the number of valid entries in those arrays and
// SuspendedIP holds the instruction pointer of each suspended thread — confirm in the suspend code.
struct MicroProfileSuspendState
{
uint32_t SuspendCounter = 0;
uint32_t NumSuspended = 0;
#ifdef _WIN32
HANDLE Suspended[MICROPROFILE_SUSPEND_MAX];
intptr_t SuspendedIP[MICROPROFILE_SUSPEND_MAX];
#endif
};
// Forward declarations for the dynamic-instrumentation / symbol subsystem (only compiled when
// MICROPROFILE_DYNAMIC_INSTRUMENT is set). Definitions are not in this part of the file.
// Note: "MicroProfileSymbolFindFuction" is spelled this way in the declared name.
void MicroProfileSymbolQueryFunctions(MpSocket Connection, const char* pFilter);
bool MicroProfileInstrumentFunction(void* pFunction, const char* pModuleName, const char* pFunctionName, uint32_t nColor);
bool MicroProfileSymbolInitialize(bool bStartLoad, const char* pModuleName = 0);
MicroProfileSymbolDesc* MicroProfileSymbolFindFuction(void* pAddress);
void MicroProfileInstrumentFunctionsCalled(void* pFunction, const char* pModuleName, const char* pFunctionName, int nMinBytes, int nMaxCalls);
void MicroProfileSymbolQuerySendResult(MpSocket Connection);
void MicroProfileSymbolSendFunctionNames(MpSocket Connection);
void MicroProfileSymbolSendErrors(MpSocket Connection);
const char* MicroProfileSymbolModuleGetString(uint32_t nIndex);
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols);
void MicroProfileSymbolUpdateModuleList();
bool MicroProfileSymInit();
void MicroProfileSymCleanup();
#endif
struct MicroProfileFunctionQuery;
// hash table functions & declarations
// Keys are passed around as uint64_t and values as uintptr_t; hashing and key comparison are
// supplied per table through the two function-pointer types below. String and pointer
// flavours of both are declared here.
struct MicroProfileHashTable;
struct MicroProfileHashTableIterator;
typedef bool (*MicroProfileHashCompareFunction)(uint64_t l, uint64_t r);
typedef uint64_t (*MicroProfileHashFunction)(uint64_t p);
uint64_t MicroProfileHashTableHashString(uint64_t pString);
bool MicroProfileHashTableCompareString(uint64_t L, uint64_t R);
uint64_t MicroProfileHashTableHashPtr(uint64_t pString);
bool MicroProfileHashTableComparePtr(uint64_t L, uint64_t R);
void MicroProfileHashTableInit(MicroProfileHashTable* pTable, uint32_t nInitialSize, uint32_t nSearchLimit, MicroProfileHashCompareFunction CompareFunc, MicroProfileHashFunction HashFunc);
void MicroProfileHashTableDestroy(MicroProfileHashTable* pTable);
uint64_t MicroProfileHashTableHash(MicroProfileHashTable* pTable, uint64_t K);
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value);
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorBegin(MicroProfileHashTable* HashTable);
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorEnd(MicroProfileHashTable* HashTable);
// Minimal array container: a data pointer plus Size and Capacity, all zero/null by default.
// The struct has no constructor or destructor; it is managed through the free functions
// MicroProfileArrayInit / Destroy / Clear / PushBack declared after it.
// begin()/end() make it usable in range-based for loops.
template <typename T>
struct MicroProfileArray
{
T* Data = nullptr;
uint32_t Size = 0;
uint32_t Capacity = 0;
T& operator[](const uint32_t Index);
const T& operator[](const uint32_t Index) const;
T* begin();
T* end();
};
// Free-function interface for MicroProfileArray; definitions are not in this part of the file.
// NOTE(review): MicroProfileArrayDestroy takes an InitialCapacity parameter like Init does —
// confirm whether the definition actually uses it.
template <typename T>
void MicroProfileArrayInit(MicroProfileArray<T>& Array, uint32_t InitialCapacity);
template <typename T>
void MicroProfileArrayDestroy(MicroProfileArray<T>& Array, uint32_t InitialCapacity);
template <typename T>
void MicroProfileArrayClear(MicroProfileArray<T>& Array);
template <typename T>
void MicroProfileArrayPushBack(MicroProfileArray<T>& Array, const T& v);
// A 64-bit tick total paired with a 32-bit count. Used as the element type of the
// AccumTimers, Frame and Aggregate arrays in struct MicroProfile.
struct MicroProfileTimer
{
uint64_t nTicks;
uint32_t nCount;
};
// A named category: a fixed-size name buffer plus a bitmask of MICROPROFILE_MAX_GROUP_INTS
// 32-bit words (MICROPROFILE_MAX_GROUPS bits in total).
// NOTE(review): presumably one bit per group that belongs to the category — confirm.
struct MicroProfileCategory
{
char pName[MICROPROFILE_NAME_MAX_LEN];
uint32_t nGroupMask[MICROPROFILE_MAX_GROUP_INTS];
};
// Per-group metadata record; struct MicroProfile holds MICROPROFILE_MAX_GROUPS of these in
// its GroupInfo array.
// NOTE(review): nWSNext is presumably a link in a websocket-related list (compare the
// identically named field in MicroProfileTimerInfo/MicroProfileCounterInfo) — confirm.
struct MicroProfileGroupInfo
{
char pName[MICROPROFILE_NAME_MAX_LEN];
uint32_t nNameLen;
uint32_t nGroupIndex;
uint32_t nNumTimers;
uint32_t nMaxTimerNameLen;
uint32_t nColor;
uint32_t nCategory;
MicroProfileTokenType Type;
int nWSNext;
};
// Per-timer metadata record; struct MicroProfile holds MICROPROFILE_MAX_TIMERS of these in
// its TimerInfo array. Carries two fixed-size name buffers (pName and pNameExt).
// NOTE(review): the difference between pName and pNameExt is not visible here — confirm
// where the timers are registered.
struct MicroProfileTimerInfo
{
MicroProfileToken nToken;
uint32_t nTimerIndex;
uint32_t nGroupIndex;
char pName[MICROPROFILE_NAME_MAX_LEN];
char pNameExt[MICROPROFILE_NAME_MAX_LEN];
uint32_t nNameLen;
uint32_t nColor;
int nWSNext;
bool bGraph;
MicroProfileTokenType Type;
uint32_t Flags;
};
// Per-counter metadata record.
// NOTE(review): nParent / nSibling / nFirstChild presumably are indices that link counters
// into a tree, with nLevel the depth — confirm in the counter registration code.
// pName is a non-owning pointer as far as this struct is concerned (no destructor here).
// ExternalAtomic is a std::atomic, which makes the struct non-copyable.
struct MicroProfileCounterInfo
{
int nParent;
int nSibling;
int nFirstChild;
uint16_t nNameLen;
uint8_t nLevel;
const char* pName;
uint32_t nFlags;
int64_t nLimit;
double dLimit;
int nWSNext;
MicroProfileCounterFormat eFormat;
std::atomic<int64_t> ExternalAtomic;
};
// Fixed-size history of MICROPROFILE_GRAPH_HISTORY (128) 64-bit samples plus a put index.
// NOTE(review): presumably used as a ring buffer with nPut as the write position — confirm.
struct MicroProfileCounterHistory
{
uint32_t nPut;
uint64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
};
// Untyped pointer plus a size in bytes.
// NOTE(review): presumably the external memory location a counter value is read from —
// confirm where counters are sampled.
struct MicroProfileCounterSource
{
void* pSource;
uint32_t nSourceSize;
};
// State of one graph: MICROPROFILE_GRAPH_HISTORY (128) signed 64-bit samples, the token the
// graph is tied to, and a key. struct MicroProfile holds MICROPROFILE_MAX_GRAPHS of these.
struct MicroProfileGraphState
{
int64_t nHistory[MICROPROFILE_GRAPH_HISTORY];
MicroProfileToken nToken;
int32_t nKey;
};
// One context-switch record: outgoing and incoming thread id, plus a cpu number and a tick
// value packed into a single 64-bit word as bit-fields (8 + 56 bits).
// Both bit-fields are declared with the signed type int64_t.
// NOTE(review): with a signed 8-bit field, cpu numbers above 127 would presumably read back
// negative — confirm whether that range can occur.
struct MicroProfileContextSwitch
{
MicroProfileThreadIdType nThreadOut;
MicroProfileThreadIdType nThreadIn;
int64_t nCpu : 8;
int64_t nTicks : 56;
};
// Per-frame record; struct MicroProfile keeps MICROPROFILE_MAX_FRAME_HISTORY of these.
// nLogStart has one slot per thread (MICROPROFILE_MAX_THREADS).
// NOTE(review): nLogStart[i] is presumably the position in thread i's log at which this
// frame's entries begin — confirm in the flip code.
struct MicroProfileFrameState
{
uint64_t nFrameStartCpu;
uint64_t nFrameStartGpu;
uint64_t nFrameId;
uint32_t nGpuPending;
uint32_t nLogStart[MICROPROFILE_MAX_THREADS];
uint32_t nLogStartTimeline;
uint32_t nTimelineFrameMax;
int32_t nHistoryTimeline;
};
// All frame counter data stored. Used to store the time for all counters/groups for every frame.
// Must be enabled with MicroProfileEnableFrameCounterExtraData()
// Will allocate sizeof(MicroProfileFrameExtraCounterData) * MICROPROFILE_MAX_FRAME_HISTORY bytes
// The arrays are sized for the maximum number of timers and groups; NumTimers and NumGroups
// are 16-bit counts stored alongside them.
struct MicroProfileFrameExtraCounterData
{
uint16_t NumTimers;
uint16_t NumGroups;
uint64_t Timers[MICROPROFILE_MAX_TIMERS];
uint64_t Groups[MICROPROFILE_MAX_GROUPS];
};
// Configuration for CSV output: a three-value state, current and maximum counts for timers,
// groups and counters, and pointers to index, frame-data and name arrays.
// The struct has no constructor or destructor, so ownership of the pointed-to arrays is not
// expressed here.
// NOTE(review): presumably State moves INACTIVE -> CONFIG -> ACTIVE as the CSV set-up
// proceeds — confirm against the CSV configuration functions.
struct MicroProfileCsvConfig
{
enum CsvConfigState
{
INACTIVE = 0,
CONFIG,
ACTIVE,
};
CsvConfigState State;
uint32_t NumTimers;
uint32_t NumGroups;
uint32_t NumCounters;
uint32_t MaxTimers;
uint32_t MaxGroups;
uint32_t MaxCounters;
uint32_t TotalElements;
uint16_t* TimerIndices;
uint16_t* GroupIndices;
uint16_t* CounterIndices;
uint64_t* FrameData;
const char** pTimerNames;
const char** pGroupNames;
const char** pCounterNames;
uint32_t Flags;
};
#ifdef _WIN32
#pragma warning(push)
#pragma warning(disable : 4200) // zero-sized struct
#pragma warning(disable : 4201) // nameless struct/union
#pragma warning(disable : 4244) // possible loss of data
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4091)
#pragma warning(disable : 4189) // local variable is initialized but not referenced. (for defer local variables)
#pragma warning(disable : 4456)
#pragma warning(disable : 4702)
#endif
// Header of a singly linked block of character storage. The payload follows the header
// directly via the trailing unsized array `Memory` (a compiler extension in C++; MSVC
// warning 4200 is disabled just before this struct for that reason).
// NOTE(review): nUsed/nSize presumably count bytes of Memory in use / available, and
// DEFAULT_SIZE the default payload size — confirm in the string allocation code.
struct MicroProfileStringBlock
{
enum
{
DEFAULT_SIZE = 8192,
};
MicroProfileStringBlock* pNext;
uint32_t nUsed;
uint32_t nSize;
char Memory[];
};
// One hash-table slot: key, cached hash and value.
// A Hash of 0 marks an unused slot; MicroProfileHashTableIterator::SkipInvalid() steps over
// entries whose Hash is 0.
struct MicroProfileHashTableEntry
{
uint64_t Key;
uint64_t Hash;
uintptr_t Value;
};
// Hash table state: the entry array with its used and allocated counts, and the comparison
// and hash callbacks supplied to MicroProfileHashTableInit.
// NOTE(review): nSearchLimit and nLim presumably bound probing and trigger growth
// respectively — confirm in the table implementation.
struct MicroProfileHashTable
{
MicroProfileHashTableEntry* pEntries;
uint32_t nUsed;
uint32_t nAllocated;
uint32_t nSearchLimit;
uint32_t nLim;
MicroProfileHashCompareFunction CompareFunc;
MicroProfileHashFunction HashFunc;
};
// Forward iterator over the occupied slots of a MicroProfileHashTable.
// It stores a slot index and a non-owning pointer to the table. Slots whose Hash is 0 are
// treated as empty and skipped when advancing. The iterator is trivially copyable, so the
// compiler-generated copy operations are used.
struct MicroProfileHashTableIterator
{
	MicroProfileHashTableIterator(uint32_t nIndex, MicroProfileHashTable* pTable)
		: nIndex(nIndex)
		, pTable(pTable)
	{
	}

	uint32_t nIndex;
	MicroProfileHashTable* pTable;

	// Breaks (via MP_ASSERT) when the index is outside the allocated slot range.
	void AssertValid() const
	{
		MP_ASSERT(nIndex < pTable->nAllocated);
	}

	// Access to the current entry; asserts that the iterator is in range first.
	MicroProfileHashTableEntry& operator*()
	{
		AssertValid();
		return pTable->pEntries[nIndex];
	}
	MicroProfileHashTableEntry* operator->()
	{
		AssertValid();
		return &pTable->pEntries[nIndex];
	}

	// Two iterators are equal when they refer to the same slot of the same table.
	// Both comparisons are const so that const iterators can be compared as well.
	bool operator==(const MicroProfileHashTableIterator& rhs) const
	{
		return nIndex == rhs.nIndex && pTable == rhs.pTable;
	}
	bool operator!=(const MicroProfileHashTableIterator& rhs) const
	{
		return nIndex != rhs.nIndex || pTable != rhs.pTable;
	}

	// Advances past empty slots (Hash == 0); stops at the first occupied slot or at nAllocated.
	void SkipInvalid()
	{
		while(nIndex < pTable->nAllocated && pTable->pEntries[nIndex].Hash == 0)
			nIndex++;
	}

	// Pre-increment: moves to the next occupied slot. Returns a copy of the advanced iterator
	// (by value, as callers of the existing interface expect).
	MicroProfileHashTableIterator operator++()
	{
		AssertValid();
		nIndex++;
		SkipInvalid();
		return *this;
	}

	// Post-increment: advances and returns the iterator's previous value.
	MicroProfileHashTableIterator operator++(int)
	{
		MicroProfileHashTableIterator tmp = *this;
		++(*this);
		return tmp;
	}
};
// String storage: a hash table together with the first and last block of a
// MicroProfileStringBlock list.
// NOTE(review): presumably a string-interning pool (table for lookup, blocks for the
// character data) — confirm in the string functions.
struct MicroProfileStrings
{
MicroProfileHashTable HashTable;
MicroProfileStringBlock* pFirst;
MicroProfileStringBlock* pLast;
};
// Per-thread log: MICROPROFILE_BUFFER_SIZE log entries with atomic put/get indices, plus
// scope-stack and per-group bookkeeping for the owning thread.
// NOTE(review): nPut/nGet are presumably the producer/consumer positions of a ring buffer
// over Log — confirm in the logging code.
struct MicroProfileThreadLog
{
	std::atomic<uint32_t> nPut;
	std::atomic<uint32_t> nGet;
	MicroProfileLogEntry Log[MICROPROFILE_BUFFER_SIZE];
	uint32_t nStackPut;
	uint32_t nStackScope;
#ifdef MICROPROFILE_VERIFY_BALANCED
	// Only present in builds that define MICROPROFILE_VERIFY_BALANCED.
	uint64_t VerifyStack[MICROPROFILE_STACK_MAX];
#endif
	MicroProfileScopeStateC ScopeState[MICROPROFILE_STACK_MAX];
	uint32_t nActive;
	uint32_t nGpu;
	MicroProfileThreadIdType nThreadId;
	uint32_t nLogIndex;
	uint32_t nCustomId;
	uint32_t nIdleFrames;
	MicroProfileLogEntry nStackLogEntry[MICROPROFILE_STACK_MAX];
	// One slot more than the maximum stack depth.
	uint64_t nChildTickStack[MICROPROFILE_STACK_MAX + 1];
	int32_t nStackPos;
	// Per-group arrays, one slot per possible group.
	uint8_t nGroupStackPos[MICROPROFILE_MAX_GROUPS];
	uint64_t nGroupTicks[MICROPROFILE_MAX_GROUPS];
	uint64_t nAggregateGroupTicks[MICROPROFILE_MAX_GROUPS];
	enum
	{
		THREAD_MAX_LEN = 64,
	};
	// Sized by THREAD_MAX_LEN so the buffer and the constant cannot drift apart
	// (previously a literal 64; the size is unchanged).
	char ThreadName[THREAD_MAX_LEN];
	int nFreeListNext;
};
// Buffers for one websocket: a separately allocated buffer (pBufferAllocation / pBuffer /
// nBufferSize / nPut), the socket handle, and an inline SendBuffer of
// MICROPROFILE_WEBSOCKET_BUFFER_SIZE (64 KiB) bytes with atomic put/get indices.
// NOTE(review): pBuffer presumably points into pBufferAllocation (e.g. after alignment or a
// header) — confirm where the buffer is allocated.
struct MicroProfileWebSocketBuffer
{
char* pBufferAllocation;
char* pBuffer;
uint32_t nBufferSize;
uint32_t nPut;
MpSocket Socket;
char SendBuffer[MICROPROFILE_WEBSOCKET_BUFFER_SIZE];
std::atomic<uint32_t> nSendPut;
std::atomic<uint32_t> nSendGet;
};
// Signature of a hook function: takes one int, returns nothing.
typedef void (*MicroProfileHookFunc)(int x);
// Record describing a failed patch attempt: up to 32 bytes of code, a message of up to 256
// characters, and two ints.
// NOTE(review): nCodeSize is presumably the number of valid bytes in Code — confirm in the
// instrumentation code.
struct MicroProfilePatchError
{
unsigned char Code[32];
char Message[256];
int AlreadyInstrumented;
int nCodeSize;
};
// linear, per-frame per-thread gpu log
// Holds MICROPROFILE_GPU_BUFFER_SIZE log entries with plain (non-atomic) put/start indices,
// an opaque context pointer, and a scope-state stack of MICROPROFILE_STACK_MAX entries.
struct MicroProfileThreadLogGpu
{
MicroProfileLogEntry Log[MICROPROFILE_GPU_BUFFER_SIZE];
uint32_t nPut;
uint32_t nStart;
uint32_t nId;
void* pContext;
uint32_t nAllocated;
uint32_t nStackScope;
MicroProfileScopeStateC ScopeState[MICROPROFILE_STACK_MAX];
};
#if MICROPROFILE_GPU_TIMERS
// GPU backend callbacks. All start out null; each public GPU function below forwards to its
// callback when one is installed and otherwise returns a fixed fallback value.
static MicroProfileGpuInsertTimeStamp_CB MicroProfileGpuInsertTimeStamp_Callback = 0;
static MicroProfileGpuGetTimeStamp_CB MicroProfileGpuGetTimeStamp_Callback = 0;
static MicroProfileTicksPerSecondGpu_CB MicroProfileTicksPerSecondGpu_Callback = 0;
static MicroProfileGetGpuTickReference_CB MicroProfileGetGpuTickReference_Callback = 0;
static MicroProfileGpuFlip_CB MicroProfileGpuFlip_Callback = 0;
static MicroProfileGpuShutdown_CB MicroProfileGpuShutdown_Callback = 0;

// Forwards to the installed callback; returns 0 when none is installed.
uint32_t MicroProfileGpuInsertTimeStamp(void* pContext)
{
	if(!MicroProfileGpuInsertTimeStamp_Callback)
	{
		return 0;
	}
	return MicroProfileGpuInsertTimeStamp_Callback(pContext);
}

// Forwards to the installed callback; returns 1 when none is installed.
uint64_t MicroProfileGpuGetTimeStamp(uint32_t nKey)
{
	if(!MicroProfileGpuGetTimeStamp_Callback)
	{
		return 1;
	}
	return MicroProfileGpuGetTimeStamp_Callback(nKey);
}

// Forwards to the installed callback; returns 1 when none is installed.
uint64_t MicroProfileTicksPerSecondGpu()
{
	if(!MicroProfileTicksPerSecondGpu_Callback)
	{
		return 1;
	}
	return MicroProfileTicksPerSecondGpu_Callback();
}

// Forwards to the installed callback; returns 0 when none is installed
// (the output pointers are then left untouched).
int MicroProfileGetGpuTickReference(int64_t* pOutCPU, int64_t* pOutGpu)
{
	if(!MicroProfileGetGpuTickReference_Callback)
	{
		return 0;
	}
	return MicroProfileGetGpuTickReference_Callback(pOutCPU, pOutGpu);
}

// Forwards to the installed callback; returns 0 when none is installed.
uint32_t MicroProfileGpuFlip(void* p)
{
	if(!MicroProfileGpuFlip_Callback)
	{
		return 0;
	}
	return MicroProfileGpuFlip_Callback(p);
}

// Invokes the shutdown callback if one is installed; otherwise does nothing.
void MicroProfileGpuShutdown()
{
	if(!MicroProfileGpuShutdown_Callback)
	{
		return;
	}
	MicroProfileGpuShutdown_Callback();
}
#endif
#if MICROPROFILE_GPU_TIMERS_D3D11
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::::'##:::
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####::::'####:::
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##::::.. ##:::
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##:::::: ##:::
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::::: ##:::
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##:::::: ##:::
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######::'######:
//:......::::..::::::::::.......::::::........::::.......:::........::::......:::......::
// Per-frame query bookkeeping for the D3D11 GPU timer backend: start index, maximum count,
// an atomic running count, and an untyped pointer to a rate query with a "started" flag.
// NOTE(review): m_pRateQuery is presumably an ID3D11Query* stored as void* — confirm in the
// D3D11 implementation.
struct MicroProfileD3D11Frame
{
uint32_t m_nQueryStart;
uint32_t m_nQueryCountMax;
std::atomic<uint32_t> m_nQueryCount;
uint32_t m_nRateQueryStarted;
void* m_pRateQuery;
};
// D3D11 GPU timer state, derived from MicroProfileGpuTimerState. Device, context and query
// objects are stored as untyped pointers. Holds MICROPROFILE_D3D11_MAX_QUERIES query slots
// with matching result slots, and one MicroProfileD3D11Frame per delayed frame
// (MICROPROFILE_GPU_FRAME_DELAY).
struct MicroProfileGpuTimerStateD3D11 : public MicroProfileGpuTimerState
{
uint32_t bInitialized;
void* m_pDevice;
void* m_pImmediateContext;
void* m_pQueries[MICROPROFILE_D3D11_MAX_QUERIES];
int64_t m_nQueryResults[MICROPROFILE_D3D11_MAX_QUERIES];
uint32_t m_nQueryPut;
uint32_t m_nQueryGet;
uint32_t m_nQueryFrame;
int64_t m_nQueryFrequency;
void* pSyncQuery;
MicroProfileD3D11Frame m_QueryFrames[MICROPROFILE_GPU_FRAME_DELAY];
};
// Forward declarations of the D3D11 GPU timer backend; definitions are not in this part of
// the file. The signatures mirror the generic MicroProfileGpu* entry points.
uint32_t MicroProfileGpuInsertTimeStampD3D11(void* pContext_);
uint64_t MicroProfileGpuGetTimeStampD3D11(uint32_t nIndex);
bool MicroProfileGpuGetDataD3D11(void* pQuery, void* pData, uint32_t nDataSize);
uint64_t MicroProfileTicksPerSecondGpuD3D11();
uint32_t MicroProfileGpuFlipD3D11(void* pDeviceContext_);
void MicroProfileGpuInitD3D11(void* pDevice_, void* pImmediateContext);
void MicroProfileGpuShutdownD3D11();
int MicroProfileGetGpuTickReferenceD3D11(int64_t* pOutCPU, int64_t* pOutGpu);
MicroProfileGpuTimerStateD3D11* MicroProfileGetGpuTimerStateD3D11();
#endif
#if MICROPROFILE_GPU_TIMERS_D3D12
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::'#######::
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####:::'##.... ##:
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##:::..::::: ##:
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##::::'#######::
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::'##::::::::
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##::: ##::::::::
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######: #########:
//:......::::..::::::::::.......::::::........::::.......:::........::::......::.........::
#include <d3d12.h>
#ifndef MICROPROFILE_D3D12_MAX_QUERIES
#define MICROPROFILE_D3D12_MAX_QUERIES (32 << 10)
#endif
#define MICROPROFILE_D3D_MAX_NODE_COUNT 4
#define MICROPROFILE_D3D_INTERNAL_DELAY 8
#define MP_NODE_MASK_ALL(n) ((1u << (n)) - 1u)
#define MP_NODE_MASK_ONE(n) (1u << (n))
// Forward declarations of the D3D12 GPU timer backend; definitions are not in this part of
// the file. MicroProfileGetGpuTickReferenceD3D12 is declared twice in this list (a harmless
// redeclaration).
struct MicroProfileGpuTimerStateD3D12;
int MicroProfileGetGpuTickReferenceD3D12(int64_t* pOutCPU, int64_t* pOutGpu);
uint32_t MicroProfileGpuInsertTimeStampD3D12(void* pContext);
uint64_t MicroProfileGpuGetTimeStampD3D12(uint32_t nIndex);
uint64_t MicroProfileTicksPerSecondGpuD3D12();
uint32_t MicroProfileGpuFlipD3D12(void* pContext);
void MicroProfileGpuInitD3D12(void* pDevice_, uint32_t nNodeCount, void** pCommandQueues_, void** pCommandQueuesCopy_);
void MicroProfileGpuShutdownD3D12();
void MicroProfileSetCurrentNodeD3D12(uint32_t nNode);
int MicroProfileGetGpuTickReferenceD3D12(int64_t* pOutCPU, int64_t* pOutGpu);
MicroProfileGpuTimerStateD3D12* MicroProfileGetGpuTimerStateD3D12();
// Per-frame D3D12 bookkeeping: timestamp begin/count pairs for the main and the copy queue,
// a node index, one command list per node for each queue type
// (MICROPROFILE_D3D_MAX_NODE_COUNT slots), and one command allocator per queue type.
struct MicroProfileFrameD3D12
{
uint32_t nTimeStampBegin;
uint32_t nTimeStampCount;
uint32_t nTimeStampBeginCopyQueue;
uint32_t nTimeStampCountCopyQueue;
uint32_t nNode;
ID3D12GraphicsCommandList* pCommandList[MICROPROFILE_D3D_MAX_NODE_COUNT];
ID3D12GraphicsCommandList* pCommandListCopy[MICROPROFILE_D3D_MAX_NODE_COUNT];
ID3D12CommandAllocator* pCommandAllocator;
ID3D12CommandAllocator* pCommandAllocatorCopy;
};
// D3D12 GPU timer state, derived from MicroProfileGpuTimerState.
// Keeps per-node queue/heap/fence objects (up to MICROPROFILE_D3D_MAX_NODE_COUNT nodes),
// parallel result arrays for the main and copy queues (MICROPROFILE_D3D12_MAX_QUERIES slots
// each), and MICROPROFILE_D3D_INTERNAL_DELAY per-frame records.
// The two running timestamp counters are atomics; the remaining fields are plain.
struct MicroProfileGpuTimerStateD3D12 : public MicroProfileGpuTimerState
{
ID3D12Device* pDevice;
uint32_t nNodeCount;
uint32_t nCurrentNode;
uint64_t nFrame;
uint64_t nPendingFrame;
uint32_t nFrameStartTimeStamps;
uint32_t nFrameStartCopyQueueTimeStamps;
std::atomic<uint32_t> nFrameCountTimeStamps;
std::atomic<uint32_t> nFrameCountCopyQueueTimeStamps;
int64_t nFrequency;
ID3D12Resource* pBuffer;
ID3D12Resource* pBufferCopy;
struct
{
ID3D12CommandQueue* pCommandQueue;
ID3D12CommandQueue* pCommandQueueCopy;
ID3D12QueryHeap* pHeap;
ID3D12QueryHeap* pCopyQueueHeap;
ID3D12Fence* pFence;
ID3D12Fence* pFenceCopy;
} NodeState[MICROPROFILE_D3D_MAX_NODE_COUNT];
uint16_t nQueryFrames[MICROPROFILE_D3D12_MAX_QUERIES];
int64_t nResults[MICROPROFILE_D3D12_MAX_QUERIES];
uint16_t nQueryFramesCopy[MICROPROFILE_D3D12_MAX_QUERIES];
int64_t nResultsCopy[MICROPROFILE_D3D12_MAX_QUERIES];
MicroProfileFrameD3D12 Frames[MICROPROFILE_D3D_INTERNAL_DELAY];
};
#endif
#if MICROPROFILE_GPU_TIMERS_GL
//:'######:::'########::'##::::'##:::::'######:::'##:::::::
//'##... ##:: ##.... ##: ##:::: ##::::'##... ##:: ##:::::::
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::..::: ##:::::::
// ##::'####: ########:: ##:::: ##:::: ##::'####: ##:::::::
// ##::: ##:: ##.....::: ##:::: ##:::: ##::: ##:: ##:::::::
// ##::: ##:: ##:::::::: ##:::: ##:::: ##::: ##:: ##:::::::
//. ######::: ##::::::::. #######:::::. ######::: ########:
//:......::::..::::::::::.......:::::::......::::........::
// OpenGL GPU timer state, derived from MicroProfileGpuTimerState: MICROPROFILE_GL_MAX_QUERIES
// 32-bit timer slots plus a position.
// NOTE(review): GLTimers presumably holds GL query object names and GLTimerPos the next slot
// to use — confirm in the GL implementation.
struct MicroProfileGpuTimerStateGL : public MicroProfileGpuTimerState
{
uint32_t GLTimers[MICROPROFILE_GL_MAX_QUERIES];
uint32_t GLTimerPos;
};
// Forward declarations of the OpenGL GPU timer backend; definitions are not in this part of
// the file. The signatures mirror the generic MicroProfileGpu* entry points.
MicroProfileGpuTimerStateGL* MicroProfileGetGpuTimerStateGL();
uint32_t MicroProfileGpuInsertTimeStampGL(void* pContext);
uint64_t MicroProfileGpuGetTimeStampGL(uint32_t nKey);
uint64_t MicroProfileTicksPerSecondGpuGL();
int MicroProfileGetGpuTickReferenceGL(int64_t* pOutCpu, int64_t* pOutGpu);
uint32_t MicroProfileGpuFlipGL(void* pContext);
void MicroProfileGpuShutdownGL();
#endif
#if MICROPROFILE_GPU_TIMERS_VULKAN
//:'######:::'########::'##::::'##::::'##::::'##:'##::::'##:'##:::::::'##:::'##::::'###::::'##::: ##:
//'##... ##:: ##.... ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##::'##::::'## ##::: ###:: ##:
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##:'##::::'##:. ##:: ####: ##:
// ##::'####: ########:: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: #####::::'##:::. ##: ## ## ##:
// ##::: ##:: ##.....::: ##:::: ##::::. ##:: ##:: ##:::: ##: ##::::::: ##. ##::: #########: ##. ####:
// ##::: ##:: ##:::::::: ##:::: ##:::::. ## ##::: ##:::: ##: ##::::::: ##:. ##:: ##.... ##: ##:. ###:
//. ######::: ##::::::::. #######:::::::. ###::::. #######:: ########: ##::. ##: ##:::: ##: ##::. ##:
//:......::::..::::::::::.......:::::::::...::::::.......:::........::..::::..::..:::::..::..::::..::
// Forward declarations of the Vulkan GPU timer backend; the state struct is only
// forward-declared here and the definitions are not in this part of the file.
struct MicroProfileGpuTimerStateVulkan;
MicroProfileGpuTimerStateVulkan* MicroProfileGetGpuTimerStateVulkan();
uint32_t MicroProfileGpuInsertTimeStampVulkan(void* pContext);
uint64_t MicroProfileGpuGetTimeStampVulkan(uint32_t nKey);
uint64_t MicroProfileTicksPerSecondGpuVulkan();
int MicroProfileGetGpuTickReferenceVulkan(int64_t* pOutCpu, int64_t* pOutGpu);
uint32_t MicroProfileGpuFlipVulkan(void* pContext);
void MicroProfileGpuShutdownVulkan();
#endif
// Three atomic counters: module loads finished, module loads requested, and symbols loaded.
// NOTE(review): presumably global progress counters for symbol loading — confirm where they
// are updated.
struct MicroProfileSymbolState
{
std::atomic<int> nModuleLoadsFinished;
std::atomic<int> nModuleLoadsRequested;
std::atomic<int64_t> nSymbolsLoaded;
};
// An address range given by two intptr_t bounds.
// NOTE(review): whether nEnd is inclusive or exclusive is not visible here — confirm at the
// use sites.
struct MicroProfileSymbolModuleRegion
{
intptr_t nBegin;
intptr_t nEnd;
};
// Per-module symbol information: base address, name strings, up to
// MICROPROFILE_MAX_MODULE_EXEC_REGIONS (16) address regions, download/progress state, a
// symbol block pointer, an address-to-symbol hash table, and atomic load counters.
// The atomics make the struct non-copyable.
// NOTE(review): nNumExecutableRegions is presumably the number of valid entries in Regions —
// confirm in the module enumeration code.
struct MicroProfileSymbolModule
{
uint64_t nModuleBase;
uint32_t nMatchOffset;
uint32_t nStringOffset;
const char* pBaseString;
const char* pTrimmedString;
MicroProfileSymbolModuleRegion Regions[MICROPROFILE_MAX_MODULE_EXEC_REGIONS];
int nNumExecutableRegions;
bool bDownloading;
intptr_t nProgress;
intptr_t nProgressTarget;
struct MicroProfileSymbolBlock* pSymbolBlock;
MicroProfileHashTable AddressToSymbol;
int64_t nSymbols;
std::atomic<int64_t> nSymbolsLoaded;
std::atomic<int> nModuleLoadRequested;
std::atomic<int> nModuleLoadFinished;
};
// A memory region described by start address, size and a 32-bit protection value.
// NOTE(review): Protect presumably stores the platform's page-protection flags for the
// region — confirm in the instrumentation code.
struct MicroProfileInstrumentMemoryRegion
{
intptr_t Start;
intptr_t Size;
uint32_t Protect;
};
struct MicroProfile
{
uint32_t nTotalTimers;
uint32_t nGroupCount;
uint32_t nCategoryCount;
uint32_t nAggregateClear;
uint32_t nAggregateFlip;
uint32_t nAggregateFlipCount;
uint32_t nAggregateFrames;
uint64_t nFlipStartTick;
uint64_t nAggregateFlipTick;
uint32_t nDisplay;
uint32_t nBars;
uint32_t nActiveGroups[MICROPROFILE_MAX_GROUP_INTS];
bool AnyActive;
uint32_t nFrozen;
uint32_t nWasFrozen;
uint32_t nPlatformMarkersEnabled;
uint32_t nForceEnable;
uint32_t nForceGroups[MICROPROFILE_MAX_GROUP_INTS];
uint32_t nActiveGroupsWanted[MICROPROFILE_MAX_GROUP_INTS];
uint32_t nGroupMask[MICROPROFILE_MAX_GROUP_INTS];
uint32_t nStartEnabled;
uint32_t nAllThreadsWanted;
uint32_t nOverflow;
uint32_t nMaxGroupSize;
uint32_t nDumpFileNextFrame;
uint32_t nDumpFileCountDown;
uint32_t nDumpSpikeMask;
uint32_t nAutoClearFrames;
float fDumpCpuSpike;
float fDumpGpuSpike;
char HtmlDumpPath[512];
char CsvDumpPath[512];
uint32_t DumpFrameCount;
int64_t nPauseTicks;
std::atomic<int64_t> nContextSwitchStalledTick;
int64_t nContextSwitchLastPushed;
int64_t nContextSwitchLastIndexPushed;
float fReferenceTime;
float fRcpReferenceTime;
MicroProfileCategory CategoryInfo[MICROPROFILE_MAX_CATEGORIES];
MicroProfileGroupInfo GroupInfo[MICROPROFILE_MAX_GROUPS];
MicroProfileTimerInfo TimerInfo[MICROPROFILE_MAX_TIMERS];
uint32_t TimerToGroup[MICROPROFILE_MAX_TIMERS];
MicroProfileTimer AccumTimers[MICROPROFILE_MAX_TIMERS];
uint64_t AccumMaxTimers[MICROPROFILE_MAX_TIMERS];
uint64_t AccumMinTimers[MICROPROFILE_MAX_TIMERS];
uint64_t AccumTimersExclusive[MICROPROFILE_MAX_TIMERS];
uint64_t AccumMaxTimersExclusive[MICROPROFILE_MAX_TIMERS];
MicroProfileTimer Frame[MICROPROFILE_MAX_TIMERS];
uint64_t FrameExclusive[MICROPROFILE_MAX_TIMERS];
MicroProfileTimer Aggregate[MICROPROFILE_MAX_TIMERS];
uint64_t AggregateMax[MICROPROFILE_MAX_TIMERS];
uint64_t AggregateMin[MICROPROFILE_MAX_TIMERS];
uint64_t AggregateExclusive[MICROPROFILE_MAX_TIMERS];
uint64_t AggregateMaxExclusive[MICROPROFILE_MAX_TIMERS];
uint32_t FrameGroupThreadValid[MICROPROFILE_MAX_THREADS / 32 + 1];
struct GroupTime
{
uint64_t nTicks;
uint64_t nTicksExclusive;
uint32_t nCount;
};
GroupTime FrameGroupThread[MICROPROFILE_MAX_THREADS][MICROPROFILE_MAX_GROUPS];
GroupTime FrameGroup[MICROPROFILE_MAX_GROUPS];
uint64_t AccumGroup[MICROPROFILE_MAX_GROUPS];
uint64_t AccumGroupMax[MICROPROFILE_MAX_GROUPS];
uint64_t AggregateGroup[MICROPROFILE_MAX_GROUPS];
uint64_t AggregateGroupMax[MICROPROFILE_MAX_GROUPS];
MicroProfileGraphState Graph[MICROPROFILE_MAX_GRAPHS];
uint32_t nGraphPut;
uint32_t nThreadActive[MICROPROFILE_MAX_THREADS];
MicroProfileThreadLog* Pool[MICROPROFILE_MAX_THREADS];
MicroProfileThreadLogGpu* PoolGpu[MICROPROFILE_MAX_THREADS];
MicroProfileThreadLog TimelineLog;
uint32_t TimelineTokenFrameEnter[MICROPROFILE_TIMELINE_MAX_TOKENS];
uint32_t TimelineTokenFrameLeave[MICROPROFILE_TIMELINE_MAX_TOKENS];
uint32_t TimelineToken[MICROPROFILE_TIMELINE_MAX_TOKENS];
const char* TimelineTokenStaticString[MICROPROFILE_TIMELINE_MAX_TOKENS];
uint32_t nTimelineFrameMax;
MicroProfileFrameExtraCounterData* FrameExtraCounterData;
MicroProfileCsvConfig CsvConfig;
const char* pSettings;
const char* pSettingsReadOnly;
const char* pSettingsTemp;
uint32_t nNumLogs;
uint32_t nNumLogsGpu;
uint32_t nMemUsage;
int nFreeListHead;
uint32_t nFrameCurrent;
uint32_t nFrameCurrentIndex;
uint32_t nFramePut;
uint32_t nFrameNext;
uint64_t nFramePutIndex;
MicroProfileFrameState Frames[MICROPROFILE_MAX_FRAME_HISTORY];
uint64_t nFlipTicks;
uint64_t nFlipAggregate;
uint64_t nFlipMax;
uint64_t nFlipAggregateDisplay;
uint64_t nFlipMaxDisplay;
MicroProfileThread ContextSwitchThread;
bool bContextSwitchRunning;
bool bContextSwitchStop;
bool bContextSwitchAllThreads;
bool bContextSwitchNoBars;
uint32_t nContextSwitchUsage;
uint32_t nContextSwitchLastPut;
int64_t nContextSwitchHoverTickIn;
int64_t nContextSwitchHoverTickOut;
uint32_t nContextSwitchHoverThread;
uint32_t nContextSwitchHoverThreadBefore;
uint32_t nContextSwitchHoverThreadAfter;
uint8_t nContextSwitchHoverCpu;
uint8_t nContextSwitchHoverCpuNext;
uint32_t CoreCount;
uint8_t CoreEfficiencyClass[MICROPROFILE_MAX_CPU_CORES];
uint32_t nContextSwitchPut;
MicroProfileContextSwitch ContextSwitch[MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE];
MpSocket ListenerSocket;
uint32_t nWebServerPort;
char WebServerBuffer[MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE];
uint32_t WebServerPut;
uint64_t nWebServerDataSent;
int WebSocketTimers;
int WebSocketCounters;
int WebSocketGroups;
uint32_t nWebSocketDirty;
MpSocket WebSockets[1];
int64_t WebSocketFrameLast[1];
uint32_t nNumWebSockets;
uint32_t nSocketFail; // for error propagation.
MicroProfileThread WebSocketSendThread;
bool WebSocketThreadRunning;
bool WebSocketThreadJoined;
uint32_t WSCategoriesSent;
uint32_t WSGroupsSent;
uint32_t WSTimersSent;
uint32_t WSCountersSent;
MicroProfileWebSocketBuffer WSBuf;
char* pJsonSettings;
const char* pJsonSettingsName;
bool bJsonSettingsReadOnly;
uint32_t nJsonSettingsPending;
uint32_t nJsonSettingsBufferSize;
uint32_t nWSWasConnected;
uint32_t nMicroProfileShutdown;
uint32_t nWSViewMode;
char CounterNames[MICROPROFILE_MAX_COUNTER_NAME_CHARS];
MicroProfileCounterInfo CounterInfo[MICROPROFILE_MAX_COUNTERS];
MicroProfileCounterSource CounterSource[MICROPROFILE_MAX_COUNTERS];
uint32_t nNumCounters;
uint32_t nCounterNamePos;
std::atomic<int64_t> Counters[MICROPROFILE_MAX_COUNTERS];
std::atomic<double>* CountersDouble;
#if MICROPROFILE_COUNTER_HISTORY // uses 1kb per allocated counter. 512kb for default counter count
uint32_t nCounterHistoryPut;
int64_t nCounterHistory[MICROPROFILE_GRAPH_HISTORY][MICROPROFILE_MAX_COUNTERS]; // flipped to make swapping cheap, drawing more expensive.
int64_t nCounterMax[MICROPROFILE_MAX_COUNTERS];
int64_t nCounterMin[MICROPROFILE_MAX_COUNTERS];
double* dCounterHistory;
double* dCounterMax;
double* dCounterMin;
#endif
MicroProfileThread AutoFlipThread;
std::atomic<uint32_t> nAutoFlipDelay;
std::atomic<uint32_t> nAutoFlipStop;
MicroProfileStrings Strings;
MicroProfileToken CounterToken_MicroProfile;
MicroProfileToken CounterToken_StringBlock;
MicroProfileToken CounterToken_StringBlock_Count;
MicroProfileToken CounterToken_StringBlock_Waste;
MicroProfileToken CounterToken_StringBlock_Strings;
MicroProfileToken CounterToken_StringBlock_Memory;
MicroProfileToken CounterToken_Alloc;
MicroProfileToken CounterToken_Alloc_Memory;
MicroProfileToken CounterToken_Alloc_Count;
#if MICROPROFILE_DYNAMIC_INSTRUMENT
uint32_t DynamicTokenIndex;
MicroProfileToken DynamicTokens[MICROPROFILE_MAX_DYNAMIC_TOKENS];
void* FunctionsInstrumented[MICROPROFILE_MAX_DYNAMIC_TOKENS];
const char* FunctionsInstrumentedName[MICROPROFILE_MAX_DYNAMIC_TOKENS];
const char* FunctionsInstrumentedModuleNames[MICROPROFILE_MAX_DYNAMIC_TOKENS];
// const char* FunctionsInstrumentedUnmangled[MICROPROFILE_MAX_DYNAMIC_TOKENS];
uint32_t WSFunctionsInstrumentedSent;
MicroProfileSymbolState SymbolState;
MicroProfileSymbolModule SymbolModules[MICROPROFILE_INSTRUMENT_MAX_MODULES];
char SymbolModuleNameBuffer[MICROPROFILE_INSTRUMENT_MAX_MODULE_CHARS];
int SymbolModuleNameOffset;
int SymbolNumModules;
int WSSymbolModulesSent;
std::atomic<int> nSymbolsDirty;
MicroProfileFunctionQuery* pPendingQuery;
MicroProfileFunctionQuery* pFinishedQuery;
MicroProfileFunctionQuery* pQueryFreeList;
uint32_t nQueryProcessed;
uint32_t nNumQueryFree;
uint32_t nNumQueryAllocated;
int SymbolThreadRunning;
int SymbolThreadFinished;
MicroProfileThread SymbolThread;
int nNumPatchErrors;
MicroProfilePatchError PatchErrors[MICROPROFILE_MAX_PATCH_ERRORS];
int nNumPatchErrorFunctions;
const char* PatchErrorFunctionNames[MICROPROFILE_MAX_PATCH_ERRORS];
MicroProfileSuspendState SuspendState;
MicroProfileArray<MicroProfileInstrumentMemoryRegion> MemoryRegions;
#endif
int GpuQueue;
MicroProfileThreadLogGpu* pGpuGlobal;
MicroProfileGpuTimerState* pGPU;
};
// Returns the 2-bit entry type stored in bits 62..63 of a log entry.
inline uint32_t MicroProfileLogGetType(MicroProfileLogEntry Index)
{
	const uint64_t nTypeBits = (Index & MP_LOG_BEGIN_MASK) >> 62;
	return (uint32_t)(nTypeBits & 0x3);
}
// Returns the 14-bit timer index stored in bits 48..61 of a log entry.
inline uint64_t MicroProfileLogGetTimerIndex(MicroProfileLogEntry Index)
{
	const uint64_t nShifted = Index >> 48;
	return nShifted & 0x3fff;
}
// Returns the 16-bit data size field (bits 32..47) of an MP_LOG_EXTENDED entry,
// or 0 for every other entry type.
uint32_t MicroProfileLogGetDataSize(MicroProfileLogEntry Index)
{
	const bool bExtended = MicroProfileLogGetType(Index) == MP_LOG_EXTENDED;
	return bExtended ? (uint32_t)(0xffff & (Index >> 32)) : 0;
}
// Returns the extended token stored in bits 48..61 of an extended log entry.
inline EMicroProfileTokenExtended MicroProfileLogGetExtendedToken(MicroProfileLogEntry Index)
{
	const uint64_t nTokenBits = (Index >> 48) & 0x3fff;
	return (EMicroProfileTokenExtended)nTokenBits;
}
// Returns the 16-bit data size field stored in bits 32..47 of an extended log entry.
inline uint32_t MicroProfileLogGetExtendedDataSize(MicroProfileLogEntry Index)
{
	const uint64_t nSizeBits = (Index >> 32) & 0xffff;
	return (uint32_t)nSizeBits;
}
// Returns the low 32 bits of an extended log entry, i.e. its payload.
inline uint32_t MicroProfileLogGetExtendedPayload(MicroProfileLogEntry Index)
{
	const uint64_t nPayloadBits = Index & 0xffffffff;
	return (uint32_t)nPayloadBits;
}
// Returns the low 48 bits (MP_LOG_TICK_MASK) of a no-data extended entry.
inline uint64_t MicroProfileLogGetExtendedPayloadNoData(MicroProfileLogEntry Index)
{
	const uint64_t nPayloadBits = Index & MP_LOG_TICK_MASK;
	return nPayloadBits;
}
// Returns the bits selected by MP_LOG_PAYLOAD_PTR_MASK, reinterpreted as a pointer.
inline void* MicroProfileLogGetExtendedPayloadNoDataPtr(MicroProfileLogEntry Index)
{
	const uint64_t nPointerBits = Index & MP_LOG_PAYLOAD_PTR_MASK;
	return (void*)nPointerBits;
}
MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick);
MicroProfileLogEntry MicroProfileMakeLogExtended(EMicroProfileTokenExtended eTokenExt, uint32_t nDataSizeQWords, uint32_t nPayload);
MicroProfileLogEntry MicroProfileMakeLogExtendedNoData(EMicroProfileTokenExtended eTokenExt, uint64_t nTick);
// Packs a regular log entry: entry type in bits 62..63, the low 14 bits of the
// token in bits 48..61 and the low 48 bits of the tick in bits 0..47.
inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfileToken nToken, int64_t nTick)
{
	const uint64_t nTypeBits = nBegin << 62;
	const uint64_t nTimerBits = (0x3fff & nToken) << 48;
	const uint64_t nTickBits = MP_LOG_TICK_MASK & nTick;
	const MicroProfileLogEntry Entry = nTypeBits | nTimerBits | nTickBits;
	// round-trip check: the getters must return what was packed
	MP_ASSERT(MicroProfileLogGetType(Entry) == nBegin);
	MP_ASSERT(MicroProfileLogGetTimerIndex(Entry) == (nToken & 0x3fff));
	return Entry;
}
// Packs an extended entry: MP_LOG_EXTENDED type, 14-bit extended token, a 16-bit
// count of 8-byte words of data that follow the entry, and a 32-bit payload.
inline MicroProfileLogEntry MicroProfileMakeLogExtended(EMicroProfileTokenExtended eTokenExt, uint32_t nDataSizeQWords, uint32_t nPayload)
{
	MP_ASSERT(nDataSizeQWords < 0xffff);
	const uint64_t nTypeBits = ((uint64_t)MP_LOG_EXTENDED) << 62;
	const uint64_t nTokenBits = (0x3fff & (uint64_t)eTokenExt) << 48;
	const uint64_t nSizeBits = (0xffff & (uint64_t)nDataSizeQWords) << 32;
	const MicroProfileLogEntry Entry = nTypeBits | nTokenBits | nSizeBits | nPayload;
	// round-trip check: every field must be recoverable from the packed entry
	MP_ASSERT(MicroProfileLogGetExtendedToken(Entry) == eTokenExt);
	MP_ASSERT(MicroProfileLogGetExtendedDataSize(Entry) == nDataSizeQWords);
	MP_ASSERT(MicroProfileLogGetExtendedPayload(Entry) == nPayload);
	return Entry;
}
// Packs an extended entry that carries no trailing data; the low 48 bits hold
// the payload instead.
inline MicroProfileLogEntry MicroProfileMakeLogExtendedNoData(EMicroProfileTokenExtended eTokenExt, uint64_t nPayload)
{
	const uint64_t nTypeBits = ((uint64_t)MP_LOG_EXTENDED_NO_DATA) << 62;
	const uint64_t nTokenBits = (0x3fff & (uint64_t)eTokenExt) << 48;
	const uint64_t nPayloadBits = MP_LOG_TICK_MASK & nPayload;
	const MicroProfileLogEntry Entry = nTypeBits | nTokenBits | nPayloadBits;
	MP_ASSERT(MicroProfileLogGetExtendedToken(Entry) == eTokenExt);
	// fires when nPayload does not fit in the 48 payload bits
	MP_ASSERT(MicroProfileLogGetExtendedPayloadNoData(Entry) == nPayload);
	return Entry;
}
// Packs a no-data extended entry tagged ETOKEN_CSTR_PTR whose payload is the
// part of nPayload selected by MP_LOG_PAYLOAD_PTR_MASK. Used to store a pointer.
inline MicroProfileLogEntry MicroProfileMakeLogExtendedNoDataPtr(uint64_t nPayload)
{
	const uint64_t nTypeBits = ((uint64_t)MP_LOG_EXTENDED_NO_DATA) << 62;
	const uint64_t nTokenBits = ((uint64_t)ETOKEN_CSTR_PTR) << 48;
	const uint64_t nPointerBits = MP_LOG_PAYLOAD_PTR_MASK & nPayload;
	const MicroProfileLogEntry Entry = nTypeBits | nTokenBits | nPointerBits;
	// the masked pointer must survive the round trip through the entry
	MP_ASSERT(nPointerBits == (uint64_t)MicroProfileLogGetExtendedPayloadNoDataPtr(Entry));
	return Entry;
}
// Converts a byte count to the number of 8-byte words needed to hold it (rounded up).
inline uint32_t MicroProfileGetQWordSize(uint32_t nDataSize)
{
	const uint32_t nQWords = (nDataSize + 7) >> 3;
	MP_ASSERT(nQWords < 0xffff); // must fit the 16-bit size field of a log entry
	return nQWords;
}
namespace
{
// Overlays one 64-bit log entry with 7 message characters plus one extra
// character `h`, so the same 8 bytes can be read either way.
struct MicroProfilePayloadPack
{
union
{
struct
{
#if MICROPROFILE_BIG_ENDIAN /// NOT implemented.
// big-endian layout: `h` is the first byte of the word
char h;
char message[7];
#else
// default layout: `h` is the last byte of the word, after the message bytes
char message[7];
char h;
#endif
};
// the same 8 bytes viewed as a single log entry word
uint64_t LogEntry;
};
};
}; // namespace
// Returns End - Start for the 48-bit tick fields of two log entries, as a signed
// value that is correct across wrap-around of the 48-bit counter.
//
// Both ticks are moved to the top 48 bits so the subtraction wraps at the same
// point as the tick counter; shifting back down sign-extends the result. The
// shift and subtraction are done in unsigned arithmetic because entries carry
// type/index data in their upper 16 bits: left-shifting or overflowing a signed
// value there is undefined behaviour before C++20.
inline int64_t MicroProfileLogTickDifference(MicroProfileLogEntry Start, MicroProfileLogEntry End)
{
	const uint64_t nStart = (uint64_t)Start << 16;
	const uint64_t nEnd = (uint64_t)End << 16;
	const int64_t nDifference = (int64_t)(nEnd - nStart);
	return nDifference >> 16;
}
// Returns whichever of A and B has the later tick, using wrap-aware comparison.
// B is returned when the ticks are equal.
inline int64_t MicroProfileLogTickMax(MicroProfileLogEntry A, MicroProfileLogEntry B)
{
	// a negative (B - A) means A is the later one
	return MicroProfileLogTickDifference(A, B) < 0 ? A : B;
}
// Returns whichever of A and B has the earlier tick, using wrap-aware comparison.
// A is returned when the ticks are equal.
inline int64_t MicroProfileLogTickMin(MicroProfileLogEntry A, MicroProfileLogEntry B)
{
	// a negative (B - A) means B is the earlier one
	return MicroProfileLogTickDifference(A, B) < 0 ? B : A;
}
// Clamps T to the range [min, max] using the wrap-aware tick comparisons.
inline int64_t MicroProfileLogTickClamp(uint64_t T, uint64_t min, uint64_t max)
{
	const int64_t nLowerBounded = MicroProfileLogTickMax(T, min);
	return MicroProfileLogTickMin(nLowerBounded, max);
}
// Returns the 48-bit tick field of a log entry.
inline int64_t MicroProfileLogGetTick(MicroProfileLogEntry e)
{
	const uint64_t nTickBits = e & MP_LOG_TICK_MASK;
	return (int64_t)nTickBits;
}
// Returns a copy of e whose 48-bit tick field is replaced by the low 48 bits of
// nTick; the upper 16 bits of e are kept.
inline int64_t MicroProfileLogSetTick(MicroProfileLogEntry e, int64_t nTick)
{
	const uint64_t nTickBits = (uint64_t)(MP_LOG_TICK_MASK & nTick);
	const uint64_t nUpperBits = e & ~MP_LOG_TICK_MASK;
	return (int64_t)(nTickBits | nUpperBits);
}
// Returns the timer index held in the low 16 bits of a token.
inline uint16_t MicroProfileGetTimerIndex(MicroProfileToken t)
{
	return (uint16_t)(t & 0xffff);
}
// Returns the 32-bit group mask held in bits 16..47 of a token.
inline uint32_t MicroProfileGetGroupMask(MicroProfileToken t)
{
	const uint64_t nShifted = t >> 16;
	return (uint32_t)(nShifted & MICROPROFILE_GROUP_MASK_ALL);
}
// Returns the value held in the top 16 bits (48..63) of a token.
inline uint32_t MicroProfileGetGroupMaskIndex(MicroProfileToken t)
{
	const uint64_t nIndexBits = t >> 48;
	return (uint32_t)nIndexBits;
}
// Packs a token: nGroupIndex in bits 48..63, nGroupMask in bits 16..47 and
// nTimer in bits 0..15. Breaks if any MP_LOG_CSTR_MASK bit ends up set.
inline MicroProfileToken MicroProfileMakeToken(uint32_t nGroupMask, uint16_t nGroupIndex, uint16_t nTimer)
{
	const uint64_t nIndexBits = (uint64_t)nGroupIndex << 48llu;
	const uint64_t nMaskBits = (uint64_t)nGroupMask << 16llu;
	const uint64_t token = nIndexBits | nMaskBits | nTimer;
	if((token & MP_LOG_CSTR_MASK) != 0)
	{
		MP_BREAK(); // should never happen
	}
	return token;
}
// Returns a when a < b, otherwise b.
template <typename T>
T MicroProfileMin(T a, T b)
{
	if(a < b)
	{
		return a;
	}
	return b;
}
// Returns a when a > b, otherwise b.
template <typename T>
T MicroProfileMax(T a, T b)
{
	if(a > b)
	{
		return a;
	}
	return b;
}
// Limits a to the range [min_, max_]: raised to min_ first, then capped at max_.
template <typename T>
T MicroProfileClamp(T a, T min_, T max_)
{
	const T LowerBounded = MicroProfileMax(min_, a);
	return MicroProfileMin(max_, LowerBounded);
}
// Converts a duration in milliseconds to ticks for the given tick frequency.
// The arithmetic is done in single precision and truncated to an integer.
inline int64_t MicroProfileMsToTick(float fMs, int64_t nTicksPerSecond)
{
	const float fSeconds = fMs * 0.001f;
	const float fTicks = fSeconds * nTicksPerSecond;
	return (int64_t)fTicks;
}
// Returns the factor that converts a tick count to milliseconds for the given
// tick frequency. A frequency of 0 is treated as 1 to avoid dividing by zero.
inline float MicroProfileTickToMsMultiplier(int64_t nTicksPerSecond)
{
	const int64_t nDivisor = nTicksPerSecond != 0 ? nTicksPerSecond : 1;
	return 1000.f / nDivisor;
}
// Tick-to-millisecond factor for the CPU tick frequency.
float MicroProfileTickToMsMultiplierCpu()
{
	const int64_t nCpuTicksPerSecond = MicroProfileTicksPerSecondCpu();
	return MicroProfileTickToMsMultiplier(nCpuTicksPerSecond);
}
// Tick-to-millisecond factor for the GPU tick frequency.
float MicroProfileTickToMsMultiplierGpu()
{
	const int64_t nGpuTicksPerSecond = MicroProfileTicksPerSecondGpu();
	return MicroProfileTickToMsMultiplier(nGpuTicksPerSecond);
}
// Looks up the group a timer token belongs to through the global TimerToGroup table.
uint16_t MicroProfileGetGroupIndex(MicroProfileToken t)
{
	const uint16_t nTimerIndex = MicroProfileGetTimerIndex(t);
	return (uint16_t)MicroProfileGet()->TimerToGroup[nTimerIndex];
}
// Returns the current value of the platform tick source (MP_TICK).
uint64_t MicroProfileTick()
{
	const uint64_t nNow = MP_TICK();
	return nNow;
}
#ifdef _WIN32
#include <windows.h>
#define fopen microprofile_fopen_helper
// fopen replacement built on fopen_s: returns the opened file, or null when
// fopen_s reports an error.
FILE* microprofile_fopen_helper(const char* filename, const char* mode)
{
	FILE* pFile = 0;
	if(fopen_s(&pFile, filename, mode) != 0)
	{
		return 0;
	}
	return pFile;
}
// Returns the QueryPerformanceCounter frequency. The value is queried on the
// first call and cached in a function-local static afterwards.
int64_t MicroProfileTicksPerSecondCpu()
{
	static int64_t s_nFrequency = 0;
	if(0 != s_nFrequency)
	{
		return s_nFrequency;
	}
	QueryPerformanceFrequency((LARGE_INTEGER*)&s_nFrequency);
	return s_nFrequency;
}
// Returns the current QueryPerformanceCounter value.
int64_t MicroProfileGetTick()
{
	int64_t nCounter;
	QueryPerformanceCounter((LARGE_INTEGER*)&nCounter);
	return nCounter;
}
#endif
#if 1
typedef void* (*MicroProfileThreadFunc)(void*);
#ifndef _WIN32
typedef pthread_t MicroProfileThread;
// Starts Func on a new pthread and stores its handle in *pThread.
// Func is invoked with a null argument. The thread is created with default
// attributes and must later be joined with MicroProfileThreadJoin.
void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
	pthread_attr_t Attr;
	int r = pthread_attr_init(&Attr);
	MP_ASSERT(r == 0);
	// a failed create leaves *pThread unset; catch it here instead of in the join
	r = pthread_create(pThread, &Attr, Func, 0);
	MP_ASSERT(r == 0);
	// the attribute object is only needed during creation; release it so it does not leak
	r = pthread_attr_destroy(&Attr);
	MP_ASSERT(r == 0);
	(void)r; // silence set-but-unused warnings when MP_ASSERT compiles to nothing
}
// Blocks until the pthread in *pThread has finished; its return value is discarded.
void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
	const int nResult = pthread_join(*pThread, 0);
	MP_ASSERT(nResult == 0);
}
#elif defined(_WIN32)
typedef HANDLE MicroProfileThread;
// Win32 thread entry point: calls the MicroProfileThreadFunc passed as the
// thread parameter with a null argument and returns its result truncated to 32 bits.
DWORD __stdcall ThreadTrampoline(void* pFunc)
{
	MicroProfileThreadFunc Entry = (MicroProfileThreadFunc)pFunc;
	void* pResult = Entry(0);
	return (uint32_t)(uintptr_t)pResult;
}
// Starts Func on a new Win32 thread (via ThreadTrampoline) and stores the
// thread handle in *pThread.
void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
	HANDLE hThread = CreateThread(0, 0, ThreadTrampoline, Func, 0, 0);
	*pThread = hThread;
}
// Waits without timeout for the thread in *pThread to finish, then closes its handle.
void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
	HANDLE hThread = *pThread;
	WaitForSingleObject(hThread, INFINITE);
	CloseHandle(hThread);
}
#else
#include <thread>
typedef std::thread* MicroProfileThread;
// Allocates storage for a std::thread with MP_ALLOC_OBJECT, publishes it in
// *pThread and then constructs the thread in place, running Func(nullptr).
inline void MicroProfileThreadStart(MicroProfileThread* pThread, MicroProfileThreadFunc Func)
{
	std::thread* pStorage = MP_ALLOC_OBJECT(std::thread);
	*pThread = pStorage; // stored before the thread starts, as in the original order
	new(pStorage) std::thread(Func, nullptr);
}
// Joins the std::thread in *pThread, destroys it, frees its storage with
// MP_FREE and resets *pThread to null.
inline void MicroProfileThreadJoin(MicroProfileThread* pThread)
{
	std::thread* pStdThread = *pThread;
	pStdThread->join();
	pStdThread->~thread(); // placement-new'ed object: destroy explicitly before freeing
	MP_FREE(pStdThread);
	*pThread = 0;
}
#endif
#endif
// Web server support: with MICROPROFILE_WEBSERVER enabled the functions below are
// real declarations; otherwise they are replaced by macros that do nothing.
#if MICROPROFILE_WEBSERVER
#ifdef _WIN32
// true when f is the Winsock invalid-socket value
#define MP_INVALID_SOCKET(f) (f == INVALID_SOCKET)
#else
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/socket.h>
// on non-Windows builds a negative descriptor is treated as invalid
#define MP_INVALID_SOCKET(f) (f < 0)
#endif
void MicroProfileWebServerStart();
void MicroProfileWebServerStop();
void MicroProfileWebServerJoin();
bool MicroProfileWebServerUpdate();
void MicroProfileDumpToFile();
#else
// Web server disabled: each call expands to an empty statement so call sites
// compile unchanged.
#define MicroProfileWebServerStart() \
do \
{ \
} while(0)
#define MicroProfileWebServerStop() \
do \
{ \
} while(0)
#define MicroProfileWebServerJoin() \
do \
{ \
} while(0)
// the update stub always evaluates to false
#define MicroProfileWebServerUpdate() false
#define MicroProfileDumpToFile() \
do \
{ \
} while(0)
#endif
#include <algorithm>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#if MICROPROFILE_DEBUG
#ifdef _WIN32
// Debug print for Windows builds: formats into a 1 KiB stack buffer and sends
// the text to the debugger output via OutputDebugStringA.
void uprintf(const char* fmt, ...)
{
	char buffer[1024];
	va_list args;
	va_start(args, fmt);
	stbsp_vsnprintf(buffer, sizeof(buffer) - 1, fmt, args);
	va_end(args);
	OutputDebugStringA(buffer);
}
#else
#define uprintf(...) printf(__VA_ARGS__)
#endif
#else
// Debug output disabled: uprintf expands to a statement that only applies sizeof
// to its arguments. sizeof does not evaluate its operand, so the arguments are
// referenced but nothing is executed.
#define uprintf(...) \
do \
{ \
sizeof(__VA_ARGS__); \
} while(0)
#endif
#define S g_MicroProfile
MicroProfile g_MicroProfile;
#ifdef MICROPROFILE_IOS
// iOS doesn't support __thread
static pthread_key_t g_MicroProfileThreadLogKey;
static pthread_once_t g_MicroProfileThreadLogKeyOnce = PTHREAD_ONCE_INIT;
// Creates the pthread key that holds the per-thread log pointer. No destructor
// is registered for the key.
static void MicroProfileCreateThreadLogKey()
{
	pthread_key_create(&g_MicroProfileThreadLogKey, nullptr);
}
#else
MP_THREAD_LOCAL MicroProfileThreadLog* g_MicroProfileThreadLogThreadLocal = 0;
#endif
static bool g_bUseLock = false; /// This is used because windows does not support using mutexes under dll init(which is where global initialization is handled)
MICROPROFILE_DEFINE(g_MicroProfileFlip, "MicroProfile", "MicroProfileFlip", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileThreadLoop, "MicroProfile", "ThreadLoop", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileClear, "MicroProfile", "Clear", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileAccumulate, "MicroProfile", "Accumulate", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileContextSwitchSearch, "MicroProfile", "ContextSwitchSearch", MP_GREEN4);
MICROPROFILE_DEFINE(g_MicroProfileGpuSubmit, "MicroProfile", "MicroProfileGpuSubmit", MP_HOTPINK2);
MICROPROFILE_DEFINE(g_MicroProfileSendLoop, "MicroProfile", "MicroProfileSocketSendLoop", MP_GREEN4);
MICROPROFILE_DEFINE_LOCAL_ATOMIC_COUNTER(g_MicroProfileBytesPerFlip, "microprofile/bytesperflip");
// void MicroProfileHashTableInit(MicroProfileHashTable* pTable, uint32_t nInitialSize, MicroProfileHashCompareFunction CompareFunc, MicroProfileHashFunction HashFunc);
void MicroProfileHashTableDestroy(MicroProfileHashTable* pTable);
uint64_t MicroProfileHashTableHash(MicroProfileHashTable* pTable, uint64_t K);
void MicroProfileHashTableGrow(MicroProfileHashTable* pTable);
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value, uint64_t H, bool bAllowGrow);
bool MicroProfileHashTableGet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t* pValue);
bool MicroProfileHashTableRemove(MicroProfileHashTable* pTable, uint64_t Key);
bool MicroProfileHashTableSetString(MicroProfileHashTable* pTable, const char* pKey, const char* pValue);
bool MicroProfileHashTableGetString(MicroProfileHashTable* pTable, const char* pKey, const char** pValue);
bool MicroProfileHashTableRemoveString(MicroProfileHashTable* pTable, const char* pKey);
bool MicroProfileHashTableSetPtr(MicroProfileHashTable* pTable, const void* pKey, void* pValue);
template <typename T = void>
bool MicroProfileHashTableGetPtr(MicroProfileHashTable* pTable, const void* pKey, T** pValue = nullptr);
bool MicroProfileHashTableRemovePtr(MicroProfileHashTable* pTable, const void* pKey);
// Bit flags for string interning. NOTE(review): presumably passed as the
// nInternalFlags argument of MicroProfileStringIntern - confirm at the definition.
enum
{
// bit 0; by its name, requests lower-casing - TODO confirm
ESTRINGINTERN_LOWERCASE = 1,
// bit 1; by its name, requests forward slashes - TODO confirm
ESTRINGINTERN_FORCEFORWARDSLASH = 0x2,
};
const char* MicroProfileStringIntern(const char* pStr);
const char* MicroProfileStringInternLower(const char* pStr);
const char* MicroProfileStringInternSlash(const char* pStr);
const char* MicroProfileStringIntern(const char* pStr, uint32_t nLen, uint32_t nInternalFlags = 0);
void MicroProfileStringsInit(MicroProfileStrings* pStrings);
void MicroProfileStringsDestroy(MicroProfileStrings* pStrings);
MicroProfileToken MicroProfileCounterTokenInit(int nParent, uint32_t nFlags);
void MicroProfileCounterTokenInitName(MicroProfileToken nToken, const char* pName);
void MicroProfileCounterConfigToken(MicroProfileToken, uint32_t eFormat, int64_t nLimit, uint32_t nFlags);
uint16_t MicroProfileFindGroup(const char* pGroup);
// Returns the process-wide recursive mutex, constructed on first use.
inline std::recursive_mutex& MicroProfileMutex()
{
	static std::recursive_mutex s_Mutex;
	return s_Mutex;
}
// Non-inline accessor for the same mutex that MicroProfileMutex() returns.
std::recursive_mutex& MicroProfileGetMutex()
{
	std::recursive_mutex& Mutex = MicroProfileMutex();
	return Mutex;
}
// Returns a separate process-wide recursive mutex, constructed on first use.
inline std::recursive_mutex& MicroProfileTimelineMutex()
{
	static std::recursive_mutex s_TimelineMutex;
	return s_TimelineMutex;
}
// Returns a pointer to the single global profiler state object.
MICROPROFILE_API MicroProfile* MicroProfileGet()
{
	MicroProfile* pState = &g_MicroProfile;
	return pState;
}
MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName);
MicroProfileThreadLogGpu* MicroProfileThreadLogGpuAllocInternal();
void* MicroProfileSocketSenderThread(void*);
// One-time initialization of the global profiler state `S`.
// The first call zeroes S and fills in defaults (built-in counter tokens, the
// "default" category, frame history, web server defaults, GPU log, timeline
// tokens); every later call returns immediately. The global mutex is taken only
// when g_bUseLock is set.
void MicroProfileInit()
{
static bool bOnce = true;
// fast path: already initialized
if(!bOnce)
{
return;
}
std::recursive_mutex& mutex = MicroProfileMutex();
// sample the flag once so lock and unlock below always agree
bool bUseLock = g_bUseLock;
if(bUseLock)
mutex.lock();
// checked again now that the lock (if used) is held
if(bOnce)
{
bOnce = false;
// start from an all-zero state
memset(&S, 0, sizeof(S));
MicroProfileStringsInit(&S.Strings);
// these strings are used for counter names inside the string
// built-in counter tokens: the first argument is the parent token (-1 for the root)
S.CounterToken_MicroProfile = MicroProfileCounterTokenInit(-1, 0);
S.CounterToken_StringBlock = MicroProfileCounterTokenInit(S.CounterToken_MicroProfile, 0);
S.CounterToken_StringBlock_Count = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
S.CounterToken_StringBlock_Waste = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
S.CounterToken_StringBlock_Strings = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
S.CounterToken_StringBlock_Memory = MicroProfileCounterTokenInit(S.CounterToken_StringBlock, 0);
S.CounterToken_Alloc = MicroProfileCounterTokenInit(S.CounterToken_MicroProfile, 0);
S.CounterToken_Alloc_Memory = MicroProfileCounterTokenInit(S.CounterToken_Alloc, 0);
S.CounterToken_Alloc_Count = MicroProfileCounterTokenInit(S.CounterToken_Alloc, 0);
// names are assigned after all tokens exist
MicroProfileCounterTokenInitName(S.CounterToken_MicroProfile, "microprofile");
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock, "stringblock");
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Count, "count");
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Waste, "waste");
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Strings, "strings");
MicroProfileCounterTokenInitName(S.CounterToken_StringBlock_Memory, "memory");
MicroProfileCounterTokenInitName(S.CounterToken_Alloc, "alloc");
MicroProfileCounterTokenInitName(S.CounterToken_Alloc_Memory, "memory");
MicroProfileCounterTokenInitName(S.CounterToken_Alloc_Count, "count");
// account for the global state object itself
S.nMemUsage += sizeof(S);
for(int i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
{
S.GroupInfo[i].pName[0] = '\0';
}
for(int i = 0; i < MICROPROFILE_MAX_CATEGORIES; ++i)
{
S.CategoryInfo[i].pName[0] = '\0';
memset(S.CategoryInfo[i].nGroupMask, 0, sizeof(S.CategoryInfo[i].nGroupMask));
}
// category 0 always exists and is named "default" (copy includes the terminator)
memcpy(&S.CategoryInfo[0].pName[0], "default", sizeof("default"));
S.nCategoryCount = 1;
for(int i = 0; i < MICROPROFILE_MAX_TIMERS; ++i)
{
S.TimerInfo[i].pName[0] = '\0';
}
S.nGroupCount = 0;
S.nFlipStartTick = MP_TICK();
S.nContextSwitchStalledTick = MP_TICK();
S.nAggregateFlipTick = MP_TICK();
memset(S.nActiveGroups, 0, sizeof(S.nActiveGroups));
S.nFrozen = 0;
S.nWasFrozen = 0;
memset(S.nForceGroups, 0, sizeof(S.nForceGroups));
memset(S.nActiveGroupsWanted, 0, sizeof(S.nActiveGroupsWanted));
S.nStartEnabled = 0;
S.nAllThreadsWanted = 1;
S.nAggregateFlip = 0;
S.nTotalTimers = 0;
// no graph slot is bound to a timer yet
for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
{
S.Graph[i].nToken = MICROPROFILE_INVALID_TOKEN;
}
S.fReferenceTime = 33.33f;
S.fRcpReferenceTime = 1.f / S.fReferenceTime;
// -1 marks an empty thread-log free list
S.nFreeListHead = -1;
// every frame in the history starts at the same CPU tick, with no GPU tick yet
int64_t nTick = MP_TICK();
for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
{
S.Frames[i].nFrameStartCpu = nTick;
S.Frames[i].nFrameStartGpu = MICROPROFILE_INVALID_TICK;
}
S.nWebServerPort = MICROPROFILE_WEBSERVER_PORT; // Use defined value as default port
S.nWebServerDataSent = (uint64_t)-1;
S.WebSocketTimers = -1;
S.WebSocketCounters = -1;
S.WebSocketGroups = -1;
S.nSocketFail = 0;
S.DumpFrameCount = MICROPROFILE_WEBSERVER_DEFAULT_FRAMES;
#if MICROPROFILE_COUNTER_HISTORY
S.nCounterHistoryPut = 0;
// min starts at the largest and max at the smallest 64-bit bit pattern
for(uint32_t i = 0; i < MICROPROFILE_MAX_COUNTERS; ++i)
{
S.nCounterMin[i] = 0x7fffffffffffffff;
S.nCounterMax[i] = 0x8000000000000000;
}
#endif
S.GpuQueue = MICROPROFILE_GPU_INIT_QUEUE("GPU");
S.pGpuGlobal = MicroProfileThreadLogGpuAllocInternal();
MicroProfileGpuBegin(0, S.pGpuGlobal);
S.pJsonSettings = 0;
S.pJsonSettingsName = nullptr;
S.nJsonSettingsPending = 0;
S.nJsonSettingsBufferSize = 0;
S.nWSWasConnected = 0;
// mark every timeline token slot as unused
for(uint32_t i = 0; i < MICROPROFILE_TIMELINE_MAX_TOKENS; ++i)
{
S.TimelineTokenFrameEnter[i] = MICROPROFILE_INVALID_FRAME;
S.TimelineTokenFrameLeave[i] = MICROPROFILE_INVALID_FRAME;
S.TimelineTokenStaticString[i] = nullptr;
S.TimelineToken[i] = 0;
}
// all-ones bytes: every accumulated minimum starts at the maximum uint64 value
memset(&S.AccumMinTimers[0], 0xFF, sizeof(S.AccumMinTimers));
// the double-typed counter views alias the storage of the integer arrays
S.CountersDouble = (std::atomic<double>*)&S.Counters;
#if MICROPROFILE_COUNTER_HISTORY
S.dCounterHistory = (double*)S.nCounterHistory;
S.dCounterMax = (double*)S.nCounterMax;
S.dCounterMin = (double*)S.nCounterMin;
#endif
}
// the steps below sit outside the bOnce block but still run only on the first call
MicroProfileUpdateSettingsPath();
#if MICROPROFILE_FRAME_EXTRA_DATA
// non-null marker value, not a real allocation
S.FrameExtraCounterData = (MicroProfileFrameExtraCounterData*)1;
#endif
MicroProfileCounterConfigToken(S.CounterToken_Alloc_Memory, MICROPROFILE_COUNTER_FORMAT_BYTES, 0, MICROPROFILE_COUNTER_FLAG_DETAILED);
MICROPROFILE_COUNTER_CONFIG("MicroProfile/ThreadLog/Memory", MICROPROFILE_COUNTER_FORMAT_BYTES, 0, MICROPROFILE_COUNTER_FLAG_DETAILED);
if(bUseLock)
{
mutex.unlock();
}
}
// Rebuilds the three settings file paths (regular, built-in/read-only and
// temporary) from MICROPROFILE_GET_SETTINGS_FILE_PATH, freeing any previous ones.
void MicroProfileUpdateSettingsPath()
{
	// release whatever paths were built by an earlier call
	const char** ppPaths[] = { &S.pSettings, &S.pSettingsReadOnly, &S.pSettingsTemp };
	for(const char** ppPath : ppPaths)
	{
		if(*ppPath)
		{
			MicroProfileFreeInternal((void*)*ppPath);
			*ppPath = nullptr;
		}
	}
	// Joins BasePath and File into a newly allocated string, inserting the
	// platform separator unless BasePath already ends in a slash.
	auto JoinPath = [](const char* BasePath, const char* File) -> const char*
	{
#ifdef _WIN32
		const char Slash = '\\';
#else
		const char Slash = '/';
#endif
		const size_t BaseLen = strlen(BasePath);
		// only bases longer than one character are inspected for a trailing slash
		const char Last = BaseLen > 1 ? BasePath[BaseLen - 1] : '\0';
		const bool TrailingSlash = Last == '\\' || Last == '/';
		const size_t Len = BaseLen + strlen(File) + 2;
		char* Data = (char*)MicroProfileAllocInternal(Len + 1, 1);
		if(TrailingSlash)
			snprintf(Data, Len, "%s%s", BasePath, File);
		else
			snprintf(Data, Len, "%s%c%s", BasePath, Slash, File);
		return Data;
	};
	const char* pBaseSettingsPath = MICROPROFILE_GET_SETTINGS_FILE_PATH;
	S.pSettings = JoinPath(pBaseSettingsPath, MICROPROFILE_SETTINGS_FILE);
	S.pSettingsReadOnly = JoinPath(pBaseSettingsPath, MICROPROFILE_SETTINGS_FILE_BUILTIN);
	S.pSettingsTemp = JoinPath(pBaseSettingsPath, MICROPROFILE_SETTINGS_FILE MICROPROFILE_SETTINGS_FILE_TEMP);
}
void MicroProfileJoinContextSwitchTrace();
// Shuts the profiler down: flags shutdown and stops the context switch trace
// under the lock, joins the web server and context switch threads without the
// lock held, then frees the profiler's buffers under the lock again.
void MicroProfileShutdown()
{
{
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
// flag shutdown and stop the context switch trace while holding the lock
S.nMicroProfileShutdown = 1;
MicroProfileStopContextSwitchTrace();
}
// the joins run with the mutex released
MicroProfileWebServerJoin();
MicroProfileJoinContextSwitchTrace();
{
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
if(S.pJsonSettings)
{
MP_FREE(S.pJsonSettings);
S.pJsonSettings = 0;
S.pJsonSettingsName = 0;
S.nJsonSettingsBufferSize = 0;
}
// platform GPU state is only torn down when one was created
if(S.pGPU)
{
MicroProfileGpuShutdownPlatform();
}
MicroProfileHashTableDestroy(&S.Strings.HashTable);
MicroProfileStringsDestroy(&S.Strings);
MICROPROFILE_FREE_NON_ALIGNED(S.WSBuf.pBufferAllocation);
MicroProfileFreeGpuQueue(S.GpuQueue);
MicroProfileThreadLogGpuFree(S.pGpuGlobal);
// free every CPU thread log in the pool
for(uint32_t i = 0; i < S.nNumLogs; ++i)
{
#if MICROPROFILE_ASSERT_LOG_FREED
MP_ASSERT(S.Pool[i]->nActive != 1);
#endif
MP_FREE(S.Pool[i]);
}
// free every GPU thread log in the pool
for(uint32_t i = 0; i < S.nNumLogsGpu; ++i)
{
#if MICROPROFILE_ASSERT_LOG_FREED
MP_ASSERT(!S.PoolGpu[i]->nAllocated);
#endif
MP_FREE(S.PoolGpu[i]);
}
// release the settings paths built by MicroProfileUpdateSettingsPath
MicroProfileFreeInternal((void*)S.pSettings);
S.pSettings = nullptr;
MicroProfileFreeInternal((void*)S.pSettingsReadOnly);
S.pSettingsReadOnly = nullptr;
MicroProfileFreeInternal((void*)S.pSettingsTemp);
S.pSettingsTemp = nullptr;
}
}
// Thread body for auto-flip: until nAutoFlipStop becomes non-zero, sleeps for
// nAutoFlipDelay and then calls MicroProfileFlip(0).
static void* MicroProfileAutoFlipThread(void*)
{
	MicroProfileOnThreadCreate("AutoFlipThread");
	while(S.nAutoFlipStop.load() == 0)
	{
		MICROPROFILE_SCOPEI("MICROPROFILE", "AutoFlipThread", 0);
		MicroProfileSleep(S.nAutoFlipDelay.load());
		MicroProfileFlip(0);
	}
	MicroProfileOnThreadExit();
	return nullptr;
}
// Starts the auto-flip thread with the given delay (passed to MicroProfileSleep).
void MicroProfileStartAutoFlip(uint32_t nMsDelay)
{
	S.nAutoFlipDelay.store(nMsDelay);
	S.nAutoFlipStop = 0; // clear any stop request left from a previous run
	MicroProfileThreadStart(&S.AutoFlipThread, MicroProfileAutoFlipThread);
}
// Asks the auto-flip thread to stop and waits for it to finish.
void MicroProfileStopAutoFlip()
{
	S.nAutoFlipStop = 1;
	MicroProfileThreadJoin(&S.AutoFlipThread);
}
// Marks per-frame extra counter data as wanted by storing a non-null marker in
// S.FrameExtraCounterData, unless the pointer is already set.
void MicroProfileEnableFrameExtraCounterData()
{
	// should not be called at the same time as MicroProfileFlip.
	if(S.FrameExtraCounterData)
	{
		return;
	}
	S.FrameExtraCounterData = (MicroProfileFrameExtraCounterData*)1;
}
void MicroProfileCsvConfigEnd()
{
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
S.CsvConfig.State = MicroProfileCsvConfig::ACTIVE;
}
// Begins CSV configuration: allocates the name and index tables for up to
// MaxTimers timers, MaxGroups groups and MaxCounters counters, plus one 64-bit
// value per element for each of MICROPROFILE_MAX_FRAME_HISTORY frames.
// Index tables start out filled with UINT16_MAX, name tables and frame data
// with zero. Leaves the config in the CONFIG state; entries are then added with
// the MicroProfileCsvConfigAdd* functions.
void MicroProfileCsvConfigBegin(uint32_t MaxTimers, uint32_t MaxGroups, uint32_t MaxCounters, uint32_t Flags)
{
	MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::INACTIVE); // right now, only support being configured once.
	uint32_t TotalElements = MaxTimers + MaxGroups + MaxCounters;
	// byte counts are kept in size_t so large limits cannot wrap a 32-bit value
	size_t TimerIndexSize = sizeof(uint16_t) * MaxTimers;
	size_t GroupIndexSize = sizeof(uint16_t) * MaxGroups;
	size_t CounterIndexSize = sizeof(uint16_t) * MaxCounters;
	size_t FrameBlockSize = TotalElements * sizeof(uint64_t);
	size_t FrameDataSize = FrameBlockSize * MICROPROFILE_MAX_FRAME_HISTORY;
	S.CsvConfig.NumTimers = 0;
	S.CsvConfig.NumGroups = 0;
	S.CsvConfig.NumCounters = 0;
	S.CsvConfig.MaxTimers = MaxTimers;
	S.CsvConfig.MaxGroups = MaxGroups;
	S.CsvConfig.MaxCounters = MaxCounters;
	S.CsvConfig.TotalElements = TotalElements;
	// timers
	S.CsvConfig.TimerIndices = (uint16_t*)MicroProfileAllocInternal(TimerIndexSize, alignof(uint16_t));
	S.CsvConfig.pTimerNames = (const char**)MicroProfileAllocInternal(MaxTimers * sizeof(const char*), alignof(const char*));
	memset(S.CsvConfig.pTimerNames, 0, MaxTimers * sizeof(const char*));
	for(uint32_t i = 0; i < MaxTimers; ++i)
		S.CsvConfig.TimerIndices[i] = UINT16_MAX;
	// groups
	S.CsvConfig.pGroupNames = (const char**)MicroProfileAllocInternal(MaxGroups * sizeof(const char*), alignof(const char*));
	memset(S.CsvConfig.pGroupNames, 0, MaxGroups * sizeof(const char*));
	S.CsvConfig.GroupIndices = (uint16_t*)MicroProfileAllocInternal(GroupIndexSize, alignof(uint16_t));
	for(uint32_t i = 0; i < MaxGroups; ++i)
		S.CsvConfig.GroupIndices[i] = UINT16_MAX;
	// counters
	S.CsvConfig.pCounterNames = (const char**)MicroProfileAllocInternal(MaxCounters * sizeof(const char*), alignof(const char*));
	memset(S.CsvConfig.pCounterNames, 0, MaxCounters * sizeof(const char*));
	S.CsvConfig.CounterIndices = (uint16_t*)MicroProfileAllocInternal(CounterIndexSize, alignof(uint16_t));
	for(uint32_t i = 0; i < MaxCounters; ++i)
		S.CsvConfig.CounterIndices[i] = UINT16_MAX;
	// per-frame value storage
	S.CsvConfig.FrameData = (uint64_t*)MicroProfileAllocInternal(FrameDataSize, alignof(uint64_t));
	memset(S.CsvConfig.FrameData, 0, FrameDataSize);
	S.CsvConfig.State = MicroProfileCsvConfig::CONFIG;
	S.CsvConfig.Flags = Flags;
}
void MicroProfileCsvConfigAddTimer(const char* Group, const char* Timer, const char* Name, MicroProfileTokenType Type)
{
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
if(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG && S.CsvConfig.NumTimers < S.CsvConfig.MaxTimers)
{
MicroProfileToken ret = MicroProfileGetToken(Group, Timer, MP_AUTO, Type, MICROPROFILE_TIMER_FLAG_PLACEHOLDER);
if(ret != MICROPROFILE_INVALID_TOKEN)
{
MP_ASSERT(S.CsvConfig.NumTimers < S.CsvConfig.MaxTimers);
uint16_t TimerIndex = MicroProfileGetTimerIndex(ret);
for(uint32_t i = 0; i < S.CsvConfig.NumTimers; ++i)
{
if(S.CsvConfig.TimerIndices[i] == TimerIndex)
return;
}
S.CsvConfig.pTimerNames[S.CsvConfig.NumTimers] = Name;
S.CsvConfig.TimerIndices[S.CsvConfig.NumTimers++] = TimerIndex;
}
}
}
void MicroProfileCsvConfigAddGroup(const char* Group, const char* Name)
{
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
if(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG && S.CsvConfig.NumGroups < S.CsvConfig.MaxGroups)
{
uint16_t Index = MicroProfileFindGroup(Group);
MP_ASSERT(UINT16_MAX != Index);
if(UINT16_MAX != Index)
{
MP_ASSERT(S.CsvConfig.NumGroups < S.CsvConfig.MaxGroups);
for(uint32_t i = 0; i < S.CsvConfig.NumGroups; ++i)
{
if(S.CsvConfig.GroupIndices[i] == Index)
return;
}
S.CsvConfig.pGroupNames[S.CsvConfig.NumGroups] = Name;
S.CsvConfig.GroupIndices[S.CsvConfig.NumGroups++] = Index;
}
}
}
void MicroProfileCsvConfigAddCounter(const char* CounterName, const char* Name)
{
MP_ASSERT(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG);
if(S.CsvConfig.State == MicroProfileCsvConfig::CONFIG && S.CsvConfig.NumCounters < S.CsvConfig.MaxCounters)
{
MicroProfileToken Token = MicroProfileGetCounterToken(CounterName, 0);
if(MICROPROFILE_INVALID_TOKEN != Token)
{
MP_ASSERT(Token < UINT16_MAX);
MP_ASSERT(S.CsvConfig.NumCounters < S.CsvConfig.MaxCounters);
for(uint32_t i = 0; i < S.CsvConfig.NumCounters; ++i)
{
if(S.CsvConfig.CounterIndices[i] == (uint16_t)Token)
return;
}
S.CsvConfig.pCounterNames[S.CsvConfig.NumCounters] = Name;
S.CsvConfig.CounterIndices[S.CsvConfig.NumCounters++] = (uint16_t)Token;
}
}
}
#ifdef MICROPROFILE_IOS
// Returns the calling thread's log pointer from the pthread key, creating the
// key on first use. Returns null when no log has been set for this thread.
inline MicroProfileThreadLog* MicroProfileGetThreadLog()
{
	pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
	void* pStored = pthread_getspecific(g_MicroProfileThreadLogKey);
	return (MicroProfileThreadLog*)pStored;
}
// Stores pLog as the calling thread's log pointer in the pthread key, creating
// the key on first use.
inline void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
{
	pthread_once(&g_MicroProfileThreadLogKeyOnce, MicroProfileCreateThreadLogKey);
	const void* pValue = pLog;
	pthread_setspecific(g_MicroProfileThreadLogKey, pValue);
}
#else
MicroProfileThreadLog* MicroProfileGetThreadLog()
{
return g_MicroProfileThreadLogThreadLocal;
}
void MicroProfileSetThreadLog(MicroProfileThreadLog* pLog)
{
g_MicroProfileThreadLogThreadLocal = pLog;
}
#endif
// Returns the calling thread's log, creating one through
// MicroProfileInitThreadLog() when none has been set yet.
MicroProfileThreadLog* MicroProfileGetThreadLog2()
{
    if(MicroProfileThreadLog* pExisting = MicroProfileGetThreadLog())
        return pExisting;
    MicroProfileInitThreadLog();
    return MicroProfileGetThreadLog();
}
// Scoped lock over a recursive mutex that only locks when g_bUseLock was set
// at construction time. Unlock()/Lock() allow the mutex to be released
// temporarily inside the scope; nUnlock tracks the balance of those calls and
// must be zero again when the object is destroyed.
struct MicroProfileScopeLock
{
    bool bUseLock;            // snapshot of g_bUseLock taken in the constructor
    int nUnlock;              // number of Unlock() calls not yet matched by Lock()
    std::recursive_mutex& m;  // the guarded mutex

    MicroProfileScopeLock(std::recursive_mutex& Mutex)
        : bUseLock(g_bUseLock)
        , nUnlock(0)
        , m(Mutex)
    {
        if(bUseLock)
        {
            m.lock();
        }
    }

    ~MicroProfileScopeLock()
    {
        MP_ASSERT(nUnlock == 0);
        if(bUseLock)
        {
            m.unlock();
        }
    }

    // Temporarily releases the mutex; only valid when locking is in use.
    void Unlock()
    {
        MP_ASSERT(bUseLock);
        m.unlock();
        ++nUnlock;
    }

    // Re-acquires the mutex after a matching Unlock().
    void Lock()
    {
        m.lock();
        --nUnlock;
    }
};
void MicroProfileLogReset(MicroProfileThreadLog* pLog);
void MicroProfileLogClearInternal(MicroProfileThreadLog* pLog);
// Hands out a thread log named `pName` (or "TID:[<thread id>]" when pName is
// null), taking it from the free list when possible and allocating a new one
// otherwise. Runs under MicroProfileMutex() via MicroProfileScopeLock.
// The returned log has nActive == 1, nFreeListNext == -1 and nThreadId set to
// the calling thread's id.
MicroProfileThreadLog* MicroProfileCreateThreadLog(const char* pName)
{
MicroProfileScopeLock L(MicroProfileMutex());
// Pool exhausted and nothing on the free list: recycle an exited log.
if(S.nNumLogs == MICROPROFILE_MAX_THREADS && S.nFreeListHead == -1)
{
uprintf("recycling thread logs\n");
// reuse the oldest.
MicroProfileThreadLog* pOldest = 0;
uint32_t nIdleFrames = 0;
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
MicroProfileThreadLog* pLog = S.Pool[i];
uprintf("tlactive %p, %d. idle:%d\n", pLog, pLog->nActive, pLog->nIdleFrames);
// nActive == 2 is the value written by MicroProfileOnThreadExit / MicroProfileFreeGpuQueue.
if(pLog->nActive == 2)
{
// '>=' means that among equally idle logs the highest pool index wins.
if(pLog->nIdleFrames >= nIdleFrames)
{
nIdleFrames = pLog->nIdleFrames;
pOldest = pLog;
}
}
}
// NOTE(review): if no log has nActive == 2, pOldest stays null and is still
// passed to MicroProfileLogReset when MP_ASSERT compiles to nothing — confirm.
MP_ASSERT(pOldest);
// MicroProfileLogReset pushes the log onto the free list, so the branch below picks it up.
MicroProfileLogReset(pOldest);
}
MicroProfileThreadLog* pLog = 0;
if(S.nFreeListHead != -1)
{
// Pop the head of the free list.
pLog = S.Pool[S.nFreeListHead];
MP_ASSERT(pLog->nPut.load() == 0);
MP_ASSERT(pLog->nGet.load() == 0);
S.nFreeListHead = S.Pool[S.nFreeListHead]->nFreeListNext;
}
else
{
// No free entry: allocate a new log and append it to the pool.
MICROPROFILE_COUNTER_ADD("MicroProfile/ThreadLog/Allocated", 1);
MICROPROFILE_COUNTER_ADD("MicroProfile/ThreadLog/Memory", sizeof(MicroProfileThreadLog));
pLog = MP_ALLOC_OBJECT(MicroProfileThreadLog);
MicroProfileLogClearInternal(pLog);
S.nMemUsage += sizeof(MicroProfileThreadLog);
pLog->nLogIndex = S.nNumLogs;
MP_ASSERT(S.nNumLogs < MICROPROFILE_MAX_THREADS);
S.Pool[S.nNumLogs++] = pLog;
}
int len = 0;
if(pName)
{
// Copy the caller's name, truncated so the terminator written below always fits.
len = (int)strlen(pName);
int maxlen = sizeof(pLog->ThreadName) - 1;
len = len < maxlen ? len : maxlen;
memcpy(&pLog->ThreadName[0], pName, len);
}
else
{
// NOTE(review): len is used as an index below without clamping; this relies on
// the formatted id always fitting in ThreadName — confirm.
len = snprintf(&pLog->ThreadName[0], sizeof(pLog->ThreadName) - 1, "TID:[%" PRId64 "]", (int64_t)MP_GETCURRENTTHREADID());
}
pLog->ThreadName[len] = '\0';
pLog->nThreadId = MP_GETCURRENTTHREADID();
pLog->nFreeListNext = -1;
pLog->nActive = 1;
return pLog;
}
void MicroProfileOnThreadCreate(const char* pThreadName)
{
char Buffer[64];
g_bUseLock = true;
MicroProfileInit();
MP_ASSERT(MicroProfileGetThreadLog() == 0);
MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pThreadName ? pThreadName : MicroProfileGetThreadName(Buffer));
(void)Buffer;
MP_ASSERT(pLog);
MicroProfileSetThreadLog(pLog);
}
// Returns an allocated GPU thread log to its empty state: no entries, empty
// scope stack, and the context/start fields set to their all-ones sentinels.
void MicroProfileThreadLogGpuReset(MicroProfileThreadLogGpu* pLog)
{
    MP_ASSERT(pLog->nAllocated);
    pLog->nPut = 0;
    pLog->nStackScope = 0;
    pLog->nStart = (uint32_t)-1;
    pLog->pContext = (void*)-1;
}
// Returns a GPU thread log marked as allocated and reset. Reuses the first
// pool entry that is not currently allocated; otherwise allocates a new log
// and appends it to S.PoolGpu. Takes no lock itself.
MicroProfileThreadLogGpu* MicroProfileThreadLogGpuAllocInternal()
{
    MicroProfileThreadLogGpu* pLog = nullptr;
    for(uint32_t nSlot = 0; !pLog && nSlot < S.nNumLogsGpu; ++nSlot)
    {
        MicroProfileThreadLogGpu* pCandidate = S.PoolGpu[nSlot];
        if(pCandidate && !pCandidate->nAllocated)
        {
            pLog = pCandidate;
        }
    }
    if(!pLog)
    {
        // Nothing to reuse: grow the pool by one entry.
        pLog = MP_ALLOC_OBJECT(MicroProfileThreadLogGpu);
        int nLogIndex = S.nNumLogsGpu++;
        MP_ASSERT(nLogIndex < MICROPROFILE_MAX_THREADS);
        pLog->nId = nLogIndex;
        S.PoolGpu[nLogIndex] = pLog;
    }
    // Must be flagged before the reset, which asserts on nAllocated.
    pLog->nAllocated = 1;
    MicroProfileThreadLogGpuReset(pLog);
    return pLog;
}
// Locking wrapper: holds MicroProfileMutex() while
// MicroProfileThreadLogGpuAllocInternal() picks or creates a GPU log.
MicroProfileThreadLogGpu* MicroProfileThreadLogGpuAlloc()
{
    std::recursive_mutex& Mutex = MicroProfileMutex();
    std::lock_guard<std::recursive_mutex> Guard(Mutex);
    MicroProfileThreadLogGpu* pLog = MicroProfileThreadLogGpuAllocInternal();
    return pLog;
}
// Marks a GPU thread log as no longer allocated so that
// MicroProfileThreadLogGpuAllocInternal() can hand it out again.
// The log must currently be allocated.
void MicroProfileThreadLogGpuFree(MicroProfileThreadLogGpu* pLog)
{
    MP_ASSERT(pLog->nAllocated);
    pLog->nAllocated = 0;
}
int MicroProfileGetGpuQueue(const char* pQueueName)
{
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; i++)
{
MicroProfileThreadLog* pLog = S.Pool[i];
if(pLog && pLog->nGpu && pLog->nActive && 0 == MP_STRCASECMP(pQueueName, pLog->ThreadName))
{
return i;
}
}
MP_ASSERT(0); // call MicroProfileInitGpuQueue
return 0;
}
// Returns the active GPU queue log whose name matches `pQueueName` (compared
// with MP_STRCASECMP). Asserts and returns null when no such queue exists.
MicroProfileThreadLog* MicroProfileGetGpuQueueLog(const char* pQueueName)
{
    for(uint32_t nSlot = 0; nSlot < MICROPROFILE_MAX_THREADS; nSlot++)
    {
        MicroProfileThreadLog* pCandidate = S.Pool[nSlot];
        if(!pCandidate || !pCandidate->nGpu || !pCandidate->nActive)
            continue;
        if(0 == MP_STRCASECMP(pQueueName, pCandidate->ThreadName))
            return pCandidate;
    }
    MP_ASSERT(0); // call MicroProfileInitGpuQueue
    return 0;
}
int MicroProfileInitGpuQueue(const char* pQueueName)
{
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
MicroProfileThreadLog* pLog = S.Pool[i];
if(pLog && 0 == MP_STRCASECMP(pQueueName, pLog->ThreadName))
{
MP_ASSERT(0); // call MicroProfileInitGpuQueue only once per CommandQueue. name must not clash with threadname
}
}
MicroProfileThreadLog* pLog = MicroProfileCreateThreadLog(pQueueName);
pLog->nGpu = 1;
pLog->nThreadId = 0;
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
if(S.Pool[i] == pLog)
{
return i;
}
}
MP_BREAK();
return 0;
}
void MicroProfileFreeGpuQueue(int nQueue)
{
MicroProfileThreadLog* pLog = S.Pool[nQueue];
if(pLog)
{
MP_ASSERT(pLog->nActive == 1);
pLog->nActive = 2;
}
}
// Returns the GPU thread log stored in S.pGpuGlobal.
MicroProfileThreadLogGpu* MicroProfileGetGlobalGpuThreadLog()
{
    MicroProfileThreadLogGpu* pGlobalLog = S.pGpuGlobal;
    return pGlobalLog;
}
// Returns the queue index stored in S.GpuQueue.
MICROPROFILE_API int MicroProfileGetGlobalGpuQueue()
{
    const int nGlobalQueue = S.GpuQueue;
    return nGlobalQueue;
}
// Zeroes the thread log from its Log array through the end of the object and
// resets the put/get cursors. The atomics cannot be wiped with memset, so
// they are reset with explicit stores; members laid out before Log[0] are
// left untouched.
void MicroProfileLogClearInternal(MicroProfileThreadLog* pLog)
{
    char* pFirstByte = (char*)&pLog->Log[0];
    char* pPastEnd = (char*)(pLog + 1);
    const size_t nBytes = (size_t)(pPastEnd - pFirstByte);
    memset(pFirstByte, 0, nBytes);
    pLog->nPut.store(0);
    pLog->nGet.store(0);
}
// Clears `pLog` and pushes it onto the free list so that
// MicroProfileCreateThreadLog() can hand it out again. Also zeroes the log's
// start position in every entry of the frame history.
// Runs under MicroProfileMutex(). A log that is not part of S.Pool is ignored.
void MicroProfileLogReset(MicroProfileThreadLog* pLog)
{
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
    int32_t nLogIndex = -1;
    for(int i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
    {
        if(pLog == S.Pool[i])
        {
            nLogIndex = i;
            break;
        }
    }
    // Pool index 0 is a valid log, so only "not found" is an error here.
    MP_ASSERT(nLogIndex >= 0 && nLogIndex < MICROPROFILE_MAX_THREADS);
    if(nLogIndex < 0)
    {
        // Not in the pool: leave the free list and frame history alone rather
        // than indexing them with -1.
        return;
    }
    MicroProfileLogClearInternal(pLog);
    pLog->nFreeListNext = S.nFreeListHead;
    S.nFreeListHead = nLogIndex;
    for(int i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
    {
        S.Frames[i].nLogStart[nLogIndex] = 0;
    }
}
void MicroProfileOnThreadExit()
{
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog();
if(pLog)
{
MP_ASSERT(pLog->nActive == 1);
pLog->nActive = 2;
}
}
// Creates a log for the calling thread without supplying a name;
// MicroProfileOnThreadCreate() picks the name in that case.
void MicroProfileInitThreadLog()
{
    const char* pNoName = nullptr;
    MicroProfileOnThreadCreate(pNoName);
}
// Looks up an existing timer by group and timer name (both compared with
// MP_STRCASECMP). Returns the timer's token, or MICROPROFILE_INVALID_TOKEN
// when no timer matches. Initialises the profiler and takes the global mutex.
MicroProfileToken MicroProfileFindTokenInternal(const char* pGroup, const char* pName)
{
    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());
    for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
    {
        if(MP_STRCASECMP(pName, S.TimerInfo[nTimer].pName))
            continue;
        if(MP_STRCASECMP(pGroup, S.GroupInfo[S.TimerToGroup[nTimer]].pName))
            continue;
        return S.TimerInfo[nTimer].nToken;
    }
    return MICROPROFILE_INVALID_TOKEN;
}
// Returns the token for the CPU timer `pGroup`/`pName`. The request goes
// through MicroProfileGetToken() with the PLACEHOLDER flag and automatic
// colour, so a timer that does not exist yet is created as a placeholder.
MicroProfileToken MicroProfileFindToken(const char* pGroup, const char* pName)
{
    const MicroProfileToken Token = MicroProfileGetToken(pGroup, pName, MP_AUTO, MicroProfileTokenTypeCpu, MICROPROFILE_TIMER_FLAG_PLACEHOLDER);
    return Token;
}
// Returns the index of the group named `pGroup` (compared with
// MP_STRCASECMP), or UINT16_MAX when there is no such group.
uint16_t MicroProfileFindGroup(const char* pGroup)
{
    uint32_t nGroup = 0;
    while(nGroup < S.nGroupCount)
    {
        if(0 == MP_STRCASECMP(pGroup, S.GroupInfo[nGroup].pName))
            return (uint16_t)nGroup;
        ++nGroup;
    }
    return UINT16_MAX;
}
// Returns the index of the group named `pGroup`, creating the group when it
// does not exist yet.
//
// A new group gets the name truncated to MICROPROFILE_NAME_MAX_LEN - 1
// characters, the given token type, colour 0x42 and category 0. Its bit is
// set in category 0's group mask and in S.nGroupMask, and also in the active
// group masks when S.nStartEnabled is set.
// The function takes no lock itself.
uint16_t MicroProfileGetGroup(const char* pGroup, MicroProfileTokenType Type)
{
    for(uint32_t i = 0; i < S.nGroupCount; ++i)
    {
        if(!MP_STRCASECMP(pGroup, S.GroupInfo[i].pName))
        {
            return i;
        }
    }

    uint32_t nLen = (uint32_t)strlen(pGroup);
    if(nLen > MICROPROFILE_NAME_MAX_LEN - 1)
        nLen = MICROPROFILE_NAME_MAX_LEN - 1;

    auto& Group = S.GroupInfo[S.nGroupCount];
    memcpy(&Group.pName[0], pGroup, nLen);
    Group.pName[nLen] = '\0';
    Group.nNameLen = nLen;
    Group.nNumTimers = 0;
    Group.nGroupIndex = S.nGroupCount;
    Group.Type = Type;
    Group.nMaxTimerNameLen = 0;
    Group.nColor = 0x42;
    Group.nCategory = 0;
    Group.nWSNext = -2;

    const uint32_t nIndex = S.nGroupCount / 32;
    const uint32_t nBit = S.nGroupCount % 32;
    // Unsigned shift: `1 << 31` on a signed int would shift into the sign bit.
    const uint32_t nMaskBit = 1u << nBit;

    S.CategoryInfo[0].nGroupMask[nIndex] |= nMaskBit;
    if(S.nStartEnabled)
    {
        S.nActiveGroupsWanted[nIndex] |= nMaskBit;
        S.nActiveGroups[nIndex] |= nMaskBit;
        S.AnyActive = true;
    }
    const uint16_t nGroupIndex = (uint16_t)(S.nGroupCount++);
    S.nGroupMask[nIndex] |= nMaskBit;
    MP_ASSERT(S.nGroupCount < MICROPROFILE_MAX_GROUPS);
    return nGroupIndex;
}
// Assigns colour `nColor` to the group `pGroup` (creating the group if
// needed) and moves it into the category `pCategory`.
//
// The category is looked up by name with MP_STRCASECMP and created when it is
// missing and there is still room (MICROPROFILE_MAX_CATEGORIES). When no
// category can be resolved, only the colour is updated. A group named "gpu"
// is created with the GPU token type; every other name gets the CPU type.
// Runs under MicroProfileMutex().
void MicroProfileRegisterGroup(const char* pGroup, const char* pCategory, uint32_t nColor)
{
    MicroProfileScopeLock L(MicroProfileMutex());
    int nCategoryIndex = -1;
    for(uint32_t i = 0; i < S.nCategoryCount; ++i)
    {
        if(!MP_STRCASECMP(pCategory, S.CategoryInfo[i].pName))
        {
            nCategoryIndex = (int)i;
            break;
        }
    }
    if(-1 == nCategoryIndex && S.nCategoryCount < MICROPROFILE_MAX_CATEGORIES)
    {
        MP_ASSERT(S.CategoryInfo[S.nCategoryCount].pName[0] == '\0');
        nCategoryIndex = (int)S.nCategoryCount++;
        uint32_t nLen = (uint32_t)strlen(pCategory);
        if(nLen > MICROPROFILE_NAME_MAX_LEN - 1)
            nLen = MICROPROFILE_NAME_MAX_LEN - 1;
        memcpy(&S.CategoryInfo[nCategoryIndex].pName[0], pCategory, nLen);
        S.CategoryInfo[nCategoryIndex].pName[nLen] = '\0';
    }
    uint16_t nGroup = MicroProfileGetGroup(pGroup, 0 != MP_STRCASECMP(pGroup, "gpu") ? MicroProfileTokenTypeCpu : MicroProfileTokenTypeGpu);
    S.GroupInfo[nGroup].nColor = nColor;
    if(nCategoryIndex >= 0)
    {
        const uint32_t nIndex = nGroup / 32;
        // Unsigned shift: `1 << 31` on a signed int would shift into the sign bit.
        const uint32_t nMaskBit = 1u << (nGroup % 32);
        // Move the group's bit from its previous category to the new one.
        const uint32_t nOldCategory = S.GroupInfo[nGroup].nCategory;
        S.CategoryInfo[nOldCategory].nGroupMask[nIndex] &= ~nMaskBit;
        S.CategoryInfo[nCategoryIndex].nGroupMask[nIndex] |= nMaskBit;
        S.GroupInfo[nGroup].nCategory = nCategoryIndex;
    }
}
// Returns the token for timer `pGroup`/`pName`, registering the timer (and
// its group) when it does not exist yet.
//
// An existing timer that is still flagged as a placeholder takes over the
// supplied colour, flags and type. A new timer stores its name truncated to
// MICROPROFILE_NAME_MAX_LEN - 1 characters plus an extended name of the form
// "<group> <name>". Initialises the profiler and runs under the global mutex.
MicroProfileToken MicroProfileGetToken(const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type, uint32_t Flags)
{
    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());

    const MicroProfileToken nExisting = MicroProfileFindTokenInternal(pGroup, pName);
    if(nExisting != MICROPROFILE_INVALID_TOKEN)
    {
        const int nExistingIndex = MicroProfileGetTimerIndex(nExisting);
        MicroProfileTimerInfo& Existing = S.TimerInfo[nExistingIndex];
        if(Existing.Flags & MICROPROFILE_TIMER_FLAG_PLACEHOLDER)
        {
            // First real registration of a placeholder: adopt the caller's settings.
            Existing.nColor = nColor & 0xffffff;
            Existing.Flags = Flags;
            Existing.Type = Type;
        }
        MP_ASSERT(Existing.Flags == Flags || (Flags & MICROPROFILE_TIMER_FLAG_PLACEHOLDER));
        return nExisting;
    }

    uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
    uint16_t nTimerIndex = (uint16_t)(S.nTotalTimers++);
    MP_ASSERT(nTimerIndex < MICROPROFILE_MAX_TIMERS);

    // The token encodes the group's bit, the index of its mask word, and the timer index.
    uint32_t nBitIndex = nGroupIndex / 32;
    uint32_t nBit = nGroupIndex % 32;
    uint32_t nGroupMask = 1ll << nBit;
    MicroProfileToken nToken = MicroProfileMakeToken(nGroupMask, (uint16_t)nBitIndex, nTimerIndex);

    S.GroupInfo[nGroupIndex].nNumTimers++;
    S.GroupInfo[nGroupIndex].nMaxTimerNameLen = MicroProfileMax(S.GroupInfo[nGroupIndex].nMaxTimerNameLen, (uint32_t)strlen(pName));
    MP_ASSERT(S.GroupInfo[nGroupIndex].Type == Type); // dont mix cpu & gpu timers in the same group
    S.nMaxGroupSize = MicroProfileMax(S.nMaxGroupSize, S.GroupInfo[nGroupIndex].nNumTimers);

    uint32_t nStoredLen = (uint32_t)strlen(pName);
    if(nStoredLen > MICROPROFILE_NAME_MAX_LEN - 1)
        nStoredLen = MICROPROFILE_NAME_MAX_LEN - 1;

    MicroProfileTimerInfo& Timer = S.TimerInfo[nTimerIndex];
    Timer.nToken = nToken;
    memcpy(&Timer.pName, pName, nStoredLen);
    snprintf(&Timer.pNameExt[0], sizeof(Timer.pNameExt) - 1, "%s %s", S.GroupInfo[nGroupIndex].pName, pName);
    Timer.pName[nStoredLen] = '\0';
    Timer.nNameLen = nStoredLen;
    Timer.nColor = nColor & 0xffffff;
    Timer.nGroupIndex = nGroupIndex;
    Timer.nTimerIndex = nTimerIndex;
    Timer.nWSNext = -2;
    Timer.Type = Type;
    Timer.Flags = Flags;
    S.TimerToGroup[nTimerIndex] = nGroupIndex;
    return nToken;
}
// Fills `*pToken` with the token for `pGroup`/`pName` when it still holds
// MICROPROFILE_INVALID_TOKEN. The value is checked once without the lock and
// once more with the global mutex held before the token is resolved.
void MicroProfileGetTokenC(MicroProfileToken* pToken, const char* pGroup, const char* pName, uint32_t nColor, MicroProfileTokenType Type, uint32_t flags)
{
    if(*pToken != MICROPROFILE_INVALID_TOKEN)
        return;

    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());
    if(*pToken != MICROPROFILE_INVALID_TOKEN)
        return;
    *pToken = MicroProfileGetToken(pGroup, pName, nColor, Type, flags);
}
// Extracts the next path component from `pName` into `pNameOut`.
//
// Leading '/' and '\\' separators are skipped. Copying stops at the end of the
// string, at the first separator after at least one copied character, or
// after MICROPROFILE_NAME_MAX_LEN - 1 input characters have been examined.
// `pNameOut` is always NUL-terminated and `*nSubNameLen` receives the number
// of characters copied.
// Returns a pointer just past the terminating separator, or null when the
// component ended any other way.
const char* MicroProfileNextName(const char* pName, char* pNameOut, uint32_t* nSubNameLen)
{
    const int nMaxLen = MICROPROFILE_NAME_MAX_LEN - 1;
    const char* pRemainder = 0;
    uint32_t nCopied = 0;
    for(int nExamined = 0; nExamined < nMaxLen; ++nExamined)
    {
        const char c = *pName++;
        if(c == 0)
            break;
        if(c == '\\' || c == '/')
        {
            if(nCopied)
            {
                // Separator that closes the component: resume after it next time.
                pRemainder = pName;
                break;
            }
            continue; // leading separator, nothing copied yet
        }
        pNameOut[nCopied++] = c;
    }
    *nSubNameLen = nCopied;
    pNameOut[nCopied] = '\0';
    return pRemainder;
}
// Builds the '/'-separated path of counter `nCounter` by walking its parent
// chain, root first (for example "root/child/leaf").
//
// The result lives in a function-local static buffer: it is overwritten by
// the next call and must not be used from several threads at once. Paths that
// do not fit are truncated, and at most 32 levels (counted from the counter
// itself upwards) are included. The returned string is always NUL-terminated.
const char* MicroProfileCounterFullName(int nCounter)
{
    static char Buffer[1024];
    int nNodes[32];
    const int nMaxDepth = (int)(sizeof(nNodes) / sizeof(nNodes[0]));

    // Collect the chain leaf -> root, bounded by the size of nNodes.
    int nDepth = 0;
    do
    {
        nNodes[nDepth++] = nCounter;
        nCounter = S.CounterInfo[nCounter].nParent;
    } while(nCounter >= 0 && nDepth < nMaxDepth);

    // Emit root -> leaf, always leaving room for the terminator.
    const uint32_t nCapacity = (uint32_t)sizeof(Buffer) - 1;
    uint32_t nOffset = 0;
    for(int i = nDepth - 1; i >= 0 && nOffset < nCapacity; --i)
    {
        const auto& Info = S.CounterInfo[nNodes[i]];
        const uint32_t nLen = MicroProfileMin(nCapacity - nOffset, (uint32_t)Info.nNameLen);
        if(nLen)
        {
            memcpy(&Buffer[nOffset], Info.pName, nLen);
            nOffset += nLen;
        }
        if(i > 0 && nOffset < nCapacity)
        {
            Buffer[nOffset++] = '/';
        }
    }
    Buffer[nOffset] = '\0';
    return &Buffer[0];
}
// Allocates the next counter slot, initialises it as a child of `nParent`
// (or as a root when nParent is negative) and returns its token.
// The new counter has no name yet; it is linked in as the parent's first
// child, with the previous first child becoming its sibling.
// `nFlags` may only contain bits from MICROPROFILE_COUNTER_FLAG_TYPE_MASK.
MicroProfileToken MicroProfileCounterTokenInit(int nParent, uint32_t nFlags)
{
    MP_ASSERT(0 == (nFlags & (~MICROPROFILE_COUNTER_FLAG_TYPE_MASK)));
    MicroProfileToken nResult = S.nNumCounters++;

    auto& Info = S.CounterInfo[nResult];
    Info.nParent = nParent;
    Info.nSibling = -1;
    Info.nFirstChild = -1;
    Info.nFlags = nFlags;
    Info.eFormat = MICROPROFILE_COUNTER_FORMAT_DEFAULT;
    Info.nLimit = 0;
    Info.ExternalAtomic = 0;
    S.CounterSource[nResult].pSource = 0;
    S.CounterSource[nResult].nSourceSize = 0;
    Info.nNameLen = 0;
    Info.pName = nullptr;
    Info.nWSNext = -2;

    if(nParent < 0)
    {
        Info.nLevel = 0;
        return nResult;
    }

    MP_ASSERT(nParent < (int)S.nNumCounters);
    auto& Parent = S.CounterInfo[nParent];
    Info.nSibling = Parent.nFirstChild;
    Info.nLevel = Parent.nLevel + 1;
    Parent.nFirstChild = nResult;
    return nResult;
}
// Gives a freshly initialised counter its name. The counter must not have a
// name yet. The stored pointer is the interned lower-case copy returned by
// MicroProfileStringInternLower(); the stored length is strlen(pName)
// narrowed to 16 bits.
void MicroProfileCounterTokenInitName(MicroProfileToken nToken, const char* pName)
{
    auto& Info = S.CounterInfo[nToken];
    MP_ASSERT(0 == Info.pName);
    Info.nNameLen = (uint16_t)strlen(pName);
    Info.pName = MicroProfileStringInternLower(pName);
}
// Finds the counter with parent `nParent` whose name pointer is exactly
// `pName`. Names are compared by pointer, so `pName` must be the interned
// string. When no counter matches, a new one is created unless `nFlags`
// contains MICROPROFILE_COUNTER_FLAG_TOKEN_DONT_CREATE, in which case
// MICROPROFILE_INVALID_TOKEN is returned.
MicroProfileToken MicroProfileGetCounterTokenByParent(int nParent, const char* pName, uint32_t nFlags)
{
    for(uint32_t nCounter = 0; nCounter < S.nNumCounters; ++nCounter)
    {
        const bool bSameParent = nParent == S.CounterInfo[nCounter].nParent;
        if(bSameParent && S.CounterInfo[nCounter].pName == pName)
            return nCounter;
    }

    const bool bDontCreate = 0 != (MICROPROFILE_COUNTER_FLAG_TOKEN_DONT_CREATE & nFlags);
    if(bDontCreate)
        return MICROPROFILE_INVALID_TOKEN;

    const MicroProfileToken nCreated = MicroProfileCounterTokenInit(nParent, nFlags);
    MicroProfileCounterTokenInitName(nCreated, pName);
    return nCreated;
}
// Resolves one level of the counter tree.
// The caller passes the token it got last time together with a non-changing,
// pre-interned string; when that cached token still has the same name pointer
// and the same parent it is returned straight away, without taking the lock.
// Paths are not supported here: call this once per level of the tree.
// On a miss the token is looked up (or created) under the global mutex and
// written back to *LastToken.
MicroProfileToken MicroProfileCounterTokenTree(MicroProfileToken* LastToken, MicroProfileToken CurrentParent, const char* pString)
{
    const MicroProfileToken Cached = *LastToken;
    if(Cached != MICROPROFILE_INVALID_TOKEN)
    {
        const bool bSameName = S.CounterInfo[Cached].pName == pString;
        if(bSameName && S.CounterInfo[Cached].nParent == CurrentParent)
            return Cached;
    }

    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());
    const MicroProfileToken Resolved = MicroProfileGetCounterTokenByParent(CurrentParent, pString, 0);
    *LastToken = Resolved;
    return Resolved;
}
// Returns the interned lower-case version of `pString`, as produced by
// MicroProfileStringInternLower(). Initialises the profiler and holds the
// global mutex while interning.
const char* MicroProfileCounterString(const char* pString)
{
    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());
    const char* pInterned = MicroProfileStringInternLower(pString);
    return pInterned;
}
// Variant of the per-level counter lookup that accepts non-static strings.
// It always takes the lock, interns the string and searches the counters, so
// it is not cheap. `LastToken` is accepted for signature compatibility but is
// not used.
MicroProfileToken MicroProfileCounterTokenTreeDynamic(MicroProfileToken* LastToken, MicroProfileToken Parent, const char* pString)
{
    (void)LastToken;
    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());
    const char* pInternedName = MicroProfileStringInternLower(pString);
    const MicroProfileToken Token = MicroProfileGetCounterTokenByParent(Parent, pInternedName, 0);
    return Token;
}
// Resolves a counter path such as "a/b/c" ('/' or '\\' separated) to a token,
// creating every missing level on the way, and flags the final counter as a
// leaf.
//
// Returns MICROPROFILE_INVALID_TOKEN when the path contains no name component
// at all (an empty string or only separators); no counter is touched in that
// case. `CounterFlag` is only consulted when MICROPROFILE_COUNTER_HISTORY is
// enabled: MICROPROFILE_COUNTER_FLAG_DOUBLE then marks the counter as a double
// and resets its min/max tracking.
// Initialises the profiler and runs under the global mutex.
MicroProfileToken MicroProfileGetCounterToken(const char* pName, uint32_t CounterFlag)
{
    (void)CounterFlag; // unused when MICROPROFILE_COUNTER_HISTORY is off
    MicroProfileInit();
    MicroProfileScopeLock L(MicroProfileMutex());
    char SubName[MICROPROFILE_NAME_MAX_LEN];
    MicroProfileToken nResult = MICROPROFILE_INVALID_TOKEN;
    do
    {
        uint32_t nLen = 0;
        pName = MicroProfileNextName(pName, &SubName[0], &nLen);
        if(0 == nLen)
        {
            break;
        }
        const char* pSubNameLower = MicroProfileStringInternLower(SubName);
        // The token resolved so far becomes the parent of the next level.
        nResult = MicroProfileGetCounterTokenByParent(nResult, pSubNameLower, 0);
        if(MICROPROFILE_INVALID_TOKEN == nResult)
            return nResult;
    } while(pName != 0);

    if(MICROPROFILE_INVALID_TOKEN == nResult)
    {
        // No component was found; indexing S.CounterInfo with the invalid
        // token would be out of bounds.
        return nResult;
    }

    S.CounterInfo[nResult].nFlags |= MICROPROFILE_COUNTER_FLAG_LEAF;
#if MICROPROFILE_COUNTER_HISTORY
    if(CounterFlag & MICROPROFILE_COUNTER_FLAG_DOUBLE)
    {
        S.CounterInfo[nResult].nFlags |= MICROPROFILE_COUNTER_FLAG_DOUBLE;
        S.dCounterMax[nResult] = -DBL_MAX;
        S.dCounterMin[nResult] = DBL_MAX;
    }
#endif
    MP_ASSERT((int)nResult >= 0);
    return nResult;
}
// Returns the token of the child called `pName` under counter `Parent`,
// creating it when needed. `pName` is a single level name: path delimiters
// are not supported when the tree is built manually.
MicroProfileToken MicroProfileGetChildCounterToken(MicroProfileToken Parent, const char* pName)
{
    MP_ASSERT(NULL == strpbrk(pName, "\\/")); // delimiters not supported when manually building the tree.
    const MicroProfileToken Child = MicroProfileCounterTokenTreeDynamic(nullptr, Parent, pName);
    return Child;
}
// Appends one entry to the thread log's ring buffer.
// The entry is dropped, and S.nOverflow set to 100, when the free distance
// to the read cursor is smaller than the current scope depth plus two.
inline void MicroProfileLogPut(MicroProfileLogEntry LE, MicroProfileThreadLog* pLog)
{
    MP_ASSERT(pLog != 0);          // this assert is hit if MicroProfileOnCreateThread is not called
    MP_ASSERT(pLog->nActive == 1); // Dont put after calling thread exit
    const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
    const uint32_t nNextPos = (nWritePos + 1) % MICROPROFILE_BUFFER_SIZE;
    const uint32_t nReadPos = pLog->nGet.load(std::memory_order_relaxed);
    const uint32_t nDistance = (nReadPos - nNextPos) % MICROPROFILE_BUFFER_SIZE;
    MP_ASSERT(nDistance < MICROPROFILE_BUFFER_SIZE);
    const uint32_t nStackDepth = pLog->nStackPut;
    if(nDistance < nStackDepth + 2)
    {
        S.nOverflow = 100;
        return;
    }
    pLog->Log[nWritePos] = LE;
    pLog->nPut.store(nNextPos, std::memory_order_release);
}
// Writes an ENTER entry for `nToken_` at time `nTick` and pushes one level
// onto the log's scope stack.
// Returns:
//   nTick                      - the entry was written.
//   MICROPROFILE_INVALID_TICK  - the ring buffer is too full; nothing was
//                                written and the stack depth is unchanged.
//   MICROPROFILE_DROPPED_TICK  - the stack is already at MICROPROFILE_STACK_MAX;
//                                nothing was written but the depth is still
//                                incremented.
// Both failure cases set S.nOverflow to 100.
inline uint64_t MicroProfileLogPutEnter(MicroProfileToken nToken_, uint64_t nTick, MicroProfileThreadLog* pLog)
{
    MP_ASSERT(pLog != 0);          // this assert is hit if MicroProfileOnCreateThread is not called
    MP_ASSERT(pLog->nActive == 1); // Dont put after calling thread exit
    const uint32_t nStackDepth = pLog->nStackPut;
    if(!(nStackDepth < MICROPROFILE_STACK_MAX))
    {
        S.nOverflow = 100;
        pLog->nStackPut = nStackDepth + 1;
        return MICROPROFILE_DROPPED_TICK;
    }

    const uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_ENTER, nToken_, nTick);
    const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
    const uint32_t nNextPos = (nWritePos + 1) % MICROPROFILE_BUFFER_SIZE;
    const uint32_t nReadPos = pLog->nGet.load(std::memory_order_acquire);
    const uint32_t nDistance = (nReadPos - nNextPos) % MICROPROFILE_BUFFER_SIZE;
    MP_ASSERT(nDistance < MICROPROFILE_BUFFER_SIZE);
    if(nDistance < nStackDepth + 4) // 2 for ring buffer, 2 for the actual entries
    {
        S.nOverflow = 100;
        return MICROPROFILE_INVALID_TICK;
    }

#ifdef MICROPROFILE_VERIFY_BALANCED
    pLog->VerifyStack[nStackDepth] = LE;
#endif
    pLog->nStackPut = nStackDepth + 1;
    pLog->Log[nWritePos] = LE;
    pLog->nPut.store(nNextPos, std::memory_order_release);
    return nTick;
}
// Writes an ENTER entry that is identified by a C string instead of a timer
// token. Two consecutive entries are written: the ENTER entry carrying
// ETOKEN_CSTR_PTR, followed by an extended entry holding the pointer `pStr`
// (the pointer is stored, not the characters).
// Return values and overflow handling match MicroProfileLogPutEnter: nTick on
// success, MICROPROFILE_INVALID_TICK when the ring buffer is too full,
// MICROPROFILE_DROPPED_TICK when the scope stack is already at its maximum.
inline uint64_t MicroProfileLogPutEnterCStr(const char* pStr, uint64_t nTick, MicroProfileThreadLog* pLog)
{
    MP_ASSERT(pLog != 0);          // this assert is hit if MicroProfileOnCreateThread is not called
    MP_ASSERT(pLog->nActive == 1); // Dont put after calling thread exit
    const uint32_t nStackDepth = pLog->nStackPut;
    if(!(nStackDepth < MICROPROFILE_STACK_MAX))
    {
        S.nOverflow = 100;
        pLog->nStackPut = nStackDepth + 1;
        return MICROPROFILE_DROPPED_TICK;
    }

    const uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_ENTER, ETOKEN_CSTR_PTR, nTick);
    const uint64_t LEStr = MicroProfileMakeLogExtendedNoDataPtr((uint64_t)pStr);
    MP_ASSERT(ETOKEN_CSTR_PTR == MicroProfileLogGetTimerIndex(LE));
    const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
    const uint32_t nNextPos = (nWritePos + 2) % MICROPROFILE_BUFFER_SIZE;
    const uint32_t nReadPos = pLog->nGet.load(std::memory_order_acquire);
    const uint32_t nDistance = (nReadPos - nNextPos) % MICROPROFILE_BUFFER_SIZE;
    MP_ASSERT(nDistance < MICROPROFILE_BUFFER_SIZE);
    if(nDistance < nStackDepth + 6) // 2 for ring buffer, 4 for the actual entries
    {
        S.nOverflow = 100;
        return MICROPROFILE_INVALID_TICK;
    }

    pLog->nStackPut = nStackDepth + 1;
    pLog->Log[nWritePos + 0] = LE;
    pLog->Log[(nWritePos + 1) % MICROPROFILE_BUFFER_SIZE] = LEStr;
    pLog->nPut.store(nNextPos, std::memory_order_release);
    return nTick;
}
// Pops one level off the log's scope stack and, when the popped depth is
// below MICROPROFILE_STACK_MAX, writes the LEAVE counterpart of
// MicroProfileLogPutEnterCStr: a LEAVE entry carrying ETOKEN_CSTR_PTR followed
// by an extended entry holding the pointer `pStr`.
// No free-space check is made here beyond the assert.
inline void MicroProfileLogPutLeaveCStr(const char* pStr, uint64_t nTick, MicroProfileThreadLog* pLog)
{
    MP_ASSERT(pLog != 0); // this assert is hit if MicroProfileOnCreateThread is not called
    MP_ASSERT(pLog->nActive);
    MP_ASSERT(pLog->nStackPut != 0);
    const uint32_t nStackDepth = --(pLog->nStackPut);
    MP_ASSERT(nStackDepth < 0xf0000000);
    if(!(nStackDepth < MICROPROFILE_STACK_MAX))
        return; // the matching enter was dropped, so there is nothing to close

    const uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_LEAVE, ETOKEN_CSTR_PTR, nTick);
    const uint64_t LEStr = MicroProfileMakeLogExtendedNoDataPtr((uint64_t)pStr);
    MP_ASSERT(ETOKEN_CSTR_PTR == MicroProfileLogGetTimerIndex(LE));
    const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
    const uint32_t nNextPos = (nWritePos + 2) % MICROPROFILE_BUFFER_SIZE;
    const uint32_t nReadPos = pLog->nGet.load(std::memory_order_acquire);
    (void)nReadPos; // only inspected by the assert below
    MP_ASSERT(nStackDepth < MICROPROFILE_STACK_MAX);
    MP_ASSERT(nNextPos != nReadPos); // should never happen
    pLog->Log[nWritePos + 0] = LE;
    pLog->Log[(nWritePos + 1) % MICROPROFILE_BUFFER_SIZE] = LEStr;
    pLog->nPut.store(nNextPos, std::memory_order_release);
}
// Pops one level off the log's scope stack and, when the popped depth is
// below MICROPROFILE_STACK_MAX, writes a LEAVE entry for `nToken_` at `nTick`.
// With MICROPROFILE_VERIFY_BALANCED defined, the timer being left is compared
// with the one recorded at the same depth on enter; a mismatch is printed and
// asserted on.
inline void MicroProfileLogPutLeave(MicroProfileToken nToken_, uint64_t nTick, MicroProfileThreadLog* pLog)
{
    MP_ASSERT(pLog != 0); // this assert is hit if MicroProfileOnCreateThread is not called
    MP_ASSERT(pLog->nActive);
    MP_ASSERT(pLog->nStackPut != 0);
    const uint32_t nStackDepth = --(pLog->nStackPut);
    if(!(nStackDepth < MICROPROFILE_STACK_MAX))
        return; // the matching enter was dropped, so there is nothing to close

    const uint64_t LE = MicroProfileMakeLogIndex(MP_LOG_LEAVE, nToken_, nTick);
    const uint32_t nWritePos = pLog->nPut.load(std::memory_order_relaxed);
    const uint32_t nNextPos = (nWritePos + 1) % MICROPROFILE_BUFFER_SIZE;
    const uint32_t nReadPos = pLog->nGet.load(std::memory_order_acquire);
    (void)nReadPos; // only inspected by the assert below
    MP_ASSERT(nStackDepth < MICROPROFILE_STACK_MAX);
    MP_ASSERT(nNextPos != nReadPos); // should never happen
#ifdef MICROPROFILE_VERIFY_BALANCED
    // verify what we pop is what we push.
    uint64_t Pushed = pLog->VerifyStack[nStackDepth];
    uint64_t TimerPopped = MicroProfileLogGetTimerIndex(LE);
    uint64_t TimerOnStack = MicroProfileLogGetTimerIndex(Pushed);
    if(TimerPopped != TimerOnStack)
    {
        uprintf("Push/Pop Mismatch %s vs %s\n", S.TimerInfo[TimerPopped].pName, S.TimerInfo[TimerOnStack].pName);
        MP_ASSERT(0);
    }
#endif
    pLog->Log[nWritePos] = LE;
    pLog->nPut.store(nNextPos, std::memory_order_release);
}
// Convenience overload: builds the log entry from (nBegin, nToken_, nTick)
// with MicroProfileMakeLogIndex and appends it to `pLog`.
inline void MicroProfileLogPut(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLog* pLog)
{
    const MicroProfileLogEntry LE = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
    MicroProfileLogPut(LE, pLog);
}
// Appends one entry to a GPU thread log. The GPU log is a plain linear
// buffer: once MICROPROFILE_GPU_BUFFER_SIZE entries are stored, further
// entries are silently dropped.
inline void MicroProfileLogPutGpu(MicroProfileLogEntry LE, MicroProfileThreadLogGpu* pLog)
{
    const uint32_t nWritePos = pLog->nPut;
    if(!(nWritePos < MICROPROFILE_GPU_BUFFER_SIZE))
        return;
    pLog->Log[nWritePos] = LE;
    pLog->nPut = nWritePos + 1;
}
// Builds a timer entry from (nBegin, nToken_, nTick) with
// MicroProfileMakeLogIndex and appends it to the GPU log.
inline void MicroProfileLogPutGpuTimer(MicroProfileToken nToken_, uint64_t nTick, uint64_t nBegin, MicroProfileThreadLogGpu* pLog)
{
    const MicroProfileLogEntry LE = MicroProfileMakeLogIndex(nBegin, nToken_, nTick);
    MicroProfileLogPutGpu(LE, pLog);
}
// Builds an extended entry (token, data size in qwords, payload) with
// MicroProfileMakeLogExtended and appends it to the GPU log.
inline void MicroProfileLogPutGpuExtended(EMicroProfileTokenExtended eTokenExt, uint32_t nDataSizeQWords, uint32_t nPayload, MicroProfileThreadLogGpu* pLog)
{
    MicroProfileLogPutGpu(MicroProfileMakeLogExtended(eTokenExt, nDataSizeQWords, nPayload), pLog);
}
// Builds an extended entry without trailing data (token plus payload) with
// MicroProfileMakeLogExtendedNoData and appends it to the GPU log.
inline void MicroProfileLogPutGpuExtendedNoData(EMicroProfileTokenExtended eTokenExt, uint64_t nPayload, MicroProfileThreadLogGpu* pLog)
{
    MicroProfileLogPutGpu(MicroProfileMakeLogExtendedNoData(eTokenExt, nPayload), pLog);
}
// Returns 1 when the group that `nToken_` belongs to is currently active
// (its bit is set in S.nActiveGroups), otherwise 0.
uint32_t MicroProfileGroupTokenActive(MicroProfileToken nToken_)
{
    const uint32_t nGroupBit = MicroProfileGetGroupMask(nToken_);
    const uint32_t nWord = MicroProfileGetGroupMaskIndex(nToken_);
    const bool bActive = 0 != (S.nActiveGroups[nWord] & nGroupBit);
    return bActive;
}
// Starts a timed scope for `nToken_`.
// Returns MICROPROFILE_INVALID_TICK when the token's group is not active.
// With platform markers enabled, a platform marker is begun and the current
// tick returned; otherwise an ENTER entry is logged to the calling thread's
// log (created on demand) and the result of that call is returned.
uint64_t MicroProfileEnterInternal(MicroProfileToken nToken_)
{
    if(!MicroProfileGroupTokenActive(nToken_))
        return MICROPROFILE_INVALID_TICK;

    uint64_t nTick = MP_TICK();
    if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
    {
        uint32_t idx = MicroProfileGetTimerIndex(nToken_);
        MicroProfileTimerInfo& TI = S.TimerInfo[idx];
        MICROPROFILE_PLATFORM_MARKER_BEGIN(TI.nColor, TI.pNameExt);
        return nTick;
    }
    return MicroProfileLogPutEnter(nToken_, nTick, MicroProfileGetThreadLog2());
}
// Starts a timed scope identified by the C string `pStr`.
// Returns MICROPROFILE_INVALID_TICK when nothing is active (S.AnyActive is
// false). With platform markers enabled, a platform marker is begun and the
// current tick returned; otherwise the enter is logged to the calling
// thread's log (created on demand).
uint64_t MicroProfileEnterInternalCStr(const char* pStr)
{
    if(!S.AnyActive)
        return MICROPROFILE_INVALID_TICK;

    uint64_t nTick = MP_TICK();
    if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
    {
        MICROPROFILE_PLATFORM_MARKER_BEGIN(0, pStr);
        return nTick;
    }
    return MicroProfileLogPutEnterCStr(pStr, nTick, MicroProfileGetThreadLog2());
}
// Closes the timeline entry identified by `id` (as returned by
// MicroProfileTimelineEnterInternal). An id of 0 is ignored.
// Runs under MicroProfileTimelineMutex(). The per-token bookkeeping is updated
// first; the LEAVE entries are then written to S.TimelineLog unless the ring
// buffer is too full, in which case S.nOverflow is set to 100 instead.
void MicroProfileTimelineLeave(uint32_t id)
{
if(!id)
return;
std::lock_guard<std::recursive_mutex> Lock(MicroProfileTimelineMutex());
MicroProfileThreadLog* pLog = &S.TimelineLog;
uint32_t nPut = pLog->nPut.load(std::memory_order_relaxed);
uint32_t nNextPos = (nPut + 1) % MICROPROFILE_BUFFER_SIZE;
uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
uint32_t nDistance = (nGet - nNextPos) % MICROPROFILE_BUFFER_SIZE;
{
// Token slots are addressed by id modulo MICROPROFILE_TIMELINE_MAX_TOKENS.
uint32_t nFrameStart = S.TimelineTokenFrameEnter[id % MICROPROFILE_TIMELINE_MAX_TOKENS];
uint32_t nFrameCurrent = S.nFrameCurrent;
// NOTE(review): presumably compensates for the frame index wrapping around the
// history; the adjusted value is also what gets stored as the leave frame — confirm.
if(nFrameCurrent < nFrameStart)
nFrameCurrent += MICROPROFILE_MAX_FRAME_HISTORY;
uint32_t nFrameDistance = (nFrameCurrent - nFrameStart) % MICROPROFILE_MAX_FRAME_HISTORY;
// Mark the slot as no longer entered and remember when it was left.
S.TimelineTokenFrameEnter[id % MICROPROFILE_TIMELINE_MAX_TOKENS] = MICROPROFILE_INVALID_FRAME;
S.TimelineTokenFrameLeave[id % MICROPROFILE_TIMELINE_MAX_TOKENS] = nFrameCurrent;
S.TimelineToken[id % MICROPROFILE_TIMELINE_MAX_TOKENS] = 0;
// Track the largest enter-to-leave frame distance seen so far.
S.nTimelineFrameMax = MicroProfileMax(S.nTimelineFrameMax, nFrameDistance);
}
if(nDistance < 2 + 4)
{
S.nOverflow = 100;
}
else
{
// Two entries: the LEAVE marker followed by the id of the entry being closed.
uint64_t LEEnter = MicroProfileMakeLogIndex(MP_LOG_LEAVE, ETOKEN_CUSTOM_NAME, MP_TICK());
uint64_t LEId = MicroProfileMakeLogExtended(ETOKEN_CUSTOM_ID, 0, id);
pLog->Log[nPut++] = LEEnter;
nPut %= MICROPROFILE_BUFFER_SIZE;
pLog->Log[nPut++] = LEId;
nPut %= MICROPROFILE_BUFFER_SIZE;
pLog->nPut.store(nPut);
}
}
// Opens a timeline entry whose name is the string `pStr`, flagged as a static
// string so that MicroProfileTimelineLeaveStatic() can close it by name.
// The returned token is discarded. Does nothing while nothing is active.
void MicroProfileTimelineEnterStatic(uint32_t nColor, const char* pStr)
{
    if(S.AnyActive)
    {
        const uint32_t nNameLen = (uint32_t)strlen(pStr);
        (void)MicroProfileTimelineEnterInternal(nColor, pStr, nNameLen, true);
    }
}
// Closes every timeline entry that was opened with a static string equal to
// `pStr` (compared with MP_STRCASECMP). Each matching slot's token is passed
// to MicroProfileTimelineLeave(). Does nothing while nothing is active.
void MicroProfileTimelineLeaveStatic(const char* pStr)
{
    if(!S.AnyActive)
        return;
    for(uint32_t nSlot = 0; nSlot < MICROPROFILE_TIMELINE_MAX_TOKENS; ++nSlot)
    {
        if(!S.TimelineTokenStaticString[nSlot])
            continue;
        if(0 != MP_STRCASECMP(pStr, S.TimelineTokenStaticString[nSlot]))
            continue;
        MicroProfileTimelineLeave(S.TimelineToken[nSlot]);
    }
}
// Opens a timeline entry named `pStr` with colour `nColor` and returns the
// token that identifies it, or 0 when no entry was created (nothing active,
// ring buffer too full, or no reusable token slot found).
//
// pStr            - entry name; pStr[nStrLen] must be the terminator.
// nStrLen         - length of pStr without the terminator.
// bIsStaticString - when non-zero the pointer itself is remembered so the
//                   entry can later be closed by name.
//
// Written to S.TimelineLog, under MicroProfileTimelineMutex(): an ENTER entry,
// a colour entry, an id entry carrying the string size in qwords, then the
// string bytes (terminator included) packed into log entries.
uint32_t MicroProfileTimelineEnterInternal(uint32_t nColor, const char* pStr, uint32_t nStrLen, int bIsStaticString)
{
    if(!S.AnyActive)
        return 0;
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileTimelineMutex());
    MicroProfileThreadLog* pLog = &S.TimelineLog;
    MP_ASSERT(pStr[nStrLen] == '\0');
    nStrLen += 1; // from here on the length includes the terminator
    uint32_t nStringQwords = MicroProfileGetQWordSize(nStrLen);
    uint32_t nNumMessages = nStringQwords;
    uint32_t nPut = pLog->nPut.load(std::memory_order_relaxed);
    uint32_t nNextPos = (nPut + 1) % MICROPROFILE_BUFFER_SIZE;
    uint32_t nGet = pLog->nGet.load(std::memory_order_acquire);
    uint32_t nDistance = (nGet - nNextPos) % MICROPROFILE_BUFFER_SIZE;
    if(nDistance < nNumMessages + 7)
    {
        S.nOverflow = 100;
        return 0;
    }

    uint32_t token = pLog->nCustomId;
    uint32_t nFrameLeave = S.TimelineTokenFrameLeave[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
    uint32_t nFrameEnter = S.TimelineTokenFrameEnter[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
    uint32_t nCounter = 0;
    uint32_t nFrameCurrent = S.nFrameCurrent;
    {
        /// dont reuse tokens until their leave command has been dead for the maximum amount of frames we can generate a capture for.
        while(token == 0 || nFrameEnter != MICROPROFILE_INVALID_FRAME || (nFrameCurrent - nFrameLeave < MICROPROFILE_MAX_FRAME_HISTORY + 3 && nFrameLeave != MICROPROFILE_INVALID_FRAME))
        {
            token = (uint32_t)pLog->nCustomId++;
            nFrameLeave = S.TimelineTokenFrameLeave[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
            nFrameEnter = S.TimelineTokenFrameEnter[token % MICROPROFILE_TIMELINE_MAX_TOKENS];
            if(++nCounter == MICROPROFILE_TIMELINE_MAX_TOKENS)
            {
                // Every slot is still in use or too recently left.
                return 0;
            }
        }
        S.TimelineTokenFrameEnter[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = S.nFrameCurrent;
    }
    if(bIsStaticString)
    {
        S.TimelineTokenStaticString[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = pStr;
    }
    else
    {
        S.TimelineTokenStaticString[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = nullptr;
    }
    S.TimelineToken[token % MICROPROFILE_TIMELINE_MAX_TOKENS] = token;

    uint64_t LEEnter = MicroProfileMakeLogIndex(MP_LOG_ENTER, ETOKEN_CUSTOM_NAME, MP_TICK());
    uint64_t LEColor = MicroProfileMakeLogExtended(ETOKEN_CUSTOM_COLOR, 0, nColor);
    uint64_t LEId = MicroProfileMakeLogExtended(ETOKEN_CUSTOM_ID, nStringQwords, token);
    pLog->Log[nPut++] = LEEnter;
    nPut %= MICROPROFILE_BUFFER_SIZE;
    pLog->Log[nPut++] = LEColor;
    nPut %= MICROPROFILE_BUFFER_SIZE;
    pLog->Log[nPut++] = LEId;
    nPut %= MICROPROFILE_BUFFER_SIZE;

    if(nPut + nStringQwords <= MICROPROFILE_BUFFER_SIZE)
    {
        // The string fits before the end of the ring: copy it in one go.
        // nStrLen already counts the terminator, so no extra byte is copied.
        memcpy(&pLog->Log[nPut], pStr, nStrLen);
        nPut += nStringQwords;
        // The string may end exactly at the end of the ring.
        nPut %= MICROPROFILE_BUFFER_SIZE;
    }
    else
    {
        // The string straddles the end of the ring: copy one entry (8 bytes)
        // at a time and wrap the cursor after every entry.
        int nCharsLeft = (int)nStrLen;
        while(nCharsLeft > 0)
        {
            int nCount = MicroProfileMin(nCharsLeft, 8);
            memcpy(&pLog->Log[nPut++], pStr, nCount);
            nPut %= MICROPROFILE_BUFFER_SIZE;
            pStr += nCount;
            nCharsLeft -= nCount;
        }
    }
    pLog->nPut.store(nPut);
    return token;
}
// Opens a timeline entry named `pStr` with colour `nColor` and returns its
// token (0 when no entry was created). The string is not treated as static.
uint32_t MicroProfileTimelineEnter(uint32_t nColor, const char* pStr)
{
    const uint32_t nNameLen = (uint32_t)strlen(pStr);
    return MicroProfileTimelineEnterInternal(nColor, pStr, nNameLen, false);
}
// printf-style variant of MicroProfileTimelineEnter: formats the entry name
// into a local buffer of MICROPROFILE_MAX_STRING + 1 bytes and opens a
// timeline entry with it.
//
// Returns the entry's token, or 0 when nothing is active, when formatting
// fails, or when the entry could not be created. A name that does not fit is
// truncated to what the buffer holds.
uint32_t MicroProfileTimelineEnterf(uint32_t nColor, const char* pStr, ...)
{
    if(!S.AnyActive)
        return 0;
    char buffer[MICROPROFILE_MAX_STRING + 1];
    buffer[0] = '\0';
    va_list args;
    va_start(args, pStr);
#ifdef _WIN32
    // nCapacity is the buffer size the formatter was told about.
    const size_t nCapacity = sizeof(buffer);
    const int nWritten = vsprintf_s(buffer, pStr, args);
#else
    const size_t nCapacity = sizeof(buffer) - 1;
    const int nWritten = vsnprintf(buffer, nCapacity, pStr, args);
#endif
    va_end(args);
    if(nWritten < 0)
    {
        // Formatting failed: the return value is not a usable length.
        return 0;
    }
    size_t size = (size_t)nWritten;
    if(size >= nCapacity)
    {
        // The formatter reports the length it wanted to write, which can
        // exceed the buffer; only nCapacity - 1 characters were stored.
        size = nCapacity - 1;
    }
    MP_ASSERT(size < sizeof(buffer));
    buffer[size] = '\0';
    return MicroProfileTimelineEnterInternal(nColor, buffer, (uint32_t)size, false);
}
// Adds `nCount` to the caller-owned counter at `pCounter` (plain, non-atomic
// update).
void MicroProfileLocalCounterAdd(int64_t* pCounter, int64_t nCount)
{
    const int64_t nUpdated = *pCounter + nCount;
    *pCounter = nUpdated;
}
// Stores `nCount` in the caller-owned counter at `pCounter` and returns the
// value it held before (plain, non-atomic update).
int64_t MicroProfileLocalCounterSet(int64_t* pCounter, int64_t nCount)
{
    const int64_t nPrevious = *pCounter;
    *pCounter = nCount;
    return nPrevious;
}
// Atomically adds `nCount` to the ExternalAtomic value of counter `nToken`.
void MicroProfileLocalCounterAddAtomic(MicroProfileToken nToken, int64_t nCount)
{
    std::atomic<int64_t>& External = S.CounterInfo[nToken].ExternalAtomic;
    External.fetch_add(nCount);
}
// Atomically replaces the ExternalAtomic value of counter `nToken` with
// `nCount` and returns the value it held before.
int64_t MicroProfileLocalCounterSetAtomic(MicroProfileToken nToken, int64_t nCount)
{
    std::atomic<int64_t>& External = S.CounterInfo[nToken].ExternalAtomic;
    const int64_t nPrevious = External.exchange(nCount);
    return nPrevious;
}
// Adds `nCount` to counter `nToken` via fetch_add. The token must refer to
// an existing counter.
void MicroProfileCounterAdd(MicroProfileToken nToken, int64_t nCount)
{
    MP_ASSERT(nToken < S.nNumCounters);
    auto& Counter = S.Counters[nToken];
    Counter.fetch_add(nCount);
}
// Stores `nCount` as the value of counter `nToken`. The token must refer to
// an existing counter.
void MicroProfileCounterSet(MicroProfileToken nToken, int64_t nCount)
{
    MP_ASSERT(nToken < S.nNumCounters);
    auto& Counter = S.Counters[nToken];
    Counter.store(nCount);
}
// Returns the current value of counter `nToken`. The token must refer to an
// existing counter.
int64_t MicroProfileCounterGet(MicroProfileToken nToken)
{
    MP_ASSERT(nToken < S.nNumCounters);
    auto& Counter = S.Counters[nToken];
    return Counter.load();
}
// Stores nCount into the double-valued slot (S.CountersDouble) of the counter
// identified by nToken. Asserts that the token is in range and that the counter
// carries MICROPROFILE_COUNTER_FLAG_DOUBLE.
void MicroProfileCounterSetDouble(MicroProfileToken nToken, double nCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	MP_ASSERT(MICROPROFILE_COUNTER_FLAG_DOUBLE == (S.CounterInfo[nToken].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE));
	auto& DoubleCounter = S.CountersDouble[nToken];
	DoubleCounter.store(nCount);
}
// Returns the current value of the double-valued slot (S.CountersDouble) of the
// counter identified by nToken. Asserts that the token is in range and that the
// counter carries MICROPROFILE_COUNTER_FLAG_DOUBLE.
double MicroProfileCounterGetDouble(MicroProfileToken nToken)
{
	MP_ASSERT(nToken < S.nNumCounters);
	MP_ASSERT(MICROPROFILE_COUNTER_FLAG_DOUBLE == (S.CounterInfo[nToken].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE));
	const double dValue = S.CountersDouble[nToken].load();
	return dValue;
}
// Records nCount as the integer limit (nLimit) of the counter identified by
// nToken. nToken must be below S.nNumCounters (asserted only).
void MicroProfileCounterSetLimit(MicroProfileToken nToken, int64_t nCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	auto& Info = S.CounterInfo[nToken];
	Info.nLimit = nCount;
}
// Records dCount as the floating-point limit (dLimit) of the counter identified
// by nToken. Asserts that the token is in range and that the counter carries
// MICROPROFILE_COUNTER_FLAG_DOUBLE.
void MicroProfileCounterSetLimitDouble(MicroProfileToken nToken, double dCount)
{
	MP_ASSERT(nToken < S.nNumCounters);
	MP_ASSERT(MICROPROFILE_COUNTER_FLAG_DOUBLE == (S.CounterInfo[nToken].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE));
	auto& Info = S.CounterInfo[nToken];
	Info.dLimit = dCount;
}
// Configures the counter identified by nToken: sets its display format and
// integer limit, and ORs in the caller's flags. Bits covered by
// MICROPROFILE_COUNTER_FLAG_INTERNAL_MASK are stripped from nFlags first, so the
// caller cannot set them; already-set flags are never cleared.
// nToken is used as an index without a range check.
void MicroProfileCounterConfigToken(MicroProfileToken nToken, uint32_t eFormat, int64_t nLimit, uint32_t nFlags)
{
	auto& Info = S.CounterInfo[nToken];
	Info.eFormat = (MicroProfileCounterFormat)eFormat;
	Info.nLimit = nLimit;
	Info.nFlags |= (nFlags & ~MICROPROFILE_COUNTER_FLAG_INTERNAL_MASK);
}
void MicroProfileCounterConfig(const char* pName, uint32_t eFormat, int64_t nLimit, uint32_t nFlags)
{
MicroProfileToken nToken = MicroProfileGetCounterToken(pName, 0);
MicroProfileCounterConfigToken(nToken, eFormat, nLimit, nFlags);
}
void MicroProfileCounterSetPtr(const char* pCounterName, void* pSource, uint32_t nSize)
{
MicroProfileToken nToken = MicroProfileGetCounterToken(pCounterName, 0);
S.CounterSource[nToken].pSource = pSource;
S.CounterSource[nToken].nSourceSize = nSize;
}
// Refreshes counter i from its registered external source, if any.
// A source of sizeof(int32_t) or sizeof(int64_t) bytes is read through pSource
// and assigned to S.Counters[i]; any other size (including 0, i.e. no source)
// leaves the counter untouched.
// NOTE(review): the assert demands MICROPROFILE_COUNTER_FLAG_DOUBLE whenever a
// source is registered, yet the value is read as an integer and written to the
// integer counter array - confirm whether the condition is inverted.
inline void MicroProfileFetchCounter(uint32_t i)
{
	MP_ASSERT(0 == S.CounterSource[i].nSourceSize || (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) == MICROPROFILE_COUNTER_FLAG_DOUBLE);
	const auto& Source = S.CounterSource[i];
	if(Source.nSourceSize == sizeof(int32_t))
	{
		S.Counters[i] = *(int32_t*)Source.pSource;
	}
	else if(Source.nSourceSize == sizeof(int64_t))
	{
		S.Counters[i] = *(int64_t*)Source.pSource;
	}
}
// Refreshes every registered counter (indices 0 .. S.nNumCounters - 1) from its
// external source by calling MicroProfileFetchCounter on each.
void MicroProfileCounterFetchCounters()
{
	uint32_t nCounter = 0;
	while(nCounter < S.nNumCounters)
	{
		MicroProfileFetchCounter(nCounter);
		++nCounter;
	}
}
// Closes the scope for timer nToken_.
// Nothing happens when nTickStart is MICROPROFILE_INVALID_TICK. Otherwise, when
// platform markers are enabled only the platform end-marker is emitted; if not,
// the current tick is sampled and a leave entry is written to the calling
// thread's log.
void MicroProfileLeaveInternal(MicroProfileToken nToken_, uint64_t nTickStart)
{
	if(MICROPROFILE_INVALID_TICK == nTickStart)
	{
		return;
	}
	if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
	{
		MICROPROFILE_PLATFORM_MARKER_END();
		return;
	}
	// sample the tick before looking up the thread log
	const uint64_t nLeaveTick = MP_TICK();
	MicroProfileThreadLog* pThreadLog = MicroProfileGetThreadLog2();
	MicroProfileLogPutLeave(nToken_, nLeaveTick, pThreadLog);
}
void MicroProfileLeaveInternalCStr(const char* pStr, uint64_t nTickStart)
{
if(MICROPROFILE_INVALID_TICK != nTickStart)
{
if(MICROPROFILE_PLATFORM_MARKERS_ENABLED)
{
MICROPROFILE_PLATFORM_MARKER_END();
}
else
{
uint64_t nTick = MP_TICK();
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
MicroProfileLogPutLeaveCStr(pStr, nTick, pLog);
}
}
}
// Manual (non-RAII) scope entry, to be paired with MicroProfileLeave().
// Claims the next slot of the calling thread's scope stack and records the
// token together with the value returned by MicroProfileEnterInternal.
// The stack depth is incremented even when the stack is already full; in that
// case nothing is recorded and S.nOverflow is set to 100.
void MicroProfileEnter(MicroProfileToken nToken)
{
	MicroProfileThreadLog* pThreadLog = MicroProfileGetThreadLog2();
	MP_ASSERT(pThreadLog->nStackScope < MICROPROFILE_STACK_MAX); // hitting this usually means mismatched _ENTER/_LEAVE markers
	const uint32_t nSlot = pThreadLog->nStackScope++;
	if(nSlot >= MICROPROFILE_STACK_MAX)
	{
		S.nOverflow = 100;
		return;
	}
	MicroProfileScopeStateC& Scope = pThreadLog->ScopeState[nSlot];
	Scope.Token = nToken;
	Scope.nTick = MicroProfileEnterInternal(nToken);
}
void MicroProfileLeave()
{
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
MP_ASSERT(pLog->nStackScope > 0); // if youre hitting this assert you probably have mismatched _ENTER/_LEAVE markers
uint32_t nStackPos = --pLog->nStackScope;
if(nStackPos < MICROPROFILE_STACK_MAX)
{
MicroProfileScopeStateC* pScopeState = &pLog->ScopeState[nStackPos];
MicroProfileLeaveInternal(pScopeState->Token, pScopeState->nTick);
}
else
{
S.nOverflow = 100;
}
}
// Manual GPU scope entry on the given GPU log, to be paired with
// MicroProfileLeaveGpu(). Claims the next slot of pLog's scope stack and records
// the token together with the value returned by MicroProfileGpuEnterInternal.
// The depth is incremented even when the stack is full; in that case nothing is
// recorded and S.nOverflow is set to 100.
void MicroProfileEnterGpu(MicroProfileToken nToken, MicroProfileThreadLogGpu* pLog)
{
	// MP_ASSERT(pLog->nStackScope < MICROPROFILE_STACK_MAX); // if youre hitting this assert you probably have mismatched _ENTER/_LEAVE markers
	const uint32_t nSlot = pLog->nStackScope++;
	if(nSlot >= MICROPROFILE_STACK_MAX)
	{
		S.nOverflow = 100;
		return;
	}
	MicroProfileScopeStateC& Scope = pLog->ScopeState[nSlot];
	Scope.Token = nToken;
	Scope.nTick = MicroProfileGpuEnterInternal(pLog, nToken);
}
// Manual GPU scope exit, the counterpart of MicroProfileEnterGpu().
// Pops the top slot of pLog's scope stack and closes it through
// MicroProfileGpuLeaveInternal. An index outside the stack is silently ignored.
void MicroProfileLeaveGpu(MicroProfileThreadLogGpu* pLog)
{
	const uint32_t nSlot = --pLog->nStackScope;
	if(nSlot >= MICROPROFILE_STACK_MAX)
	{
		return;
	}
	MicroProfileScopeStateC& Scope = pLog->ScopeState[nSlot];
	MicroProfileGpuLeaveInternal(pLog, Scope.Token, Scope.nTick);
}
// Starts a GPU recording section on pLog for the given context.
// The log must be idle (context and start index both at their -1 sentinels) and
// pContext must not be the sentinel itself. The current put position is
// remembered as the section start and a placeholder entry (value 0) is written
// there; MicroProfileGpuEnd later overwrites it with the section's entry count.
void MicroProfileGpuBegin(void* pContext, MicroProfileThreadLogGpu* pLog)
{
	MP_ASSERT(pContext != (void*)-1);
	MP_ASSERT(pLog->pContext == (void*)-1); // begin must not be called again before end
	MP_ASSERT(pLog->nStart == (uint32_t)-1);
	pLog->pContext = pContext;
	pLog->nStart = pLog->nPut;
	MicroProfileLogPutGpu(0, pLog); // header slot, patched by MicroProfileGpuEnd
}
// Replaces the context of a GPU log that is currently recording.
// Asserts that a section is open, i.e. that neither the context nor the start
// index is at its -1 sentinel.
void MicroProfileGpuSetContext(void* pContext, MicroProfileThreadLogGpu* pLog)
{
	MP_ASSERT(pLog->nStart != (uint32_t)-1);
	MP_ASSERT(pLog->pContext != (void*)-1); // only valid while a section is open
	pLog->pContext = pContext;
}
// Ends the GPU recording section on pLog.
// If the section start lies inside the GPU buffer, the slot at that index is
// overwritten with the number of entries written after it. The log is then
// returned to the idle state (context and start index reset to -1).
// Returns a work handle: the start index in the low 32 bits and pLog->nId in
// the high 32 bits, which is the layout MicroProfileGpuSubmit decodes.
uint64_t MicroProfileGpuEnd(MicroProfileThreadLogGpu* pLog)
{
	const uint64_t nSectionStart = pLog->nStart;
	const uint32_t nSectionEnd = pLog->nPut;
	const uint64_t nLogId = pLog->nId;
	if(nSectionStart < MICROPROFILE_GPU_BUFFER_SIZE)
	{
		pLog->Log[nSectionStart] = nSectionEnd - nSectionStart - 1;
	}
	pLog->nStart = (uint32_t)-1;
	pLog->pContext = (void*)-1;
	return (nLogId << 32) | nSectionStart;
}
// Copies one recorded GPU section into the log of queue nQueue.
//
// nWork is a handle as produced by MicroProfileGpuEnd: low 32 bits = index of
// the section's header slot in the GPU log, high 32 bits = index of the GPU log
// in S.PoolGpu. The header slot holds the number of entries that follow it; each
// of those entries is appended to S.Pool[nQueue] with MicroProfileLogPut.
// A start index outside the GPU buffer (e.g. the -1 sentinel of a section that
// was never begun) results in zero entries being copied.
//
// Both indices are validated by assertion before they are used. The original
// asserted nQueue a second time after it had been used and never checked the
// GPU log index.
void MicroProfileGpuSubmit(int nQueue, uint64_t nWork)
{
	MP_ASSERT(nQueue >= 0 && nQueue < MICROPROFILE_MAX_THREADS);
	MICROPROFILE_SCOPE(g_MicroProfileGpuSubmit);
	uint32_t nStart = (uint32_t)nWork;
	uint32_t nThreadLog = uint32_t(nWork >> 32);
	MP_ASSERT(nThreadLog < MICROPROFILE_MAX_THREADS);
	MicroProfileThreadLog* pQueueLog = S.Pool[nQueue];
	MicroProfileThreadLogGpu* pGpuLog = S.PoolGpu[nThreadLog];
	MP_ASSERT(pGpuLog);
	int64_t nCount = 0;
	if(nStart < MICROPROFILE_GPU_BUFFER_SIZE)
	{
		nCount = pGpuLog->Log[nStart];
	}
	MP_ASSERT(nCount < (int64_t)MICROPROFILE_GPU_BUFFER_SIZE);
	nStart++; // step past the header slot
	for(int64_t i = 0; i < nCount; ++i)
	{
		MP_ASSERT(nStart < MICROPROFILE_GPU_BUFFER_SIZE);
		MicroProfileLogEntry LE = pGpuLog->Log[nStart++];
		MicroProfileLogPut(LE, pQueueLog);
	}
}
// Records the start of a GPU scope for timer nToken_ in pGpuLog.
// Returns 0 without recording anything when the token's group is not active,
// otherwise 1. When recording, the calling thread's CPU log is created on demand,
// a GPU timestamp is inserted for pGpuLog's context and logged as an ENTER
// entry, followed by two extended entries carrying the current CPU tick and the
// calling thread's log index.
// Must be called between MicroProfileGpuBegin and MicroProfileGpuEnd (asserted).
uint64_t MicroProfileGpuEnterInternal(MicroProfileThreadLogGpu* pGpuLog, MicroProfileToken nToken_)
{
	if(!MicroProfileGroupTokenActive(nToken_))
	{
		return 0;
	}
	if(!MicroProfileGetThreadLog())
	{
		MicroProfileInitThreadLog();
	}
	MP_ASSERT(pGpuLog->pContext != (void*)-1); // must be called between GpuBegin/GpuEnd
	const uint64_t nGpuTimer = MicroProfileGpuInsertTimeStamp(pGpuLog->pContext);
	MicroProfileLogPutGpuTimer(nToken_, nGpuTimer, MP_LOG_ENTER, pGpuLog);
	MicroProfileThreadLog* pThreadLog = MicroProfileGetThreadLog();
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_TIMESTAMP, MP_TICK(), pGpuLog);
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_SOURCE_THREAD, pThreadLog->nLogIndex, pGpuLog);
	return 1;
}
// C-string variant of MicroProfileGpuEnterInternal. Not implemented: it
// triggers MP_BREAK() and reports "nothing recorded" by returning 0.
// Both parameters are ignored.
uint64_t MicroProfileGpuEnterInternalCStr(MicroProfileThreadLogGpu* pGpuLog, const char* pStr)
{
	(void)pGpuLog;
	(void)pStr;
	MP_BREAK(); // not implemented
	return 0;
}
// Records the end of a GPU scope for timer nToken_ in pGpuLog.
// nTickStart is the value returned by the matching enter call; 0 means the
// enter recorded nothing, so the leave is skipped as well. Otherwise the calling
// thread's CPU log is created on demand, a GPU timestamp is inserted for
// pGpuLog's context and logged as a LEAVE entry, followed by two extended
// entries carrying the current CPU tick and the calling thread's log index.
// Must be called between MicroProfileGpuBegin and MicroProfileGpuEnd (asserted).
void MicroProfileGpuLeaveInternal(MicroProfileThreadLogGpu* pGpuLog, MicroProfileToken nToken_, uint64_t nTickStart)
{
	if(!nTickStart)
	{
		return;
	}
	if(!MicroProfileGetThreadLog())
	{
		MicroProfileInitThreadLog();
	}
	MP_ASSERT(pGpuLog->pContext != (void*)-1); // must be called between GpuBegin/GpuEnd
	const uint64_t nGpuTimer = MicroProfileGpuInsertTimeStamp(pGpuLog->pContext);
	MicroProfileLogPutGpuTimer(nToken_, nGpuTimer, MP_LOG_LEAVE, pGpuLog);
	MicroProfileThreadLog* pThreadLog = MicroProfileGetThreadLog();
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_TIMESTAMP, MP_TICK(), pGpuLog);
	MicroProfileLogPutGpuExtendedNoData(ETOKEN_GPU_CPU_SOURCE_THREAD, pThreadLog->nLogIndex, pGpuLog);
}
// C-string variant of MicroProfileGpuLeaveInternal. Not implemented: it only
// triggers MP_BREAK(). Both parameters are ignored.
void MicroProfileGpuLeaveInternalCStr(MicroProfileThreadLogGpu* pGpuLog, uint64_t nTickStart)
{
	(void)pGpuLog;
	(void)nTickStart;
	MP_BREAK(); // not implemented
}
// Appends one context-switch record to the S.ContextSwitch ring buffer.
// The record is accepted when S.nPauseTicks is 0 or when
// (S.nPauseTicks - record tick) is greater than 0. An accepted record is copied
// into the slot at S.nContextSwitchPut, the put index advances modulo
// MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE and the record's tick is remembered in
// S.nContextSwitchLastPushed. A rejected record only updates
// S.nContextSwitchStalledTick with the current tick.
void MicroProfileContextSwitchPut(MicroProfileContextSwitch* pContextSwitch)
{
	const bool bAccept = 0 == S.nPauseTicks || (S.nPauseTicks - pContextSwitch->nTicks) > 0;
	if(!bAccept)
	{
		S.nContextSwitchStalledTick = MP_TICK();
		return;
	}
	const uint32_t nSlot = S.nContextSwitchPut;
	S.ContextSwitch[nSlot] = *pContextSwitch;
	S.nContextSwitchPut = (S.nContextSwitchPut + 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
	S.nContextSwitchLastPushed = pContextSwitch->nTicks;
}
// Splits the ring-buffer interval [nGet, nPut) into at most two linear
// [begin, end) index ranges.
//  - nPut == nGet: nRange is left untouched (callers pre-fill it with zeros).
//  - nPut >  nGet: one range [nGet, nPut); the second range is set to [0, 0).
//  - nPut <  nGet: the interval wraps; first range runs from nGet to the end of
//    the buffer (MICROPROFILE_BUFFER_SIZE), second range is [0, nPut).
void MicroProfileGetRange(uint32_t nPut, uint32_t nGet, uint32_t nRange[2][2])
{
	if(nPut == nGet)
	{
		return;
	}
	if(nPut > nGet)
	{
		nRange[0][0] = nGet;
		nRange[0][1] = nPut;
		nRange[1][0] = 0;
		nRange[1][1] = 0;
		return;
	}
	// wrapped interval
	MP_ASSERT(nGet != MICROPROFILE_BUFFER_SIZE);
	const uint32_t nTailCount = MICROPROFILE_BUFFER_SIZE - nGet;
	nRange[0][0] = nGet;
	nRange[0][1] = nGet + nTailCount;
	nRange[1][0] = 0;
	nRange[1][1] = nPut;
}
// Flips the frozen flag: non-zero becomes 0, zero becomes 1.
void MicroProfileToggleFrozen()
{
	S.nFrozen = S.nFrozen ? 0 : 1;
}
// Returns 1 when the frozen flag is set, 0 otherwise.
int MicroProfileIsFrozen()
{
	if(S.nFrozen != 0)
	{
		return 1;
	}
	return 0;
}
// Reports the result of MicroProfileAnyGroupActive() as an int.
int MicroProfileEnabled()
{
	const int nAnyActive = MicroProfileAnyGroupActive();
	return nAnyActive;
}
// Allocates nSize bytes through MICROPROFILE_ALLOC with a bookkeeping header.
//
// The alignment is raised to at least 4 * sizeof(uint32_t) and that many bytes
// are added in front of the user block. The three uint32_t slots directly
// before the returned pointer hold, from nearest to farthest: total allocation
// size, alignment / header size, and the magic value 0x28586813.
// The total size (header included) is added to the Alloc_Memory counter and the
// Alloc_Count counter is incremented.
//
// Returns the user pointer, or nullptr when the underlying allocator fails (the
// original wrote the header without checking the allocation result).
void* MicroProfileAllocInternal(size_t nSize, size_t nAlign)
{
	nAlign = MicroProfileMax(4 * sizeof(uint32_t), nAlign);
	nSize += nAlign;
	// both values are stored in 32-bit header fields
	MP_ASSERT(nSize < 0xffffffff);
	MP_ASSERT(nAlign < 0xffffffff);
	void* pBase = MICROPROFILE_ALLOC(nSize, nAlign);
	if(!pBase)
	{
		return nullptr;
	}
	intptr_t nPtr = (intptr_t)pBase;
	nPtr += nAlign;
	uint32_t* pVal = (uint32_t*)nPtr;
	pVal[-1] = (uint32_t)nSize;
	pVal[-2] = (uint32_t)nAlign;
	pVal[-3] = (uint32_t)0x28586813;
	MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, nSize);
	MicroProfileCounterAdd(S.CounterToken_Alloc_Count, 1);
	return (void*)nPtr;
}
// Releases a block obtained from MicroProfileAllocInternal or
// MicroProfileReallocInternal.
//
// Reads the bookkeeping header stored in the three uint32_t slots in front of
// pPtr (size, alignment / header size, magic), frees the underlying allocation
// that starts nAlign bytes before pPtr, subtracts the recorded size from the
// Alloc_Memory counter and decrements Alloc_Count.
//
// A null pointer is ignored. The size is negated as a 64-bit value so that
// allocations of 2 GiB or more are accounted correctly.
void MicroProfileFreeInternal(void* pPtr)
{
	if(!pPtr)
	{
		return;
	}
	intptr_t p = (intptr_t)pPtr;
	uint32_t* p4 = (uint32_t*)pPtr;
	uint32_t nSize = p4[-1];
	uint32_t nAlign = p4[-2];
	uint32_t nMagic = p4[-3];
	MP_ASSERT(nMagic == 0x28586813);
	MICROPROFILE_FREE((void*)(p - nAlign));
	MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, -(int64_t)nSize);
	MicroProfileCounterAdd(S.CounterToken_Alloc_Count, -1);
}
// Resizes a block managed by the MicroProfile allocation helpers.
//
// pPtr may be null, in which case this behaves like a fresh allocation with the
// minimum header size of 4 * sizeof(uint32_t). For an existing block the header
// in front of pPtr supplies the previous total size and the header size, and the
// underlying allocation (which starts header-size bytes before pPtr) is passed to
// MICROPROFILE_REALLOC. The header is rewritten in front of the returned pointer.
//
// Fixes over the original:
//  - The Alloc_Memory delta is computed from header-inclusive sizes on both
//    sides. The original subtracted the stored (header-inclusive) size from the
//    requested (header-less) size, so every resize under-counted by the header
//    size.
//  - The delta is computed in signed 64-bit arithmetic instead of unsigned
//    size_t arithmetic.
//  - A failed reallocation returns nullptr and leaves counters and the old block
//    untouched instead of writing a header near address 0.
void* MicroProfileReallocInternal(void* pPtr, size_t nSize)
{
	uint32_t nAlignBase = 4 * sizeof(uint32_t);
	uint32_t nSizeBase = 0;
	void* pBase = nullptr;
	if(pPtr)
	{
		uint32_t* p4 = (uint32_t*)pPtr;
		nSizeBase = p4[-1];
		nAlignBase = p4[-2];
		uint32_t nMagicBase = p4[-3];
		MP_ASSERT(nMagicBase == 0x28586813);
		pBase = (void*)((intptr_t)pPtr - nAlignBase);
	}
	MP_ASSERT(nAlignBase >= 4 * sizeof(uint32_t));
	nSize += nAlignBase;
	// the total size is stored in a 32-bit header field
	MP_ASSERT(nSize < 0xffffffff);
	void* pNew = MICROPROFILE_REALLOC(pBase, nSize);
	if(!pNew)
	{
		return nullptr;
	}
	intptr_t p = (intptr_t)pNew;
	p += nAlignBase;
	uint32_t* pVal = (uint32_t*)p;
	pVal[-1] = (uint32_t)nSize;
	pVal[-2] = (uint32_t)nAlignBase;
	pVal[-3] = (uint32_t)0x28586813;
	MicroProfileCounterAdd(S.CounterToken_Alloc_Memory, (int64_t)nSize - (int64_t)nSizeBase);
	if(!pPtr)
	{
		MicroProfileCounterAdd(S.CounterToken_Alloc_Count, 1);
	}
	return (void*)p;
}
// Recomputes the set of active groups.
// While frozen, every active-group word is cleared and S.AnyActive becomes false.
// Otherwise each active-group word becomes the OR of the corresponding "wanted"
// and "forced" words (written only when the value actually changes), and
// S.AnyActive reflects whether any word is non-zero.
static void MicroProfileFlipEnabled()
{
	if(S.nFrozen)
	{
		memset(S.nActiveGroups, 0, sizeof(S.nActiveGroups));
		S.AnyActive = false;
		return;
	}
	bool bAnyWanted = false;
	for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
	{
		const uint32_t nWanted = S.nActiveGroupsWanted[nWord] | S.nForceGroups[nWord];
		bAnyWanted = bAnyWanted || nWanted != 0;
		if(S.nActiveGroups[nWord] != nWanted)
		{
			S.nActiveGroups[nWord] = nWanted;
		}
	}
	S.AnyActive = bAnyWanted;
}
void MicroProfileFlip(void* pContext, uint32_t FlipFlag)
{
MicroProfileFlip_CB(pContext, nullptr, FlipFlag);
}
#define MICROPROFILE_TICK_VALIDATE_FRAME_TIME 0
void MicroProfileFlip_CB(void* pContext, MicroProfileOnFreeze FreezeCB, uint32_t FlipFlag)
{
MICROPROFILE_COUNTER_LOCAL_UPDATE_SET_ATOMIC(g_MicroProfileBytesPerFlip);
#if 0
//verify LogEntry wraps correctly
MicroProfileLogEntry c = MP_LOG_TICK_MASK-5000;
for(int i = 0; i < 10000; ++i, c += 1)
{
MicroProfileLogEntry l2 = (c+2500) & MP_LOG_TICK_MASK;
MP_ASSERT(2500 == MicroProfileLogTickDifference(c, l2));
}
#endif
MICROPROFILE_SCOPE(g_MicroProfileFlip);
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
if(S.nDumpFileNextFrame)
{
if(0 == S.nDumpFileCountDown)
{
MicroProfileDumpToFile();
S.nDumpFileNextFrame = 0;
S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; // hide spike from dumping webpage
}
else
{
S.nDumpFileCountDown--;
}
}
#if MICROPROFILE_WEBSERVER
if(MICROPROFILE_FLIP_FLAG_START_WEBSERVER == (MICROPROFILE_FLIP_FLAG_START_WEBSERVER & FlipFlag) && S.nWebServerDataSent == (uint64_t)-1)
{
MicroProfileWebServerStart();
S.nWebServerDataSent = 0;
if(!S.WebSocketThreadRunning)
{
S.WebSocketThreadRunning = 1;
MicroProfileThreadStart(&S.WebSocketSendThread, MicroProfileSocketSenderThread);
}
}
#endif
int nLoop = 0;
do
{
#if MICROPROFILE_WEBSERVER
if(MicroProfileWebServerUpdate())
{
S.nAutoClearFrames = MICROPROFILE_GPU_FRAME_DELAY + 3; // hide spike from dumping webpage
}
#endif
if(nLoop++)
{
MicroProfileSleep(100);
if((nLoop % 10) == 0)
{
uprintf("microprofile frozen %d\n", nLoop);
}
}
} while(S.nFrozen);
uint32_t nAggregateClear = S.nAggregateClear || S.nAutoClearFrames, nAggregateFlip = 0;
if(S.nAutoClearFrames)
{
nAggregateClear = 1;
nAggregateFlip = 1;
S.nAutoClearFrames -= 1;
}
bool nRunning = MicroProfileAnyGroupActive();
if(nRunning)
{
S.nFlipStartTick = MP_TICK();
int64_t nGpuWork = MicroProfileGpuEnd(S.pGpuGlobal);
MicroProfileGpuSubmit(S.GpuQueue, nGpuWork);
MicroProfileThreadLogGpuReset(S.pGpuGlobal);
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
if(S.PoolGpu[i])
{
S.PoolGpu[i]->nPut = 0;
}
}
MicroProfileGpuBegin(pContext, S.pGpuGlobal);
uint32_t nGpuTimeStamp = MicroProfileGpuFlip(pContext);
uint64_t nFrameIdx = S.nFramePutIndex++;
S.nFramePut = (S.nFramePut + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
S.Frames[S.nFramePut].nFrameId = nFrameIdx;
MP_ASSERT((S.nFramePutIndex % MICROPROFILE_MAX_FRAME_HISTORY) == S.nFramePut);
S.nFrameCurrent = (S.nFramePut + MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 1) % MICROPROFILE_MAX_FRAME_HISTORY;
S.nFrameCurrentIndex++;
uint32_t nFrameNext = (S.nFrameCurrent + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
S.nFrameNext = nFrameNext;
uint32_t nContextSwitchPut = S.nContextSwitchPut;
if(S.nContextSwitchLastPut < nContextSwitchPut)
{
S.nContextSwitchUsage = (nContextSwitchPut - S.nContextSwitchLastPut);
}
else
{
S.nContextSwitchUsage = MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - S.nContextSwitchLastPut + nContextSwitchPut;
}
S.nContextSwitchLastPut = nContextSwitchPut;
MicroProfileFrameState* pFramePut = &S.Frames[S.nFramePut];
MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
MicroProfileFrameState* pFrameNext = &S.Frames[nFrameNext];
const int64_t nTickStartFrame = pFrameCurrent->nFrameStartCpu;
const int64_t nTickEndFrame = pFrameNext->nFrameStartCpu;
pFrameCurrent->nGpuPending = 0;
pFramePut->nGpuPending = 1;
pFramePut->nFrameStartCpu = MP_TICK();
pFramePut->nFrameStartGpu = nGpuTimeStamp;
{
const float fDumpTimeThreshold = 1000.f * 60 * 60 * 24 * 365.f; // if time above this, then we're handling uninitialized counters
int nDumpNextFrame = 0;
float fTimeGpu = 0.f;
if(pFrameNext->nFrameStartGpu != MICROPROFILE_INVALID_TICK)
{
uint64_t nTickCurrent = pFrameCurrent->nFrameStartGpu;
uint64_t nTickNext = pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu);
nTickCurrent = MicroProfileLogTickMin(nTickCurrent, nTickNext);
float fTime = 1000.f * (nTickNext - nTickCurrent) / (MicroProfileTicksPerSecondGpu());
fTime = fTimeGpu;
if(S.fDumpGpuSpike > 0.f && fTime > S.fDumpGpuSpike && fTime < fDumpTimeThreshold)
{
nDumpNextFrame = 1;
}
}
float fTimeCpu = 1000.f * (pFrameNext->nFrameStartCpu - pFrameCurrent->nFrameStartCpu) / MicroProfileTicksPerSecondCpu();
if(S.fDumpCpuSpike > 0.f && fTimeCpu > S.fDumpCpuSpike && fTimeCpu < fDumpTimeThreshold)
{
nDumpNextFrame = 1;
}
if(nDumpNextFrame)
{
S.nDumpFileNextFrame = S.nDumpSpikeMask;
S.nDumpSpikeMask = 0;
S.nDumpFileCountDown = 5;
}
}
const uint64_t nTickEndFrameGpu_ = pFrameNext->nFrameStartGpu;
const uint64_t nTickStartFrameGpu_ = pFrameCurrent->nFrameStartGpu;
const bool bGpuFrameInvalid = nTickEndFrameGpu_ == MICROPROFILE_INVALID_TICK || nTickStartFrameGpu_ == MICROPROFILE_INVALID_TICK;
const uint64_t nTickEndFrameGpu = bGpuFrameInvalid ? 1 : nTickEndFrameGpu_;
const uint64_t nTickStartFrameGpu = bGpuFrameInvalid ? 2 : nTickStartFrameGpu_;
MicroProfileFrameExtraCounterData* ExtraData = S.FrameExtraCounterData;
bool UsingExtraData = false;
if(ExtraData)
{
if((intptr_t)ExtraData == 1)
{
size_t Bytes = sizeof(MicroProfileFrameExtraCounterData) * MICROPROFILE_MAX_FRAME_HISTORY;
printf(" allocating %d bytes %f\n", (int)Bytes, Bytes / (1024.0 * 1024.0));
ExtraData = S.FrameExtraCounterData = (MicroProfileFrameExtraCounterData*)MicroProfileAllocInternal(Bytes, alignof(uint64_t));
memset(ExtraData, 0, Bytes);
}
ExtraData = ExtraData + S.nFrameCurrent;
UsingExtraData = true;
}
#define MP_ASSERT_LE_WRAP(l, g) MP_ASSERT(uint64_t(g - l) < 0x8000000000000000)
{
MP_ASSERT_LE_WRAP(nTickStartFrame, nTickEndFrame);
uint64_t nTick = nTickEndFrame - nTickStartFrame;
S.nFlipTicks = nTick;
S.nFlipAggregate += nTick;
S.nFlipMax = MicroProfileMax(S.nFlipMax, nTick);
}
uint32_t* pTimerToGroup = &S.TimerToGroup[0];
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
MicroProfileThreadLog* pLog = S.Pool[i];
if(!pLog)
{
pFramePut->nLogStart[i] = 0;
}
else
{
uint32_t nPut = pLog->nPut.load(std::memory_order_acquire);
pFramePut->nLogStart[i] = nPut;
if(!pLog->nGpu)
{
uint32_t nStart = pFrameCurrent->nLogStart[i];
while(nStart != nPut)
{
int64_t LE = pLog->Log[nStart];
int64_t nDifference = MicroProfileLogTickDifference(LE, nTickEndFrame);
uint32_t Ext = MicroProfileLogGetType(LE);
if(nDifference > 0 || 0 != (0x2 & Ext))
{
nStart = (nStart + 1) % MICROPROFILE_BUFFER_SIZE;
}
else
{
break;
}
}
pFrameNext->nLogStart[i] = nStart;
}
}
}
{
pFramePut->nLogStartTimeline = S.TimelineLog.nPut.load(std::memory_order_acquire);
uint32_t nFrameCurrent = S.nFrameCurrent;
uint32_t nTimelineFrameDeltaMax = S.nTimelineFrameMax;
for(uint32_t i = 0; i != MICROPROFILE_TIMELINE_MAX_TOKENS; ++i)
{
uint32_t nFrameStart = S.TimelineTokenFrameEnter[i];
if(nFrameStart != MICROPROFILE_INVALID_FRAME)
{
uint32_t nCur = nFrameCurrent;
if(nCur < nFrameStart)
nCur += MICROPROFILE_MAX_FRAME_HISTORY;
if(nCur >= nFrameStart)
{
uint32_t D = nCur - nFrameStart;
nTimelineFrameDeltaMax = MicroProfileMax(nTimelineFrameDeltaMax, D);
}
}
}
pFramePut->nTimelineFrameMax = nTimelineFrameDeltaMax;
S.nTimelineFrameMax = 0;
}
{
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
MicroProfileThreadLog* pLog = S.Pool[i];
if(!pLog)
continue;
if(pLog->nGpu)
{
uint32_t nPut = pFrameNext->nLogStart[i];
uint32_t nGet = pFrameCurrent->nLogStart[i];
uint32_t nRange[2][2] = {
{ 0, 0 },
{ 0, 0 },
};
MicroProfileGetRange(nPut, nGet, nRange);
for(uint32_t j = 0; j < 2; ++j)
{
uint32_t nStart = nRange[j][0];
uint32_t nEnd = nRange[j][1];
for(uint32_t k = nStart; k < nEnd; ++k)
{
MicroProfileLogEntry L = pLog->Log[k];
if(MicroProfileLogGetType(L) < MP_LOG_EXTENDED)
{
pLog->Log[k] = MicroProfileLogSetTick(L, MicroProfileGpuGetTimeStamp((uint32_t)MicroProfileLogGetTick(L)));
}
k += MicroProfileLogGetDataSize(L);
}
}
}
}
}
{
MicroProfile::GroupTime* pFrameGroup = &S.FrameGroup[0];
{
MICROPROFILE_SCOPE(g_MicroProfileClear);
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
{
S.Frame[i].nTicks = 0;
S.Frame[i].nCount = 0;
S.FrameExclusive[i] = 0;
}
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
{
pFrameGroup[i].nTicks = 0;
pFrameGroup[i].nTicksExclusive = 0;
pFrameGroup[i].nCount = 0;
}
}
{
MICROPROFILE_SCOPE(g_MicroProfileThreadLoop);
memset(S.FrameGroupThreadValid, 0, sizeof(S.FrameGroupThreadValid));
for(uint32_t idx_thread = 0; idx_thread < MICROPROFILE_MAX_THREADS; ++idx_thread)
{
MicroProfileThreadLog* pLog = S.Pool[idx_thread];
if(!pLog)
continue;
bool bGpu = pLog->nGpu != 0;
int64_t nTickStartLog = bGpu ? nTickStartFrameGpu : nTickStartFrame;
int64_t nTickEndLog = bGpu ? nTickEndFrameGpu : nTickEndFrame;
float fToMs = bGpu ? MicroProfileTickToMsMultiplierGpu() : MicroProfileTickToMsMultiplierCpu();
float fFrameTime = fToMs * (nTickEndLog - nTickStartLog);
MicroProfile::GroupTime* pFrameGroupThread = &S.FrameGroupThread[idx_thread][0];
uint32_t nPut = pFrameNext->nLogStart[idx_thread];
uint32_t nGet = pFrameCurrent->nLogStart[idx_thread];
uint32_t nRange[2][2] = {
{ 0, 0 },
{ 0, 0 },
};
MicroProfileGetRange(nPut, nGet, nRange);
if(nPut != nGet)
{
S.FrameGroupThreadValid[idx_thread / 32] |= 1 << (idx_thread % 32);
memset(pFrameGroupThread, 0, sizeof(S.FrameGroupThread[idx_thread]));
}
uint64_t* pStackLog = &pLog->nStackLogEntry[0];
uint64_t* pChildTickStack = &pLog->nChildTickStack[1];
int32_t nStackPos = pLog->nStackPos;
uint8_t TimerStackPos[MICROPROFILE_MAX_TIMERS];
uint8_t GroupStackPos[MICROPROFILE_MAX_GROUPS];
memset(TimerStackPos, 0, sizeof(TimerStackPos));
memset(GroupStackPos, 0, sizeof(GroupStackPos));
// restore group and timer stack pos.
for(int32_t i = 0; i < nStackPos; ++i)
{
uint64_t nTimer = MicroProfileLogGetTimerIndex(pStackLog[i]);
uint32_t nGroup = pTimerToGroup[nTimer];
MP_ASSERT(nTimer < MICROPROFILE_MAX_TIMERS);
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
TimerStackPos[nTimer]++;
GroupStackPos[nGroup]++;
}
for(uint32_t j = 0; j < 2; ++j)
{
uint32_t nStart = nRange[j][0];
uint32_t nEnd = nRange[j][1];
for(uint32_t k = nStart; k < nEnd; ++k)
{
MicroProfileLogEntry LE = pLog->Log[k];
uint32_t nType = MicroProfileLogGetType(LE);
switch(nType)
{
case MP_LOG_ENTER:
{
uint64_t nTimer = MicroProfileLogGetTimerIndex(LE);
if(nTimer != ETOKEN_CSTR_PTR)
{
MP_ASSERT(nTimer < S.nTotalTimers);
uint32_t nGroup = pTimerToGroup[nTimer];
MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
// When we aggretate the total time, we have to count if the timers & groups are layered, to avoid summing them twice when calculating the total time.
// Averages become nonsense regardless.
TimerStackPos[nTimer]++;
GroupStackPos[nGroup]++;
pStackLog[nStackPos] = LE;
pChildTickStack[nStackPos] = 0;
nStackPos++;
}
break;
}
case MP_LOG_LEAVE:
{
uint64_t nTimer = MicroProfileLogGetTimerIndex(LE);
if(nTimer != ETOKEN_CSTR_PTR)
{
MP_ASSERT(nTimer < S.nTotalTimers);
uint32_t nGroup = pTimerToGroup[nTimer];
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
MP_ASSERT(nStackPos);
uint64_t nTicks;
bool bGroupRoot = 0 == GroupStackPos[nGroup] || 0 == --GroupStackPos[nGroup];
bool bTimerRoot = 0 == TimerStackPos[nTimer] || 0 == --TimerStackPos[nTimer];
{
nStackPos--;
MicroProfileLogEntry LEStack = pStackLog[nStackPos];
MP_ASSERT(MicroProfileLogGetTimerIndex(LEStack) == nTimer); // unbalanced timers are not supported
uint64_t nTickStart = MicroProfileLogTickClamp(LEStack, nTickStartLog, nTickEndLog);
uint64_t nClamped = MicroProfileLogTickClamp(LE, nTickStartLog, nTickEndLog);
nTicks = MicroProfileLogTickDifference(nTickStart, nClamped);
MP_ASSERT(nTicks < 0x8000000000000000);
uint64_t nChildTicks = pChildTickStack[nStackPos];
MP_ASSERT(nStackPos < MICROPROFILE_STACK_MAX);
if(nStackPos)
{
pChildTickStack[nStackPos - 1] += nTicks;
}
MP_ASSERT(nTicks >= nChildTicks);
uint64_t nTicksExclusive = (nTicks - nChildTicks);
S.FrameExclusive[nTimer] += nTicksExclusive;
pFrameGroupThread[nGroup].nTicksExclusive += nTicksExclusive;
if(bTimerRoot) // dont count this if its below another instance of the same timer.
{
S.Frame[nTimer].nTicks += nTicks;
S.Frame[nTimer].nCount += 1;
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
if(bGroupRoot)
{
pFrameGroupThread[nGroup].nTicks += nTicks;
pFrameGroupThread[nGroup].nCount += 1;
}
}
}
}
break;
}
case MP_LOG_EXTENDED:
{
k += MicroProfileLogGetDataSize(LE);
break;
}
case MP_LOG_EXTENDED_NO_DATA:
break;
}
}
}
for(int32_t i = nStackPos - 1; i >= 0; --i)
{
MicroProfileLogEntry LE = pStackLog[i];
uint64_t nTickStart = MicroProfileLogTickClamp(LE, nTickStartLog, nTickEndLog);
uint64_t nTicks = MicroProfileLogTickDifference(nTickStart, nTickEndLog);
int64_t nChildTicks = pChildTickStack[i];
pChildTickStack[i] = 0; // consume..
MP_ASSERT(i < MICROPROFILE_STACK_MAX && i >= 0);
if(i)
{
pChildTickStack[i - 1] += nTicks;
}
MP_ASSERT(nTicks >= (uint64_t)nChildTicks);
uint32_t nTimer = (uint32_t)MicroProfileLogGetTimerIndex(LE);
uint32_t nGroup = pTimerToGroup[nTimer];
bool bGroupRoot = 0 == GroupStackPos[nGroup] || 0 == --GroupStackPos[nGroup];
bool bTimerRoot = 0 == TimerStackPos[nTimer] || 0 == --TimerStackPos[nTimer];
uint64_t nTicksExclusive = (nTicks - nChildTicks);
S.FrameExclusive[nTimer] += nTicksExclusive;
pFrameGroupThread[nGroup].nTicksExclusive += nTicksExclusive;
if(bTimerRoot)
{
S.Frame[nTimer].nTicks += nTicks;
S.Frame[nTimer].nCount += 1;
MP_ASSERT(nGroup < MICROPROFILE_MAX_GROUPS);
if(bGroupRoot)
{
pFrameGroupThread[nGroup].nTicks += nTicks;
pFrameGroupThread[nGroup].nCount += 1;
}
}
}
#ifdef MP_ASSERT
for(uint8_t& g : GroupStackPos)
{
MP_ASSERT(g == 0);
}
for(uint8_t& g : TimerStackPos)
{
MP_ASSERT(g == 0);
}
#endif
pLog->nStackPos = nStackPos;
for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
{
pLog->nGroupTicks[j] += pFrameGroupThread[j].nTicks;
if((S.FrameGroupThreadValid[idx_thread / 32] & (1 << (idx_thread % 32))) != 0)
{
pFrameGroup[j].nTicks += pFrameGroupThread[j].nTicks;
pFrameGroup[j].nTicksExclusive += pFrameGroupThread[j].nTicksExclusive;
pFrameGroup[j].nCount += pFrameGroupThread[j].nCount;
}
}
if(pLog->nPut == pLog->nGet && pLog->nActive == 2)
{
pLog->nIdleFrames++;
}
else
{
pLog->nIdleFrames = 0;
}
if(pLog->nActive == 2 && pLog->nIdleFrames > MICROPROFILE_THREAD_LOG_FRAMES_REUSE)
{
MicroProfileLogReset(pLog);
}
}
}
{
MICROPROFILE_SCOPE(g_MicroProfileAccumulate);
uint64_t* ExtraPut = nullptr;
if(UsingExtraData)
{
ExtraPut = &ExtraData->Timers[0];
ExtraData->NumTimers = S.nTotalTimers;
}
for(uint32_t i = 0; i < S.nTotalTimers; ++i)
{
S.AccumTimers[i].nTicks += S.Frame[i].nTicks;
S.AccumTimers[i].nCount += S.Frame[i].nCount;
S.AccumMaxTimers[i] = MicroProfileMax(S.AccumMaxTimers[i], S.Frame[i].nTicks);
S.AccumMinTimers[i] = MicroProfileMin(S.AccumMinTimers[i], S.Frame[i].nTicks);
S.AccumTimersExclusive[i] += S.FrameExclusive[i];
S.AccumMaxTimersExclusive[i] = MicroProfileMax(S.AccumMaxTimersExclusive[i], S.FrameExclusive[i]);
if(ExtraPut)
*ExtraPut++ = S.Frame[i].nTicks;
}
ExtraPut = nullptr;
if(UsingExtraData)
{
ExtraPut = &ExtraData->Groups[0];
ExtraData->NumGroups = S.nGroupCount;
}
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
{
S.AccumGroup[i] += pFrameGroup[i].nTicks;
S.AccumGroupMax[i] = MicroProfileMax(S.AccumGroupMax[i], pFrameGroup[i].nTicks);
if(ExtraPut)
*ExtraPut++ = pFrameGroup[i].nTicks;
}
#if MICROPROFILE_IMGUI
void MicroProfileImguiGather();
MicroProfileImguiGather();
#endif
if(S.CsvConfig.State == MicroProfileCsvConfig::ACTIVE)
{
uint32_t FrameIndex = S.nFrameCurrent % MICROPROFILE_MAX_FRAME_HISTORY;
uint64_t* FrameData = S.CsvConfig.FrameData + S.CsvConfig.TotalElements * FrameIndex;
{
uint16_t* TimerIndices = S.CsvConfig.TimerIndices;
for(uint32_t i = 0; i < S.CsvConfig.NumTimers; ++i)
{
uint16_t Index = TimerIndices[i];
if(Index != UINT16_MAX)
{
*FrameData = S.Frame[Index].nTicks;
}
else
{
*FrameData = 0;
}
FrameData++;
}
}
{
uint16_t* GroupIndices = S.CsvConfig.GroupIndices;
for(uint32_t i = 0; i < S.CsvConfig.NumGroups; ++i)
{
uint16_t Index = GroupIndices[i];
if(Index != UINT16_MAX)
{
*FrameData = pFrameGroup[Index].nTicks;
}
else
{
*FrameData = 0;
}
FrameData++;
}
}
{
uint16_t* CounterIndices = S.CsvConfig.CounterIndices;
for(uint32_t i = 0; i < S.CsvConfig.NumCounters; ++i)
{
uint16_t Index = CounterIndices[i];
if(Index != UINT16_MAX)
{
if(S.CounterInfo[Index].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE)
{
double d = S.CountersDouble[Index].load();
memcpy(FrameData, &d, sizeof(d));
}
else
{
*FrameData = S.Counters[Index].load();
}
}
else
{
*FrameData = 0;
}
FrameData++;
}
}
}
}
for(uint32_t i = 0; i < MICROPROFILE_MAX_GRAPHS; ++i)
{
if(S.Graph[i].nToken != MICROPROFILE_INVALID_TOKEN)
{
MicroProfileToken nToken = S.Graph[i].nToken;
S.Graph[i].nHistory[S.nGraphPut] = S.Frame[MicroProfileGetTimerIndex(nToken)].nTicks;
}
}
S.nGraphPut = (S.nGraphPut + 1) % MICROPROFILE_GRAPH_HISTORY;
}
if(S.nAggregateFlip <= ++S.nAggregateFlipCount)
{
nAggregateFlip = 1;
if(S.nAggregateFlip) // if 0 accumulate indefinitely
{
nAggregateClear = 1;
}
}
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
MicroProfileThreadLog* pLog = S.Pool[i];
uint32_t nNewGet = pFrameNext->nLogStart[i];
if(pLog && nNewGet != (uint32_t)-1)
{
pLog->nGet.store(nNewGet);
}
}
if(pFrameNext->nLogStartTimeline != (uint32_t)-1)
{
S.TimelineLog.nGet.store(pFrameNext->nLogStartTimeline);
}
}
if(nAggregateFlip)
{
memcpy(&S.Aggregate[0], &S.AccumTimers[0], sizeof(S.Aggregate[0]) * S.nTotalTimers);
memcpy(&S.AggregateMax[0], &S.AccumMaxTimers[0], sizeof(S.AggregateMax[0]) * S.nTotalTimers);
memcpy(&S.AggregateMin[0], &S.AccumMinTimers[0], sizeof(S.AggregateMin[0]) * S.nTotalTimers);
memcpy(&S.AggregateExclusive[0], &S.AccumTimersExclusive[0], sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
memcpy(&S.AggregateMaxExclusive[0], &S.AccumMaxTimersExclusive[0], sizeof(S.AggregateMaxExclusive[0]) * S.nTotalTimers);
memcpy(&S.AggregateGroup[0], &S.AccumGroup[0], sizeof(S.AggregateGroup));
memcpy(&S.AggregateGroupMax[0], &S.AccumGroupMax[0], sizeof(S.AggregateGroup));
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
MicroProfileThreadLog* pLog = S.Pool[i];
if(!pLog)
continue;
memcpy(&pLog->nAggregateGroupTicks[0], &pLog->nGroupTicks[0], sizeof(pLog->nAggregateGroupTicks));
if(nAggregateClear)
{
memset(&pLog->nGroupTicks[0], 0, sizeof(pLog->nGroupTicks));
}
}
S.nAggregateFrames = S.nAggregateFlipCount;
S.nFlipAggregateDisplay = S.nFlipAggregate;
S.nFlipMaxDisplay = S.nFlipMax;
if(nAggregateClear)
{
memset(&S.AccumTimers[0], 0, sizeof(S.Aggregate[0]) * S.nTotalTimers);
memset(&S.AccumMaxTimers[0], 0, sizeof(S.AccumMaxTimers[0]) * S.nTotalTimers);
memset(&S.AccumMinTimers[0], 0xFF, sizeof(S.AccumMinTimers[0]) * S.nTotalTimers);
memset(&S.AccumTimersExclusive[0], 0, sizeof(S.AggregateExclusive[0]) * S.nTotalTimers);
memset(&S.AccumMaxTimersExclusive[0], 0, sizeof(S.AccumMaxTimersExclusive[0]) * S.nTotalTimers);
memset(&S.AccumGroup[0], 0, sizeof(S.AggregateGroup));
memset(&S.AccumGroupMax[0], 0, sizeof(S.AggregateGroup));
S.nAggregateFlipCount = 0;
S.nFlipAggregate = 0;
S.nFlipMax = 0;
S.nAggregateFlipTick = MP_TICK();
}
#if MICROPROFILE_COUNTER_HISTORY
int64_t* pDest = &S.nCounterHistory[S.nCounterHistoryPut][0];
S.nCounterHistoryPut = (S.nCounterHistoryPut + 1) % MICROPROFILE_GRAPH_HISTORY;
for(uint32_t i = 0; i < S.nNumCounters; ++i)
{
if(0 != (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DETAILED))
{
MicroProfileFetchCounter(i);
bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
if(IsDouble)
{
double dValue = S.CountersDouble[i].load(std::memory_order_relaxed);
memcpy(&pDest[i], &dValue, sizeof(dValue));
S.dCounterMin[i] = MicroProfileMin(S.dCounterMin[i], dValue);
S.dCounterMax[i] = MicroProfileMax(S.dCounterMax[i], dValue);
}
else
{
uint64_t nValue = S.Counters[i].load(std::memory_order_relaxed);
pDest[i] = nValue;
S.nCounterMin[i] = MicroProfileMin(S.nCounterMin[i], (int64_t)nValue);
S.nCounterMax[i] = MicroProfileMax(S.nCounterMax[i], (int64_t)nValue);
}
}
}
#endif
}
S.nAggregateClear = 0;
MicroProfileFlipEnabled();
}
// Selects either every group in S.nGroupMask (bEnable != 0) or no group at all
// (bEnable == 0) as the wanted set, records the choice in S.nStartEnabled and
// then calls MicroProfileFlipEnabled().
void MicroProfileSetEnableAllGroups(int bEnable)
{
    const bool bWantAll = bEnable != 0;
    for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
    {
        if(bWantAll)
        {
            S.nActiveGroupsWanted[nWord] = S.nGroupMask[nWord];
        }
        else
        {
            S.nActiveGroupsWanted[nWord] = 0;
        }
    }
    S.nStartEnabled = bWantAll ? 1 : 0;
    MicroProfileFlipEnabled();
}
// Finds the first of the S.nCategoryCount categories whose name compares equal to
// pCategory under MP_STRCASECMP and either ORs (bEnabled != 0) or clears
// (bEnabled == 0) that category's group mask in S.nActiveGroupsWanted.
// A name that matches no category leaves the wanted mask untouched.
void MicroProfileEnableCategory(const char* pCategory, int bEnabled)
{
    for(uint32_t nCategory = 0; nCategory < S.nCategoryCount; ++nCategory)
    {
        if(0 != MP_STRCASECMP(pCategory, S.CategoryInfo[nCategory].pName))
        {
            continue;
        }
        for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
        {
            if(bEnabled)
            {
                S.nActiveGroupsWanted[nWord] |= S.CategoryInfo[nCategory].nGroupMask[nWord];
            }
            else
            {
                S.nActiveGroupsWanted[nWord] &= ~S.CategoryInfo[nCategory].nGroupMask[nWord];
            }
        }
        // Only the first matching category is processed.
        return;
    }
}
// Single-argument overload: forwards to the two-argument overload with the enable flag set.
void MicroProfileEnableCategory(const char* pCategory)
{
    const int bEnabled = 1;
    MicroProfileEnableCategory(pCategory, bEnabled);
}
// Forwards to the two-argument MicroProfileEnableCategory with the enable flag cleared.
void MicroProfileDisableCategory(const char* pCategory)
{
    const int bEnabled = 0;
    MicroProfileEnableCategory(pCategory, bEnabled);
}
// Returns 1 when S.nActiveGroupsWanted is byte-for-byte equal to S.nGroupMask, otherwise 0.
int MicroProfileGetEnableAllGroups()
{
    const bool bWantedEqualsMask = memcmp(S.nGroupMask, S.nActiveGroupsWanted, sizeof(S.nGroupMask)) == 0;
    return bWantedEqualsMask;
}
// Empty stub: the argument is accepted and ignored.
void MicroProfileSetForceMetaCounters(int bForce)
{
    (void)bForce;
}
// Always returns 0. MicroProfileSetForceMetaCounters in this file is an empty stub,
// so there is no stored state to report.
int MicroProfileGetForceMetaCounters()
{
return 0;
}
// Empty stub: the counter name is accepted and ignored.
void MicroProfileEnableMetaCounter(const char* pMeta)
{
    (void)pMeta;
}
// Empty stub: the counter name is accepted and ignored.
void MicroProfileDisableMetaCounter(const char* pMeta)
{
    (void)pMeta;
}
// Stores nFrames (converted to uint32_t) in S.nAggregateFlip.
// Passing 0 additionally sets S.nAggregateClear to 1.
void MicroProfileSetAggregateFrames(int nFrames)
{
    S.nAggregateFlip = static_cast<uint32_t>(nFrames);
    if(nFrames != 0)
    {
        return;
    }
    S.nAggregateClear = 1;
}
// Returns the value last stored in S.nAggregateFlip, converted to int.
int MicroProfileGetAggregateFrames()
{
    const int nConfiguredFrames = S.nAggregateFlip;
    return nConfiguredFrames;
}
// Returns S.nAggregateFlip when it is non-zero; otherwise returns S.nAggregateFlipCount.
int MicroProfileGetCurrentAggregateFrames()
{
    if(S.nAggregateFlip)
    {
        return int(S.nAggregateFlip);
    }
    return int(S.nAggregateFlipCount);
}
// Resolves pGroup/Type through MicroProfileGetGroup and sets the matching bit in
// S.nForceGroups. Runs MicroProfileInit() first and holds MicroProfileMutex() while
// the group is looked up and the bit is written.
void MicroProfileForceEnableGroup(const char* pGroup, MicroProfileTokenType Type)
{
    MicroProfileInit();
    std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
    const uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
    // 32 groups per mask word: word = index / 32, bit = index % 32.
    const uint32_t nWord = nGroupIndex / 32;
    const uint32_t nBitInWord = nGroupIndex % 32;
    S.nForceGroups[nWord] |= (1ll << nBitInWord);
}
// Resolves pGroup/Type through MicroProfileGetGroup and clears the matching bit in
// S.nForceGroups. Runs MicroProfileInit() first and holds MicroProfileMutex() while
// the group is looked up and the bit is written.
void MicroProfileForceDisableGroup(const char* pGroup, MicroProfileTokenType Type)
{
    MicroProfileInit();
    std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
    const uint16_t nGroupIndex = MicroProfileGetGroup(pGroup, Type);
    // 32 groups per mask word: word = index / 32, bit = index % 32.
    const uint32_t nWord = nGroupIndex / 32;
    const uint32_t nBitInWord = nGroupIndex % 32;
    S.nForceGroups[nWord] &= ~(1ll << nBitInWord);
}
// Per-timer values produced by MicroProfileCalcTimers. All *Ms fields are tick counts
// multiplied by the tick-to-millisecond factor of the timer's group (GPU or CPU).
struct MicroProfileTimerValues
{
// S.Frame[timer].nTicks converted to ms.
float TimeMs;
// Aggregate ticks divided by the aggregate frame count (integer division), in ms.
float AverageMs;
// S.AggregateMax[timer] converted to ms.
float MaxMs;
// S.AggregateMin[timer] converted to ms; 0 when the stored minimum is uint64_t(-1).
float MinMs;
// Aggregate ticks divided by the aggregate call count (integer division), in ms.
float CallAverageMs;
// S.FrameExclusive[timer] converted to ms.
float ExclusiveMs;
// S.AggregateExclusive[timer] divided by the aggregate frame count, in ms.
float AverageExclusiveMs;
// S.AggregateMaxExclusive[timer] converted to ms.
float MaxExclusiveMs;
// Aggregate ticks converted to ms, without any division.
float TotalMs;
// S.Aggregate[timer].nCount copied as-is.
uint32_t nCount;
};
// Fills Out with the millisecond values of timer nTimer, derived from the current frame
// (S.Frame / S.FrameExclusive) and from the aggregate arrays. The tick-to-ms factor is
// chosen from the GPU or CPU tick rate depending on the type of the timer's group.
void MicroProfileCalcTimers(int nTimer, MicroProfileTimerValues& Out)
{
    const uint32_t nGroup = S.TimerInfo[nTimer].nGroupIndex;
    const float fTickToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroup].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
    // Both divisors fall back to 1 so the integer divisions below never divide by zero.
    const uint32_t nFrameDivisor = S.nAggregateFrames ? S.nAggregateFrames : 1;
    const uint32_t nCallDivisor = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;

    Out.nCount = S.Aggregate[nTimer].nCount;
    Out.TimeMs = fTickToMs * (S.Frame[nTimer].nTicks);
    Out.AverageMs = fTickToMs * (S.Aggregate[nTimer].nTicks / nFrameDivisor);
    Out.MaxMs = fTickToMs * (S.AggregateMax[nTimer]);
    // uint64_t(-1) is treated as "no minimum" and reported as 0.
    Out.MinMs = fTickToMs * (S.AggregateMin[nTimer] != uint64_t(-1) ? S.AggregateMin[nTimer] : 0);
    Out.CallAverageMs = fTickToMs * (S.Aggregate[nTimer].nTicks / nCallDivisor);
    Out.ExclusiveMs = fTickToMs * (S.FrameExclusive[nTimer]);
    Out.AverageExclusiveMs = fTickToMs * (S.AggregateExclusive[nTimer] / nFrameDivisor);
    Out.MaxExclusiveMs = fTickToMs * (S.AggregateMaxExclusive[nTimer]);
    Out.TotalMs = fTickToMs * S.Aggregate[nTimer].nTicks;
}
// Computes the values of the first min(S.nTotalTimers, nSize) timers into nine output arrays.
// Each array holds two floats per timer: slot [2*i] is the value in milliseconds and slot
// [2*i + 1] is that value multiplied by S.fRcpReferenceTime and clamped to at most 1.0.
// pTotal is the exception: its second slot is always written as 0.
void MicroProfileCalcAllTimers(
    float* pTimers, float* pAverage, float* pMax, float* pMin, float* pCallAverage, float* pExclusive, float* pAverageExclusive, float* pMaxExclusive, float* pTotal, uint32_t nSize)
{
    for(uint32_t nTimer = 0; nTimer < S.nTotalTimers && nTimer < nSize; ++nTimer)
    {
        const uint32_t nGroup = S.TimerInfo[nTimer].nGroupIndex;
        const float fTickToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroup].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
        const uint32_t nMsSlot = nTimer * 2;
        const uint32_t nRatioSlot = nMsSlot + 1;
        // Both divisors fall back to 1 so the integer divisions below never divide by zero.
        const uint32_t nFrameDivisor = S.nAggregateFrames ? S.nAggregateFrames : 1;
        const uint32_t nCallDivisor = S.Aggregate[nTimer].nCount ? S.Aggregate[nTimer].nCount : 1;
        const float fMsToRatio = S.fRcpReferenceTime;

        // Writes the millisecond value and its clamped ratio into one output array.
        auto StorePair = [&](float* pDest, float fValueMs)
        {
            pDest[nMsSlot] = fValueMs;
            pDest[nRatioSlot] = MicroProfileMin(fValueMs * fMsToRatio, 1.f);
        };

        StorePair(pTimers, fTickToMs * (S.Frame[nTimer].nTicks));
        StorePair(pAverage, fTickToMs * (S.Aggregate[nTimer].nTicks / nFrameDivisor));
        StorePair(pMax, fTickToMs * (S.AggregateMax[nTimer]));
        // uint64_t(-1) is treated as "no minimum" and reported as 0.
        StorePair(pMin, fTickToMs * (S.AggregateMin[nTimer] != uint64_t(-1) ? S.AggregateMin[nTimer] : 0));
        StorePair(pCallAverage, fTickToMs * (S.Aggregate[nTimer].nTicks / nCallDivisor));
        StorePair(pExclusive, fTickToMs * (S.FrameExclusive[nTimer]));
        StorePair(pAverageExclusive, fTickToMs * (S.AggregateExclusive[nTimer] / nFrameDivisor));
        StorePair(pMaxExclusive, fTickToMs * (S.AggregateMaxExclusive[nTimer]));

        pTotal[nMsSlot] = fTickToMs * S.Aggregate[nTimer].nTicks;
        pTotal[nRatioSlot] = 0.f;
    }
}
float MicroProfileGetTime(const char* pGroup, const char* pName)
{
MicroProfileToken nToken = MicroProfileFindTokenInternal(pGroup, pName);
if(nToken == MICROPROFILE_INVALID_TOKEN)
{
return 0.f;
}
uint32_t nTimerIndex = MicroProfileGetTimerIndex(nToken);
uint32_t nGroupIndex = MicroProfileGetGroupIndex(nToken);
float fToMs = MicroProfileTickToMsMultiplier(S.GroupInfo[nGroupIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTicksPerSecondGpu() : MicroProfileTicksPerSecondCpu());
return S.Frame[nTimerIndex].nTicks * fToMs;
}
// Returns 1 when S.nPlatformMarkersEnabled is non-zero, otherwise 0.
int MicroProfilePlatformMarkersGetEnabled()
{
    if(S.nPlatformMarkersEnabled != 0)
    {
        return 1;
    }
    return 0;
}
// Stores 1 in S.nPlatformMarkersEnabled for any non-zero bEnabled, otherwise 0.
void MicroProfilePlatformMarkersSetEnabled(int bEnabled)
{
    const int nNormalized = (bEnabled != 0) ? 1 : 0;
    S.nPlatformMarkersEnabled = nNormalized;
}
// Enables the verbose uprintf diagnostics inside MicroProfileContextSwitchSearch
// whenever MICROPROFILE_DEBUG evaluates to non-zero.
#define MICROPROFILE_CONTEXT_SWITCH_SEARCH_DEBUG MICROPROFILE_DEBUG
// Scans the whole S.ContextSwitch ring buffer and reports two indices into it:
//   *pContextSwitchStart - the entry farthest back from the put position whose nTicks
//                          is greater than (nBaseTicksCpu - 30ms)
//   *pContextSwitchEnd   - the entry farthest back from the put position whose nTicks
//                          is greater than (nBaseTicksEndCpu + 30ms)
// Both default to the slot just before S.nContextSwitchPut when no entry qualifies.
void MicroProfileContextSwitchSearch(uint32_t* pContextSwitchStart, uint32_t* pContextSwitchEnd, uint64_t nBaseTicksCpu, uint64_t nBaseTicksEndCpu)
{
MICROPROFILE_SCOPE(g_MicroProfileContextSwitchSearch);
// Snapshot the put index once; the loop below works relative to this value.
uint32_t nContextSwitchPut = S.nContextSwitchPut;
uint64_t nContextSwitchStart, nContextSwitchEnd;
// Default result: the slot immediately before the put position.
nContextSwitchStart = nContextSwitchEnd = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
// The search window is widened by 30ms on both sides of the requested range.
int64_t nSearchEnd = nBaseTicksEndCpu + MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
int64_t nSearchBegin = nBaseTicksCpu - MicroProfileMsToTick(30.f, MicroProfileTicksPerSecondCpu());
#if MICROPROFILE_CONTEXT_SWITCH_SEARCH_DEBUG
int64_t lp = S.nContextSwitchLastPushed;
uprintf("cswitch-search\n");
uprintf("Begin %" PRId64 " End %" PRId64 " Last %" PRId64 "\n", nSearchBegin, nSearchEnd, lp);
float fToMs = MicroProfileTickToMsMultiplierCpu();
uprintf("E %6.2fms\n", fToMs * (nSearchEnd - nSearchBegin));
uprintf("LAST %6.2fms\n", fToMs * (lp - nSearchBegin));
#endif
// nMax/nMin track the largest and the smallest non-zero tick seen by the loop below.
// They are not read again in this function (the debug section declares its own pair).
int64_t nMax = INT64_MIN;
int64_t nMin = INT64_MAX;
for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i)
{
// Walk backwards from the slot just before the put position, wrapping around the ring.
uint32_t nIndex = (nContextSwitchPut + MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE - (i + 1)) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE;
MicroProfileContextSwitch& CS = S.ContextSwitch[nIndex];
if(nMax < CS.nTicks)
nMax = CS.nTicks;
if(nMin > CS.nTicks && CS.nTicks != 0)
nMin = CS.nTicks;
// There is no early exit: every later match overwrites the earlier one, so the
// match visited last (farthest back from the put position) is the one reported.
if(CS.nTicks > nSearchEnd)
{
nContextSwitchEnd = nIndex;
}
if(CS.nTicks > nSearchBegin)
{
nContextSwitchStart = nIndex;
}
}
*pContextSwitchStart = nContextSwitchStart;
*pContextSwitchEnd = nContextSwitchEnd;
#if MICROPROFILE_CONTEXT_SWITCH_SEARCH_DEBUG
{
// Debug-only dump of the ring buffer contents; it does not change the results stored above.
uprintf("contextswitch start %" PRId64 " %" PRId64 "\n", nContextSwitchStart, nContextSwitchEnd);
MicroProfileContextSwitch& CS0 = S.ContextSwitch[0];
// These shadow the outer nMax/nMin and are used only for the range dump below.
int64_t nMax = CS0.nTicks;
int64_t nMin = CS0.nTicks;
int64_t nBegin = 0;
int64_t nEnd = 0;
int nRanges = 0;
// First pass: print the min/max tick of every 1024-entry slice of the buffer.
for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; i += 1024)
{
int64_t MinTick = INT64_MAX;
int64_t MaxTick = INT64_MIN;
for(int j = 0; j < 1024; ++j)
{
MicroProfileContextSwitch& CS = S.ContextSwitch[i + j];
int64_t nTicks = CS.nTicks;
MinTick = MicroProfileMin(nTicks, MinTick);
MaxTick = MicroProfileMax(nTicks, MaxTick);
}
// NOTE(review): i is a uint32_t but is printed with PRIx64 - confirm uprintf tolerates this.
uprintf("XX range [%5" PRIx64 ":%5" PRIx64 "] :: [%6.2f:%6.2f] [%p :: %p] .. ref %p\n",
i,
i + 1024,
fToMs * (MinTick - nSearchBegin),
fToMs * (MaxTick - nSearchBegin),
(void*)MinTick,
(void*)MaxTick,
(void*)nSearchBegin
);
}
uprintf("\n\n");
// Second pass: split the buffer into runs and print each run. A new run starts when an
// entry lies before the running maximum or more than 50ms after it.
for(uint32_t i = 0; i < MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE; ++i)
{
MicroProfileContextSwitch& CS = S.ContextSwitch[i];
int64_t nTicks = CS.nTicks;
float fMs = (nTicks - nMax) * fToMs;
if(fMs < 0 || fMs > 50)
{
// dump range here
uprintf("range [%5" PRId64 ":%5" PRId64 "] :: [%6.2f:%6.2f] [%p :: %p] .. ref %p\n",
nBegin,
nEnd,
fToMs * (nMin - nSearchBegin),
fToMs * (nMax - nSearchBegin),
(void*)nMin,
(void*)nMax,
(void*)nSearchBegin
);
nEnd = nBegin = i;
nMax = nMin = CS.nTicks;
nRanges++;
}
else
{
nEnd = i;
nMax = MicroProfileMax(nTicks, nMax);
}
}
}
// Re-read the last pushed tick so the log shows how far it moved during the search.
lp = S.nContextSwitchLastPushed;
uprintf("E %6.2fms\n", fToMs * (nSearchEnd - nSearchBegin));
uprintf("LP2 %6.2fms\n", fToMs * (lp - nSearchBegin));
#endif
}
// Formats the integer counter nCounter into pOut as a zero-terminated string.
//
// eFormat:
//   MICROPROFILE_COUNTER_FORMAT_DEFAULT - decimal digits grouped in threes with '.' as
//                                         separator, prefixed with '-' when negative.
//   MICROPROFILE_COUNTER_FORMAT_BYTES   - value scaled by powers of 1024 with a unit suffix,
//                                         prefixed with '-' or ' '.
//   any other value                     - produces an empty string.
// pOut / nBufferSize: destination buffer and its total size in bytes (including the terminator).
//
// Returns the number of characters stored in pOut, not counting the terminator. Output that
// does not fit is truncated to nBufferSize - 1 characters; the buffer is never overrun.
int MicroProfileFormatCounter(int eFormat, int64_t nCounter, char* pOut, uint32_t nBufferSize)
{
    if(!pOut || nBufferSize == 0)
    {
        return 0;
    }
    if(!nCounter)
    {
        if(nBufferSize < 2)
        {
            pOut[0] = '\0';
            return 0;
        }
        pOut[0] = '0';
        pOut[1] = '\0';
        return 1;
    }
    // Largest number of characters that fits next to the terminator.
    const uint32_t nMaxChars = nBufferSize - 1;
    int nLen = 0;
    int nNegative = 0;
    if(nCounter < 0)
    {
        nNegative = 1;
        // -INT64_MIN is not representable; it is formatted as INT64_MAX instead of
        // relying on signed overflow.
        nCounter = (nCounter == INT64_MIN) ? INT64_MAX : -nCounter;
    }
    switch(eFormat)
    {
    case MICROPROFILE_COUNTER_FORMAT_DEFAULT:
    {
        // Digits are produced least-significant first into a local buffer, then copied out
        // in reverse. Worst case: 19 digits + 6 separators + sign = 26 characters.
        char Reversed[32];
        int nReversed = 0;
        int nSeparate = 0;
        while(nCounter)
        {
            if(nSeparate)
            {
                Reversed[nReversed++] = '.';
            }
            nSeparate = 1;
            for(uint32_t i = 0; nCounter && i < 3; ++i)
            {
                int nDigit = nCounter % 10;
                nCounter /= 10;
                Reversed[nReversed++] = '0' + nDigit;
            }
        }
        if(nNegative)
        {
            Reversed[nReversed++] = '-';
        }
        MP_ASSERT(nReversed < (int)sizeof(Reversed));
        nLen = nReversed;
        if((uint32_t)nLen > nMaxChars)
        {
            // Keep the leading (most significant) characters when truncating.
            nLen = (int)nMaxChars;
        }
        for(int i = 0; i < nLen; ++i)
        {
            pOut[i] = Reversed[nReversed - 1 - i];
        }
    }
    break;
    case MICROPROFILE_COUNTER_FORMAT_BYTES:
    {
        const char* pExt[] = { "b", "kb", "mb", "gb", "tb", "pb", "eb", "zb", "yb" };
        size_t nNumExt = sizeof(pExt) / sizeof(pExt[0]);
        int64_t nShift = 0;
        int64_t nDivisor = 1;
        int64_t nCountShifted = nCounter >> 10;
        // Count how many times the value can be divided by 1024 before reaching zero.
        while(nCountShifted)
        {
            nDivisor <<= 10;
            nCountShifted >>= 10;
            nShift++;
        }
        MP_ASSERT(nShift < (int64_t)nNumExt);
        if(nShift)
        {
            nLen = snprintf(pOut, nBufferSize, "%c%3.2f%s", nNegative ? '-' : ' ', (double)nCounter / nDivisor, pExt[nShift]);
        }
        else
        {
            nLen = snprintf(pOut, nBufferSize, "%c%" PRId64 "%s", nNegative ? '-' : ' ', nCounter, pExt[nShift]);
        }
        // snprintf reports the length the full output would have had; clamp it to what was
        // actually stored so the terminator write below stays inside the buffer.
        if(nLen < 0)
        {
            nLen = 0;
        }
        else if((uint32_t)nLen > nMaxChars)
        {
            nLen = (int)nMaxChars;
        }
    }
    break;
    }
    pOut[nLen] = '\0';
    return nLen;
}
// Formats the floating point counter dCounter into pOut as a zero-terminated string.
//
// eFormat:
//   MICROPROFILE_COUNTER_FORMAT_DEFAULT - printed with "%f".
//   MICROPROFILE_COUNTER_FORMAT_BYTES   - divided by 1024 until it is no larger than 1024
//                                         (or the unit table ends) and printed with "%.3f"
//                                         plus the unit suffix.
//   any other value                     - produces an empty string.
// pOut / nBufferSize: destination buffer and its total size in bytes (including the terminator).
//
// Returns the number of characters stored in pOut, not counting the terminator. Output that
// does not fit is truncated to nBufferSize - 1 characters; the buffer is never overrun.
int MicroProfileFormatCounterDouble(int eFormat, double dCounter, char* pOut, uint32_t nBufferSize)
{
    if(!pOut || nBufferSize == 0)
    {
        return 0;
    }
    if(nBufferSize < 2)
    {
        // Only the terminator fits.
        pOut[0] = '\0';
        return 0;
    }
    // stbsp_snprintf takes an int size; clamp so the conversion cannot go negative.
    const int nCapacity = nBufferSize > (uint32_t)0x7fffffff ? 0x7fffffff : (int)nBufferSize;
    int nLen = 0;
    switch(eFormat)
    {
    case MICROPROFILE_COUNTER_FORMAT_DEFAULT:
    {
        nLen = stbsp_snprintf(pOut, nCapacity, "%f", dCounter);
    }
    break;
    case MICROPROFILE_COUNTER_FORMAT_BYTES:
    {
        const char* pExt[] = { "b", "kb", "mb", "gb", "tb", "pb", "eb", "zb", "yb" };
        double scale = 1.f;
        int offset = 0;
        int end = sizeof(pExt) / sizeof(pExt[0]);
        double d = dCounter;
        while(d / scale > 1024.f && offset + 1 < end)
        {
            scale *= 1024.f;
            offset += 1;
        }
        nLen = stbsp_snprintf(pOut, nCapacity, "%.3f%s", d / scale, pExt[offset]);
    }
    break;
    }
    // The formatter may report the untruncated length; clamp it to what fits so the
    // terminator write below stays inside the buffer.
    if(nLen < 0)
    {
        nLen = 0;
    }
    else if(nLen > nCapacity - 1)
    {
        nLen = nCapacity - 1;
    }
    pOut[nLen] = '\0';
    return nLen;
}
// Returns true when at least one word of S.nActiveGroups is non-zero.
bool MicroProfileAnyGroupActive()
{
    uint32_t nWord = 0;
    // Skip over all-zero words; stopping early means a set bit was found.
    while(nWord < MICROPROFILE_MAX_GROUP_INTS && S.nActiveGroups[nWord] == 0)
    {
        ++nWord;
    }
    return nWord < MICROPROFILE_MAX_GROUP_INTS;
}
// Returns true when the bit for nGroupIndex is set in S.nActiveGroups.
// Asserts that nGroupIndex is below MICROPROFILE_MAX_GROUPS.
bool MicroProfileGroupActive(uint32_t nGroupIndex)
{
    MP_ASSERT(nGroupIndex < MICROPROFILE_MAX_GROUPS);
    // 32 groups per mask word.
    const uint32_t nWord = nGroupIndex / 32;
    const uint32_t nBitInWord = nGroupIndex % 32;
    return 0 != ((S.nActiveGroups[nWord] >> nBitInWord) & 1);
}
// Flips the bit for nGroup in S.nActiveGroupsWanted and raises the
// MICROPROFILE_WEBSOCKET_DIRTY_ENABLED flag. Indices at or above S.nGroupCount are ignored.
void MicroProfileToggleGroup(uint32_t nGroup)
{
    if(nGroup >= S.nGroupCount)
    {
        return;
    }
    const uint32_t nWord = nGroup / 32;
    const uint32_t nBitInWord = nGroup % 32;
    S.nActiveGroupsWanted[nWord] ^= (1ll << nBitInWord);
    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Sets the bit for nGroup in S.nActiveGroupsWanted and raises the
// MICROPROFILE_WEBSOCKET_DIRTY_ENABLED flag. Indices at or above S.nGroupCount are ignored.
void MicroProfileGroupSetEnabled(uint32_t nGroup)
{
    if(nGroup >= S.nGroupCount)
    {
        return;
    }
    const uint32_t nWord = nGroup / 32;
    const uint32_t nBitInWord = nGroup % 32;
    S.nActiveGroupsWanted[nWord] |= (1ll << nBitInWord);
    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Returns true when the bit for nGroup is set in S.nActiveGroupsWanted.
// Indices at or above S.nGroupCount yield false.
bool MicroProfileGroupEnabled(uint32_t nGroup)
{
    if(nGroup >= S.nGroupCount)
    {
        return false;
    }
    const uint32_t nWord = nGroup / 32;
    const uint32_t nBitInWord = nGroup % 32;
    return 0 != (S.nActiveGroupsWanted[nWord] & (1ll << nBitInWord));
}
// Returns true when every bit of the category's group mask is also set in
// S.nActiveGroupsWanted. Indices at or above S.nCategoryCount yield false.
bool MicroProfileCategoryEnabled(uint32_t nCategory)
{
    if(nCategory >= S.nCategoryCount)
    {
        return false;
    }
    for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
    {
        // A mask bit that is missing from the wanted set means the category is not fully enabled.
        if((S.CategoryInfo[nCategory].nGroupMask[nWord] & S.nActiveGroupsWanted[nWord]) != S.CategoryInfo[nCategory].nGroupMask[nWord])
        {
            return false;
        }
    }
    return true;
}
// Returns true when no bit of the category's group mask is set in S.nActiveGroupsWanted.
// Indices at or above S.nCategoryCount yield false.
bool MicroProfileCategoryDisabled(uint32_t nCategory)
{
    if(nCategory >= S.nCategoryCount)
    {
        return false;
    }
    for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
    {
        const uint32_t nWanted = S.nActiveGroupsWanted[nWord];
        const uint32_t nCategoryBits = S.CategoryInfo[nCategory].nGroupMask[nWord];
        const uint32_t nOverlap = nWanted & nCategoryBits;
        if(nOverlap != 0)
        {
            return false;
        }
    }
    return true;
}
// Toggles a whole category in S.nActiveGroupsWanted: when every bit of the category's
// group mask is currently wanted the mask is cleared, otherwise the mask is set.
// Raises MICROPROFILE_WEBSOCKET_DIRTY_ENABLED. Indices at or above S.nCategoryCount are ignored.
void MicroProfileToggleCategory(uint32_t nCategory)
{
    if(nCategory >= S.nCategoryCount)
    {
        return;
    }
    bool bFullyEnabled = true;
    for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
    {
        if(S.CategoryInfo[nCategory].nGroupMask[nWord] != (S.CategoryInfo[nCategory].nGroupMask[nWord] & S.nActiveGroupsWanted[nWord]))
        {
            bFullyEnabled = false;
            break;
        }
    }
    for(uint32_t nWord = 0; nWord < MICROPROFILE_MAX_GROUP_INTS; ++nWord)
    {
        if(bFullyEnabled)
        {
            S.nActiveGroupsWanted[nWord] &= ~S.CategoryInfo[nCategory].nGroupMask[nWord];
        }
        else
        {
            S.nActiveGroupsWanted[nWord] |= S.CategoryInfo[nCategory].nGroupMask[nWord];
        }
    }
    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Suspends the calling thread for nMs milliseconds using Sleep() on Windows and
// usleep() elsewhere.
void MicroProfileSleep(uint32_t nMs)
{
#ifdef _WIN32
    Sleep(nMs);
#else
    // usleep takes microseconds.
    const uint32_t nMicroseconds = nMs * 1000;
    usleep(nMicroseconds);
#endif
}
#if MICROPROFILE_WEBSERVER
#define MICROPROFILE_EMBED_HTML
// Embedded HTML for the capture page, defined in another translation unit. Each page half
// ("begin" / "end") is described by three symbols: an array of chunk pointers, an array of
// chunk sizes and the number of chunks.
extern const char* g_MicroProfileHtml_begin[];
extern size_t g_MicroProfileHtml_begin_sizes[];
extern size_t g_MicroProfileHtml_begin_count;
extern const char* g_MicroProfileHtml_end[];
extern size_t g_MicroProfileHtml_end_sizes[];
extern size_t g_MicroProfileHtml_end_count;
// Same layout for the live view page. MicroProfileDumpHtmlLive emits size - 1 bytes of each chunk.
extern const char* g_MicroProfileHtmlLive_begin[];
extern size_t g_MicroProfileHtmlLive_begin_sizes[];
extern size_t g_MicroProfileHtmlLive_begin_count;
extern const char* g_MicroProfileHtmlLive_end[];
extern size_t g_MicroProfileHtmlLive_end_sizes[];
extern size_t g_MicroProfileHtmlLive_end_count;
// Embedded binary blobs with their lengths.
// NOTE(review): presumably icon images at 16/32/192/512 pixels - confirm against where they are served.
extern const uint32_t uprof_16[];
extern const uint32_t uprof_16_len;
extern const uint32_t uprof_32[];
extern const uint32_t uprof_32_len;
extern const uint32_t uprof_192[];
extern const uint32_t uprof_192_len;
extern const uint32_t uprof_512[];
extern const uint32_t uprof_512_len;
// Output sink used by the dump functions: receives an opaque handle plus a (size, data) pair.
typedef void (*MicroProfileWriteCallback)(void* Handle, size_t size, const char* pData);
// Returns the port currently stored in S.nWebServerPort.
uint32_t MicroProfileWebServerPort()
{
    const uint32_t nPort = S.nWebServerPort;
    return nPort;
}
// Changes the web server port. When nPort differs from the stored port the web server is
// joined and stopped, the new port is stored and S.nWebServerDataSent is set to (uint64_t)-1.
// Passing the current port does nothing.
void MicroProfileSetWebServerPort(uint32_t nPort)
{
    if(S.nWebServerPort == nPort)
    {
        return;
    }
    MicroProfileWebServerJoin();
    MicroProfileWebServerStop();
    S.nWebServerPort = nPort;
    // Will cause the web server and its thread to be restarted next time MicroProfileFlip() is called.
    S.nWebServerDataSent = (uint64_t)-1;
}
// Flips the profiler a fixed number of times and then dumps to file right away.
//
// pHtml / pCsv: optional output paths (either may be null). A trailing ".html" / ".csv"
//               is stripped before the path is stored in S.HtmlDumpPath / S.CsvDumpPath.
//               A path too long for its destination aborts the call before anything is dumped.
// pGpuContext:  forwarded to every MicroProfileFlip call.
// FrameCount:   stored in S.DumpFrameCount.
void MicroProfileDumpFileImmediately(const char* pHtml, const char* pCsv, void* pGpuContext, uint32_t FrameCount)
{
    for(uint32_t i = 0; i < 2; ++i)
    {
        MicroProfileFlip(pGpuContext);
    }
    for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY + 1; ++i)
    {
        MicroProfileFlip(pGpuContext);
    }
    // Copies pSrc into pDest without a trailing pExt and zero-terminates it.
    // Returns false, leaving pDest untouched, when pSrc does not fit.
    auto StorePathWithoutExtension = [](char* pDest, size_t nDestSize, const char* pSrc, const char* pExt, size_t nExtSize) -> bool
    {
        size_t nLen = strlen(pSrc);
        if(nLen > nDestSize - 1)
        {
            return false;
        }
        if(nLen > nExtSize && 0 == memcmp(pExt, pSrc + nLen - nExtSize, nExtSize))
        {
            nLen -= nExtSize;
        }
        // Copy exactly the kept characters; the same length is used for both HTML and CSV.
        memcpy(pDest, pSrc, nLen);
        pDest[nLen] = '\0';
        return true;
    };
    uint32_t nDumpMask = 0;
    if(pHtml)
    {
        if(!StorePathWithoutExtension(S.HtmlDumpPath, sizeof(S.HtmlDumpPath), pHtml, ".html", sizeof(".html") - 1))
        {
            return;
        }
        nDumpMask |= 1;
    }
    if(pCsv)
    {
        if(!StorePathWithoutExtension(S.CsvDumpPath, sizeof(S.CsvDumpPath), pCsv, ".csv", sizeof(".csv") - 1))
        {
            return;
        }
        nDumpMask |= 2;
    }
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
    S.nDumpFileNextFrame = nDumpMask;
    S.nDumpSpikeMask = 0;
    S.nDumpFileCountDown = 0;
    S.DumpFrameCount = FrameCount;
    MicroProfileDumpToFile();
}
// Configures a file dump.
//
// pHtml / pCsv: optional output paths (either may be null). A trailing ".html" / ".csv"
//               is stripped before the path is stored in S.HtmlDumpPath / S.CsvDumpPath.
//               A path too long for its destination makes the function return early.
// fCpuSpike / fGpuSpike: when either is greater than 0 the dump mask is stored in
//               S.nDumpSpikeMask and nothing is dumped now; otherwise the mask is stored in
//               S.nDumpFileNextFrame and MicroProfileDumpToFile() runs immediately under the mutex.
// FrameCount:   stored in S.DumpFrameCount.
void MicroProfileDumpFile(const char* pHtml, const char* pCsv, float fCpuSpike, float fGpuSpike, uint32_t FrameCount)
{
    S.fDumpCpuSpike = fCpuSpike;
    S.fDumpGpuSpike = fGpuSpike;
    S.DumpFrameCount = FrameCount;

    // Copies pSrc into pDest without a trailing pExt and zero-terminates it.
    // Returns false, leaving pDest untouched, when pSrc does not fit.
    auto StorePathWithoutExtension = [](char* pDest, size_t nDestSize, const char* pSrc, const char* pExt, size_t nExtSize) -> bool
    {
        size_t nKeep = strlen(pSrc);
        if(nKeep > nDestSize - 1)
        {
            return false;
        }
        if(nKeep > nExtSize && 0 == memcmp(pExt, pSrc + nKeep - nExtSize, nExtSize))
        {
            nKeep -= nExtSize;
        }
        memcpy(pDest, pSrc, nKeep);
        pDest[nKeep] = '\0';
        return true;
    };

    uint32_t nDumpMask = 0;
    if(pHtml)
    {
        if(!StorePathWithoutExtension(S.HtmlDumpPath, sizeof(S.HtmlDumpPath), pHtml, ".html", sizeof(".html") - 1))
        {
            return;
        }
        nDumpMask |= 1;
    }
    if(pCsv)
    {
        if(!StorePathWithoutExtension(S.CsvDumpPath, sizeof(S.CsvDumpPath), pCsv, ".csv", sizeof(".csv") - 1))
        {
            return;
        }
        nDumpMask |= 2;
    }

    const bool bSpikeTriggered = fCpuSpike > 0.f || fGpuSpike > 0.f;
    if(bSpikeTriggered)
    {
        S.nDumpFileNextFrame = 0;
        S.nDumpSpikeMask = nDumpMask;
        return;
    }
    std::lock_guard<std::recursive_mutex> Guard(MicroProfileMutex());
    S.nDumpFileNextFrame = nDumpMask;
    S.nDumpSpikeMask = 0;
    S.nDumpFileCountDown = 0;
    MicroProfileDumpToFile();
}
// User data handed to MicroProfilePrintfCallback: the write callback to invoke
// and the opaque handle that is passed through to it.
struct MicroProfilePrintfArgs
{
// Sink that receives each formatted chunk.
MicroProfileWriteCallback CB;
// Opaque value forwarded as the first argument of CB.
void* Handle;
};
char* MicroProfilePrintfCallback(const char* buf, void* user, int len)
{
MicroProfilePrintfArgs* A = (MicroProfilePrintfArgs*)user;
(A->CB)(A->Handle, len, buf);
return const_cast<char*>(buf);
};
// printf-style formatting routed to a write callback: the text produced from pFmt and the
// variadic arguments is delivered to CB(Handle, ...) through MicroProfilePrintfCallback.
void MicroProfilePrintf(MicroProfileWriteCallback CB, void* Handle, const char* pFmt, ...)
{
    MicroProfilePrintfArgs Sink;
    Sink.CB = CB;
    Sink.Handle = Handle;
    // Working buffer handed to stbsp_vsprintfcb.
    char Scratch[STB_SPRINTF_MIN];
    va_list ArgList;
    va_start(ArgList, pFmt);
    // The returned length is not needed.
    (void)stbsp_vsprintfcb(MicroProfilePrintfCallback, (void*)&Sink, Scratch, pFmt, ArgList);
    va_end(ArgList);
}
// Chooses the range of frame-history slots to dump.
//
// nStartFrameId: frame id to start from, or (uint64_t)-1 for "no specific frame".
// nMaxFrames:    upper bound for the number of frames.
// Outputs: nFirstFrame (first slot), nNumFrames (count) and
//          nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY.
//
// When no start frame is given, it is not found, or the resulting count is 0, the range
// falls back to the most recent frames ending at S.nFrameCurrent.
void MicroProfileGetFramesToDump(uint64_t nStartFrameId, uint32_t nMaxFrames, uint32_t& nFirstFrame, uint32_t& nLastFrame, uint32_t& nNumFrames)
{
    nFirstFrame = (uint32_t)-1;
    nNumFrames = 0;
    if(nStartFrameId != (uint64_t)-1)
    {
        // Search for the frame: the first slot carrying the requested id.
        uint32_t nSlot = 0;
        while(nSlot < MICROPROFILE_MAX_FRAME_HISTORY && S.Frames[nSlot].nFrameId != nStartFrameId)
        {
            ++nSlot;
        }
        if(nSlot < MICROPROFILE_MAX_FRAME_HISTORY)
        {
            nFirstFrame = nSlot;
            nLastFrame = S.nFrameCurrent;
            // NOTE(review): this measures the wrap-around distance from the current frame to
            // the first frame (first - current), not current - first - confirm this is intended.
            const uint32_t nDistance = (MICROPROFILE_MAX_FRAME_HISTORY + nFirstFrame - nLastFrame) % MICROPROFILE_MAX_FRAME_HISTORY;
            nNumFrames = MicroProfileMin(nDistance, (uint32_t)nMaxFrames);
        }
    }
    if(0 == nNumFrames)
    {
        // Leave a few frames so the ones still being written are not dumped.
        nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3);
        nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
        nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
    }
    nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
}
// Within the CSV dump functions printf writes through the CB/Handle pair in scope.
#define printf(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
// Writes the CSV described by S.CsvConfig: one header row followed by one row per frame for
// nNumFrames frames starting at slot nFirstFrame. Columns are the first field ("Time" in ms
// since the first dumped frame, or "FrameNumber"), then the configured timers, groups and
// counters. nLastFrame is not used.
void MicroProfileDumpCsvWithConfig(MicroProfileWriteCallback CB, void* Handle, uint32_t nFirstFrame, uint32_t nLastFrame, uint32_t nNumFrames)
{
    const uint32_t nTimerColumns = S.CsvConfig.NumTimers;
    const uint32_t nGroupColumns = S.CsvConfig.NumGroups;
    const uint32_t nCounterColumns = S.CsvConfig.NumCounters;
    uint16_t* pTimerIndices = S.CsvConfig.TimerIndices;
    uint16_t* pGroupIndices = S.CsvConfig.GroupIndices;
    uint16_t* pCounterIndices = S.CsvConfig.CounterIndices;
    uint64_t* pAllFrameData = S.CsvConfig.FrameData;
    const uint32_t nElementsPerFrame = S.CsvConfig.TotalElements;
    const bool bTimeColumn = 0 != (MICROPROFILE_CSV_FLAG_FRAME_TIME & S.CsvConfig.Flags);
    const char** pTimerLabels = S.CsvConfig.pTimerNames;
    const char** pGroupLabels = S.CsvConfig.pGroupNames;
    const char** pCounterLabels = S.CsvConfig.pCounterNames;

    // Header row: a configured label wins over the registered name.
    if(bTimeColumn)
    {
        printf("Time");
    }
    else
    {
        printf("FrameNumber");
    }
    for(uint32_t nCol = 0; nCol < nTimerColumns; ++nCol)
    {
        printf(", %s", pTimerLabels[nCol] ? pTimerLabels[nCol] : S.TimerInfo[pTimerIndices[nCol]].pName);
    }
    for(uint32_t nCol = 0; nCol < nGroupColumns; ++nCol)
    {
        printf(", %s", pGroupLabels[nCol] ? pGroupLabels[nCol] : S.GroupInfo[pGroupIndices[nCol]].pName);
    }
    for(uint32_t nCol = 0; nCol < nCounterColumns; ++nCol)
    {
        printf(", %s", pCounterLabels[nCol] ? pCounterLabels[nCol] : S.CounterInfo[pCounterIndices[nCol]].pName);
    }
    printf("\n");

    // Per-column tick-to-ms factors, chosen by GPU/CPU type.
    float* pTimerTickToMs = (float*)alloca(sizeof(float) * nTimerColumns);
    float* pGroupTickToMs = (float*)alloca(sizeof(float) * nGroupColumns);
    const float fCpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
    const float fGpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
    for(uint32_t nCol = 0; nCol < nTimerColumns; ++nCol)
    {
        pTimerTickToMs[nCol] = S.TimerInfo[pTimerIndices[nCol]].Type == MicroProfileTokenTypeGpu ? fGpuTickToMs : fCpuTickToMs;
    }
    for(uint32_t nCol = 0; nCol < nGroupColumns; ++nCol)
    {
        pGroupTickToMs[nCol] = S.GroupInfo[pGroupIndices[nCol]].Type == MicroProfileTokenTypeGpu ? fGpuTickToMs : fCpuTickToMs;
    }

    const uint64_t nFirstFrameTick = S.Frames[nFirstFrame % MICROPROFILE_MAX_FRAME_HISTORY].nFrameStartCpu;
    for(uint32_t nRow = 0; nRow < nNumFrames; ++nRow)
    {
        const uint32_t nSlot = ((nFirstFrame + nRow) % MICROPROFILE_MAX_FRAME_HISTORY);
        const uint64_t nFrameTick = S.Frames[nSlot].nFrameStartCpu;
        uint64_t* pRowData = pAllFrameData + nElementsPerFrame * nSlot;
        if(bTimeColumn)
        {
            printf("%f", (nFrameTick - nFirstFrameTick) * fCpuTickToMs);
        }
        else
        {
            printf("%d", nRow);
        }
        // Row data is laid out as timers, then groups, then counters.
        uint32_t nElement = 0;
        for(uint32_t nCol = 0; nCol < nTimerColumns; ++nCol)
        {
            printf(", %f", pRowData[nElement++] * pTimerTickToMs[nCol]);
        }
        for(uint32_t nCol = 0; nCol < nGroupColumns; ++nCol)
        {
            printf(", %f", pRowData[nElement++] * pGroupTickToMs[nCol]);
        }
        for(uint32_t nCol = 0; nCol < nCounterColumns; ++nCol)
        {
            const bool bDoubleCounter = 0 != (S.CounterInfo[pCounterIndices[nCol]].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE);
            if(bDoubleCounter)
            {
                // Double counters are read back from the same 64-bit slot.
                printf(", %f", ((double*)pRowData)[nElement++]);
            }
            else
            {
                printf(", %lld", pRowData[nElement++]);
            }
        }
        printf("\n");
    }
}
// Writes a CSV with one column per timer and one row per frame, using the per-frame
// timer values stored in S.FrameExtraCounterData (which must be non-null). Timers beyond
// the count recorded for a frame are written as 0. nLastFrame is not used.
void MicroProfileDumpCsvTimerFrames(MicroProfileWriteCallback CB, void* Handle, uint32_t nFirstFrame, uint32_t nLastFrame, uint32_t nNumFrames)
{
    MP_ASSERT(S.FrameExtraCounterData);
    const uint32_t nTimerColumns = S.nTotalTimers;
    float* pTickToMs = (float*)alloca(sizeof(float) * nTimerColumns);
    const float fCpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
    const float fGpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
    for(uint32_t nCol = 0; nCol < nTimerColumns; ++nCol)
    {
        pTickToMs[nCol] = S.TimerInfo[nCol].Type == MicroProfileTokenTypeGpu ? fGpuTickToMs : fCpuTickToMs;
    }

    // Header row: the first column also carries the "FrameNumber" label.
    for(uint32_t nCol = 0; nCol < nTimerColumns; ++nCol)
    {
        if(nCol == 0)
        {
            printf("FrameNumber, \"%s\"", S.TimerInfo[nCol].pName);
        }
        else
        {
            printf(",\"%s\"", S.TimerInfo[nCol].pName);
        }
    }
    printf("\n");

    for(uint32_t nRow = 0; nRow < nNumFrames; ++nRow)
    {
        MicroProfileFrameExtraCounterData* pFrameData = S.FrameExtraCounterData + ((nRow + nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY);
        printf("%d", nRow);
        const uint32_t nRecorded = MicroProfileMin(nTimerColumns, (uint32_t)pFrameData->NumTimers);
        uint32_t nCol = 0;
        while(nCol < nRecorded)
        {
            printf(",%f", pFrameData->Timers[nCol] * pTickToMs[nCol]);
            ++nCol;
        }
        // Pad the remaining columns so every row has the same width.
        while(nCol < nTimerColumns)
        {
            printf(",0");
            ++nCol;
        }
        printf("\n");
    }
}
// Writes a CSV with one column per group and one row per frame, using the per-frame
// group values stored in S.FrameExtraCounterData (which must be non-null). Groups beyond
// the count recorded for a frame are written as 0. nLastFrame is not used.
void MicroProfileDumpCsvGroupFrames(MicroProfileWriteCallback CB, void* Handle, uint32_t nFirstFrame, uint32_t nLastFrame, uint32_t nNumFrames)
{
    MP_ASSERT(S.FrameExtraCounterData);
    const float fCpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
    const float fGpuTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
    const uint32_t nGroupColumns = S.nGroupCount;
    float* pTickToMs = (float*)alloca(sizeof(float) * nGroupColumns);
    for(uint32_t nCol = 0; nCol < nGroupColumns; ++nCol)
    {
        pTickToMs[nCol] = S.GroupInfo[nCol].Type == MicroProfileTokenTypeGpu ? fGpuTickToMs : fCpuTickToMs;
    }

    // Header row: the first column also carries the "FrameNumber" label.
    for(uint32_t nCol = 0; nCol < nGroupColumns; ++nCol)
    {
        if(nCol == 0)
        {
            printf("FrameNumber, \"%s\"", S.GroupInfo[nCol].pName);
        }
        else
        {
            printf(",\"%s\"", S.GroupInfo[nCol].pName);
        }
    }
    printf("\n");

    for(uint32_t nRow = 0; nRow < nNumFrames; ++nRow)
    {
        MicroProfileFrameExtraCounterData* pFrameData = S.FrameExtraCounterData + ((nRow + nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY);
        printf("%d", nRow);
        const uint32_t nRecorded = MicroProfileMin(nGroupColumns, (uint32_t)pFrameData->NumGroups);
        uint32_t nCol = 0;
        while(nCol < nRecorded)
        {
            printf(",%f", pFrameData->Groups[nCol] * pTickToMs[nCol]);
            ++nCol;
        }
        // Pad the remaining columns so every row has the same width.
        while(nCol < nGroupColumns)
        {
            printf(",0");
            ++nCol;
        }
        printf("\n");
    }
}
void MicroProfileDumpCsv(uint32_t nDumpFrameCount)
{
uint32_t nNumFrames, nFirstFrame, nLastFrame;
MicroProfileGetFramesToDump((uint64_t)-1, nDumpFrameCount, nFirstFrame, nLastFrame, nNumFrames);
char Path[MICROPROFILE_MAX_PATH];
int Length;
if(S.FrameExtraCounterData)
{
Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s_timer_frames.csv", S.CsvDumpPath);
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
{
FILE* F = fopen(Path, "w");
if(F)
{
MicroProfileDumpCsvTimerFrames(MicroProfileWriteFile, F, nFirstFrame, nLastFrame, nNumFrames);
fclose(F);
}
}
Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s_group_frames.csv", S.CsvDumpPath);
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
{
FILE* F = fopen(Path, "w");
if(F)
{
MicroProfileDumpCsvGroupFrames(MicroProfileWriteFile, F, nFirstFrame, nLastFrame, nNumFrames);
fclose(F);
}
}
}
if(S.CsvConfig.State == MicroProfileCsvConfig::ACTIVE)
{
Length = snprintf(Path, sizeof(S.CsvDumpPath), "%s_custom.csv", S.CsvDumpPath);
if(Length > 0 && Length < MICROPROFILE_MAX_PATH)
{
FILE* F = fopen(Path, "w");
if(F)
{
MicroProfileDumpCsvWithConfig(MicroProfileWriteFile, F, nFirstFrame, nLastFrame, nNumFrames);
fclose(F);
}
}
}
}
// Writes the legacy aggregate CSV through CB/Handle. Sections, in order:
//   frames,<aggregate frame count>
//   per-timer rows      (name, group, average, max, call average)
//   per-group rows      (group, average, max, total)
//   per-group/thread    (group, thread, average, total) - only rows with total > 0.01ms
//   frametimecpu        one line of CPU frame times
//   frametimegpu        one line of GPU frame times
void MicroProfileDumpCsvLegacy(MicroProfileWriteCallback CB, void* Handle)
{
    uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
    float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
    float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
    printf("frames,%d\n", nAggregateFrames);
    // Header order matches the row layout below: timer name first, then its group.
    printf("name,group,average,max,callaverage\n");
    uint32_t nNumTimers = S.nTotalTimers;
    // Two floats (ms, ratio) per timer; nine consecutive arrays share one stack allocation.
    uint32_t nBlockSize = 2 * nNumTimers;
    float* pTimers = (float*)alloca(nBlockSize * 9 * sizeof(float));
    float* pAverage = pTimers + nBlockSize;
    float* pMax = pTimers + 2 * nBlockSize;
    float* pMin = pTimers + 3 * nBlockSize;
    float* pCallAverage = pTimers + 4 * nBlockSize;
    float* pTimersExclusive = pTimers + 5 * nBlockSize;
    float* pAverageExclusive = pTimers + 6 * nBlockSize;
    float* pMaxExclusive = pTimers + 7 * nBlockSize;
    float* pTotal = pTimers + 8 * nBlockSize;
    MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pMin, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
    for(uint32_t i = 0; i < S.nTotalTimers; ++i)
    {
        uint32_t nIdx = i * 2;
        printf("\"%s\",\"%s\",%f,%f,%f\n", S.TimerInfo[i].pName, S.GroupInfo[S.TimerInfo[i].nGroupIndex].pName, pAverage[nIdx], pMax[nIdx], pCallAverage[nIdx]);
    }
    printf("\n\n");
    printf("group,average,max,total\n");
    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
    {
        const char* pGroupName = S.GroupInfo[j].pName;
        float fToMs = S.GroupInfo[j].Type == MicroProfileTokenTypeGpu ? fToMsGPU : fToMsCPU;
        // Groups with an empty name are skipped.
        if(pGroupName[0] != '\0')
        {
            // The max column comes from S.AggregateGroupMax; it is not divided by the frame count.
            printf("\"%s\",%.3f,%.3f,%.3f\n", pGroupName, fToMs * S.AggregateGroup[j] / nAggregateFrames, fToMs * S.AggregateGroupMax[j], fToMs * S.AggregateGroup[j]);
        }
    }
    printf("\n\n");
    printf("group,thread,average,total\n");
    for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
    {
        for(uint32_t i = 0; i < S.nNumLogs; ++i)
        {
            if(S.Pool[i])
            {
                const char* pThreadName = &S.Pool[i]->ThreadName[0];
                float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
                {
                    uint64_t nTicks = S.Pool[i]->nAggregateGroupTicks[j];
                    float fTime = nTicks / nAggregateFrames * fToMs;
                    float fTimeTotal = nTicks * fToMs;
                    if(fTimeTotal > 0.01f)
                    {
                        const char* pGroupName = S.GroupInfo[j].pName;
                        printf("\"%s\",\"%s\",%.3f,%.3f\n", pGroupName, pThreadName, fTime, fTimeTotal);
                    }
                }
            }
        }
    }
    printf("\n\n");
    printf("frametimecpu\n");
    // Leave a few history slots out so frames still being written are not reported.
    const uint32_t nCount = MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3;
    const uint32_t nStart = S.nFrameCurrent;
    for(uint32_t i = nCount; i > 0; i--)
    {
        // Frame time is the difference between the start ticks of two consecutive slots.
        uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
        uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
        uint64_t nTicks = S.Frames[nFrameNext].nFrameStartCpu - S.Frames[nFrame].nFrameStartCpu;
        printf("%f,", nTicks * fToMsCPU);
    }
    printf("\n");
    printf("\n\n");
    printf("frametimegpu\n");
    for(uint32_t i = nCount; i > 0; i--)
    {
        uint32_t nFrame = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i) % MICROPROFILE_MAX_FRAME_HISTORY;
        uint32_t nFrameNext = (nStart + MICROPROFILE_MAX_FRAME_HISTORY - i + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
        uint64_t nTicks = S.Frames[nFrameNext].nFrameStartGpu - S.Frames[nFrame].nFrameStartGpu;
        printf("%f,", nTicks * fToMsGPU);
    }
    printf("\n\n");
}
#undef printf
// Writes the legacy aggregate CSV to "<S.CsvDumpPath>.csv". Nothing is written when the
// full path does not fit in MICROPROFILE_MAX_PATH or the file cannot be opened.
void MicroProfileDumpCsvLegacy()
{
    char Path[MICROPROFILE_MAX_PATH];
    // Bound the write by the destination buffer itself, not by S.CsvDumpPath,
    // whose size may differ from MICROPROFILE_MAX_PATH.
    int Length = snprintf(Path, sizeof(Path), "%s.csv", S.CsvDumpPath);
    if(Length > 0 && Length < (int)sizeof(Path))
    {
        FILE* F = fopen(Path, "w");
        if(F)
        {
            MicroProfileDumpCsvLegacy(MicroProfileWriteFile, F);
            fclose(F);
        }
    }
}
// Streams the embedded live-view page through CB: first every "begin" chunk, then every
// "end" chunk. Each chunk is emitted with one byte less than its recorded size.
void MicroProfileDumpHtmlLive(MicroProfileWriteCallback CB, void* Handle)
{
    auto EmitChunks = [&](const char* const* ppChunks, const size_t* pSizes, size_t nChunks)
    {
        for(size_t nChunk = 0; nChunk < nChunks; ++nChunk)
        {
            CB(Handle, pSizes[nChunk] - 1, ppChunks[nChunk]);
        }
    };
    EmitChunks(g_MicroProfileHtmlLive_begin, g_MicroProfileHtmlLive_begin_sizes, g_MicroProfileHtmlLive_begin_count);
    EmitChunks(g_MicroProfileHtmlLive_end, g_MicroProfileHtmlLive_end_sizes, g_MicroProfileHtmlLive_end_count);
}
// Windows only: queries GetSystemCpuSetInformation and records the efficiency class of each
// logical processor in S.CoreEfficiencyClass. Does nothing on other platforms, and leaves
// the table untouched when the query fails.
void MicroProfileGetCoreInformation()
{
#ifdef _WIN32
    unsigned long BufferSize = 0;
    HANDLE Process = GetCurrentProcess();
    // First call with no buffer only reports the required size.
    GetSystemCpuSetInformation(nullptr, 0, &BufferSize, Process, 0);
    if(0 == BufferSize)
    {
        return;
    }
    char* Buffer = (char*)alloca(BufferSize);
    if(!GetSystemCpuSetInformation((PSYSTEM_CPU_SET_INFORMATION)Buffer, BufferSize, &BufferSize, Process, 0))
    {
        return;
    }
    // Entries are variable-sized; each one states its own size.
    for(ULONG Size = 0; Size < BufferSize;)
    {
        PSYSTEM_CPU_SET_INFORMATION CpuSet = reinterpret_cast<PSYSTEM_CPU_SET_INFORMATION>(Buffer + Size);
        if(0 == CpuSet->Size)
        {
            // A zero-sized entry would never advance the cursor.
            break;
        }
        if(CpuSet->Type == CPU_SET_INFORMATION_TYPE::CpuSetInformation)
        {
            // Bounds-check the value that is actually used as the index.
            // NOTE(review): assumes S.CoreEfficiencyClass has MICROPROFILE_MAX_CPU_CORES entries - confirm.
            if(CpuSet->CpuSet.LogicalProcessorIndex < MICROPROFILE_MAX_CPU_CORES)
            {
                S.CoreEfficiencyClass[CpuSet->CpuSet.LogicalProcessorIndex] = CpuSet->CpuSet.EfficiencyClass;
            }
        }
        Size += CpuSet->Size;
    }
#endif
}
// Writes a complete HTML capture through the write callback.
//
// CB / Handle    : output sink; every piece of output goes through CB(Handle, size, data).
// nMaxFrames     : upper bound on the number of frames dumped.
// pHost          : written as S.DumpHost; may be null (an empty string is written).
// nStartFrameId  : when not (uint64_t)-1, the dump starts at the history slot whose
//                  nFrameId matches; when no slot matches (or the range is empty) the
//                  most recent frames are dumped instead.
//
// While running, S.nActiveGroups / S.AnyActive are cleared and S.nPauseTicks is set;
// all three are restored/reset before returning.
void MicroProfileDumpHtml(MicroProfileWriteCallback CB, void* Handle, uint64_t nMaxFrames, const char* pHost, uint64_t nStartFrameId = (uint64_t)-1)
{
	// Stall pushing of timers
	uint64_t nActiveGroup[MICROPROFILE_MAX_GROUP_INTS];
	memcpy(nActiveGroup, S.nActiveGroups, sizeof(S.nActiveGroups));
	memset(S.nActiveGroups, 0, sizeof(S.nActiveGroups));
	bool AnyActive = S.AnyActive;
	S.AnyActive = false;
	S.nPauseTicks = MP_TICK();
	MicroProfileGetCoreInformation();
	if(S.bContextSwitchRunning)
	{
		// True while the context switch thread has not yet reported a stall tick
		// at or past the pause tick.
		auto StallForContextSwitchThread = []()
		{
			int64_t nPauseTicks = S.nPauseTicks;
			int64_t nContextSwitchStalledTick = S.nContextSwitchStalledTick;
			return (nPauseTicks - nContextSwitchStalledTick) > 0;
		};
		int SleepMs = 1;
		while(S.bContextSwitchRunning && !S.bContextSwitchStop && StallForContextSwitchThread())
		{
			MicroProfileSleep(SleepMs);
			// Exponential backoff capped at 128ms. Scaling by 2/3 in integer
			// arithmetic collapsed 1 -> 0, turning the wait into a busy spin.
			SleepMs = MicroProfileMin(128, SleepMs * 2);
		}
		int64_t TicksAfterStall = MP_TICK();
		uprintf("Stalled %7.2fms for context switch data\n", MicroProfileTickToMsMultiplierCpu() * (TicksAfterStall - S.nPauseTicks));
	}
	// Maps const char* marker names found in the logs to synthetic timer indices.
	MicroProfileHashTable StringsHashTable;
	MicroProfileHashTableInit(&StringsHashTable, 50, 25, MicroProfileHashTableCompareString, MicroProfileHashTableHashString);
	defer
	{
		MicroProfileHashTableDestroy(&StringsHashTable);
	};
	MicroProfileCounterFetchCounters();
	for(size_t i = 0; i < g_MicroProfileHtml_begin_count; ++i)
	{
		CB(Handle, g_MicroProfileHtml_begin_sizes[i] - 1, g_MicroProfileHtml_begin[i]);
	}
	// dump info
	uint64_t nTicks = MP_TICK();
	float fToMsCPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
	float fToMsGPU = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
	float fAggregateMs = fToMsCPU * (nTicks - S.nAggregateFlipTick);
	uint32_t nNumFrames = 0;
	uint32_t nFirstFrame = (uint32_t)-1;
	if(nStartFrameId != (uint64_t)-1)
	{
		// search for the frame
		for(uint32_t i = 0; i < MICROPROFILE_MAX_FRAME_HISTORY; ++i)
		{
			if(S.Frames[i].nFrameId == nStartFrameId)
			{
				nFirstFrame = i;
				break;
			}
		}
		if(nFirstFrame != (uint32_t)-1)
		{
			// Number of frames available going forward from the start frame up to
			// the current frame. Measuring the other way round (first - current)
			// let a large nMaxFrames run the dump past the current frame.
			uint32_t nCurrentFrame = S.nFrameCurrent;
			uint32_t nDistance = (MICROPROFILE_MAX_FRAME_HISTORY + nCurrentFrame - nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY;
			nNumFrames = MicroProfileMin(nDistance, (uint32_t)nMaxFrames);
		}
	}
	if(nNumFrames == 0)
	{
		nNumFrames = (MICROPROFILE_MAX_FRAME_HISTORY - MICROPROFILE_GPU_FRAME_DELAY - 3); // leave a few to not overwrite
		nNumFrames = MicroProfileMin(nNumFrames, (uint32_t)nMaxFrames);
		nFirstFrame = (S.nFrameCurrent + MICROPROFILE_MAX_FRAME_HISTORY - nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
	}
	uint32_t nLastFrame = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
	MP_ASSERT(nFirstFrame < MICROPROFILE_MAX_FRAME_HISTORY);
	MP_ASSERT(nLastFrame < MICROPROFILE_MAX_FRAME_HISTORY);
	MicroProfilePrintf(CB, Handle, "S.DumpHost = '%s';\n", pHost ? pHost : "");
	time_t CaptureTime;
	time(&CaptureTime);
	// time_t is not guaranteed to be 'long'; print it through an explicit 64-bit cast.
	MicroProfilePrintf(CB, Handle, "S.DumpUtcCaptureTime = %lld;\n", (long long)CaptureTime);
	MicroProfilePrintf(CB, Handle, "S.AggregateInfo = {'Frames':%d, 'Time':%f};\n", S.nAggregateFrames, fAggregateMs);
	// categories
	MicroProfilePrintf(CB, Handle, "S.CategoryInfo = Array(%d);\n", S.nCategoryCount);
	for(uint32_t i = 0; i < S.nCategoryCount; ++i)
	{
		MicroProfilePrintf(CB, Handle, "S.CategoryInfo[%d] = \"%s\";\n", i, S.CategoryInfo[i].pName);
	}
	// groups
	MicroProfilePrintf(CB, Handle, "S.GroupInfo = Array(%d);\n\n", S.nGroupCount + 1);
	uint32_t nAggregateFrames = S.nAggregateFrames ? S.nAggregateFrames : 1;
	float fRcpAggregateFrames = 1.f / nAggregateFrames;
	(void)fRcpAggregateFrames;
	char ColorString[32];
	for(uint32_t i = 0; i < S.nGroupCount; ++i)
	{
		MP_ASSERT(i == S.GroupInfo[i].nGroupIndex);
		float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fToMsCPU : fToMsGPU;
		const char* pColorStr = "";
		if(S.GroupInfo[i].nColor != 0x42)
		{
			stbsp_snprintf(ColorString,
						   sizeof(ColorString) - 1,
						   "#%02x%02x%02x",
						   MICROPROFILE_UNPACK_RED(S.GroupInfo[i].nColor) & 0xff,
						   MICROPROFILE_UNPACK_GREEN(S.GroupInfo[i].nColor) & 0xff,
						   MICROPROFILE_UNPACK_BLUE(S.GroupInfo[i].nColor) & 0xff);
			pColorStr = &ColorString[0];
		}
		MicroProfilePrintf(CB,
						   Handle,
						   "S.GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, '%s');\n",
						   S.GroupInfo[i].nGroupIndex,
						   S.GroupInfo[i].nGroupIndex,
						   S.GroupInfo[i].pName,
						   S.GroupInfo[i].nCategory,
						   S.GroupInfo[i].nNumTimers,
						   S.GroupInfo[i].Type == MicroProfileTokenTypeGpu ? 1 : 0,
						   fToMs * S.AggregateGroup[i],
						   fToMs * S.AggregateGroup[i] / nAggregateFrames,
						   fToMs * S.AggregateGroupMax[i],
						   pColorStr);
	}
	// Extra synthetic group that owns the string-named timers emitted further down.
	uint32_t nUncategorized = S.nGroupCount;
	MicroProfilePrintf(CB,
					   Handle,
					   "S.GroupInfo[%d] = MakeGroup(%d, \"%s\", %d, %d, %d, %f, %f, %f, 'grey');\n",
					   nUncategorized,
					   nUncategorized,
					   "Uncategorized",
					   -1,
					   1,
					   // S.GroupInfo[i].Type == MicroProfileTokenTypeGpu ? 1 :
					   0,
					   // The three %f arguments must be doubles; integer zeros are
					   // undefined behaviour for a varargs %f.
					   0.0,
					   0.0,
					   0.0);
	// timers
	uint32_t nNumTimers = S.nTotalTimers;
	uint32_t nBlockSize = 2 * nNumTimers;
	// One stack allocation carved into nine equally sized result arrays.
	float* pTimers = (float*)alloca(nBlockSize * 9 * sizeof(float));
	float* pAverage = pTimers + nBlockSize;
	float* pMax = pTimers + 2 * nBlockSize;
	float* pMin = pTimers + 3 * nBlockSize;
	float* pCallAverage = pTimers + 4 * nBlockSize;
	float* pTimersExclusive = pTimers + 5 * nBlockSize;
	float* pAverageExclusive = pTimers + 6 * nBlockSize;
	float* pMaxExclusive = pTimers + 7 * nBlockSize;
	float* pTotal = pTimers + 8 * nBlockSize;
	MicroProfileCalcAllTimers(pTimers, pAverage, pMax, pMin, pCallAverage, pTimersExclusive, pAverageExclusive, pMaxExclusive, pTotal, nNumTimers);
	MicroProfilePrintf(CB, Handle, "\nS.TimerInfo = Array(%d);\n\n", S.nTotalTimers);
	for(uint32_t i = 0; i < S.nTotalTimers; ++i)
	{
		uint32_t nIdx = i * 2;
		MP_ASSERT(i == S.TimerInfo[i].nTimerIndex);
		MicroProfilePrintf(CB, Handle, "S.Meta%d = [];\n", i);
		MicroProfilePrintf(CB, Handle, "S.MetaAvg%d = [];\n", i);
		MicroProfilePrintf(CB, Handle, "S.MetaMax%d = [];\n", i);
		uint32_t nColor = S.TimerInfo[i].nColor;
		uint32_t nColorDark = (nColor >> 1) & ~0x80808080;
		MicroProfilePrintf(CB,
						   Handle,
						   "S.TimerInfo[%d] = MakeTimer(%d, \"%s\", %d, '#%02x%02x%02x','#%02x%02x%02x', %f, %f, %f, %f, %f, %f, %d, %f, S.Meta%d, S.MetaAvg%d, S.MetaMax%d, %d);\n",
						   S.TimerInfo[i].nTimerIndex,
						   S.TimerInfo[i].nTimerIndex,
						   S.TimerInfo[i].pName,
						   S.TimerInfo[i].nGroupIndex,
						   MICROPROFILE_UNPACK_RED(nColor) & 0xff,
						   MICROPROFILE_UNPACK_GREEN(nColor) & 0xff,
						   MICROPROFILE_UNPACK_BLUE(nColor) & 0xff,
						   MICROPROFILE_UNPACK_RED(nColorDark) & 0xff,
						   MICROPROFILE_UNPACK_GREEN(nColorDark) & 0xff,
						   MICROPROFILE_UNPACK_BLUE(nColorDark) & 0xff,
						   pAverage[nIdx],
						   pMax[nIdx],
						   pMin[nIdx],
						   pAverageExclusive[nIdx],
						   pMaxExclusive[nIdx],
						   pCallAverage[nIdx],
						   S.Aggregate[i].nCount,
						   pTotal[nIdx],
						   i,
						   i,
						   i,
						   S.TimerInfo[i].Flags);
	}
	// First pass over the selected log range: every distinct string pointer used
	// as a marker name gets a synthetic timer index past the real timers.
	uint32_t nTotalTimersExt = S.nTotalTimers;
	{
		for(uint32_t j = 0; j < S.nNumLogs; ++j)
		{
			MicroProfileThreadLog* pLog = S.Pool[j];
			if(!pLog)
			{
				// Other loops in this function treat pool slots as nullable.
				continue;
			}
			uint32_t nLogStart = S.Frames[nFirstFrame].nLogStart[j];
			uint32_t nLogEnd = S.Frames[nLastFrame].nLogStart[j];
			uint64_t nLogType;
			if(nLogStart != nLogEnd)
			{
				for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
				{
					uint64_t v = pLog->Log[k];
					nLogType = MicroProfileLogGetType(v);
					uint32_t tidx = MicroProfileLogGetTimerIndex(v);
					if((nLogType == MP_LOG_ENTER || nLogType == MP_LOG_LEAVE) && tidx == ETOKEN_CSTR_PTR)
					{
						// The string pointer lives in the entry that follows.
						MP_ASSERT(k + 1 != nLogEnd);
						uint64_t v1 = pLog->Log[(k + 1) % MICROPROFILE_BUFFER_SIZE];
						const char* pString = (const char*)MicroProfileLogGetExtendedPayloadNoDataPtr(v1);
						uintptr_t value;
						if(!MicroProfileHashTableGet(&StringsHashTable, (uint64_t)pString, &value))
						{
							uintptr_t nTimerIndex = nTotalTimersExt++;
							MicroProfileHashTableSet(&StringsHashTable, (uint64_t)pString, nTimerIndex);
							// %d expects an int-sized argument; narrow explicitly.
							MicroProfilePrintf(
								CB, Handle, "S.TimerInfo.push(MakeTimer(%d, \"%s\", %d, '#000000','#000000', 0, 0, 0, 0, 0, 0, 0, 0, null, null, null, 0));\n", (uint32_t)nTimerIndex, pString, nUncategorized);
						}
					}
				}
			}
		}
	}
	MicroProfilePrintf(CB, Handle, "\nS.ThreadNames = [");
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		if(S.Pool[i])
		{
			MicroProfilePrintf(CB, Handle, "'%s',", S.Pool[i]->ThreadName);
		}
		else
		{
			MicroProfilePrintf(CB, Handle, "'Thread %d',", i);
		}
	}
	MicroProfilePrintf(CB, Handle, "];\n\n");
	MicroProfilePrintf(CB, Handle, "\nS.ISGPU = [");
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		MicroProfilePrintf(CB, Handle, "%d,", (S.Pool[i] && S.Pool[i]->nGpu) ? 1 : 0);
	}
	MicroProfilePrintf(CB, Handle, "];\n\n");
	// Per-thread group time, averaged per aggregate frame.
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		if(S.Pool[i])
		{
			MicroProfilePrintf(CB, Handle, "S.ThreadGroupTime%d = [", i);
			float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
			for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
			{
				MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] / nAggregateFrames * fToMs);
			}
			MicroProfilePrintf(CB, Handle, "];\n");
		}
	}
	MicroProfilePrintf(CB, Handle, "\nS.ThreadGroupTimeArray = [");
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		if(S.Pool[i])
		{
			MicroProfilePrintf(CB, Handle, "S.ThreadGroupTime%d,", i);
		}
	}
	MicroProfilePrintf(CB, Handle, "];\n");
	// Per-thread group time, total over the aggregate.
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		if(S.Pool[i])
		{
			MicroProfilePrintf(CB, Handle, "S.ThreadGroupTimeTotal%d = [", i);
			float fToMs = S.Pool[i]->nGpu ? fToMsGPU : fToMsCPU;
			for(uint32_t j = 0; j < MICROPROFILE_MAX_GROUPS; ++j)
			{
				MicroProfilePrintf(CB, Handle, "%f,", S.Pool[i]->nAggregateGroupTicks[j] * fToMs);
			}
			MicroProfilePrintf(CB, Handle, "];\n");
		}
	}
	MicroProfilePrintf(CB, Handle, "\nS.ThreadGroupTimeTotalArray = [");
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		if(S.Pool[i])
		{
			MicroProfilePrintf(CB, Handle, "S.ThreadGroupTimeTotal%d,", i);
		}
	}
	MicroProfilePrintf(CB, Handle, "];");
	MicroProfilePrintf(CB, Handle, "\nS.ThreadIds = [");
	for(uint32_t i = 0; i < S.nNumLogs; ++i)
	{
		if(S.Pool[i])
		{
			MicroProfileThreadIdType ThreadId = S.Pool[i]->nThreadId;
			if(!ThreadId)
			{
				ThreadId = (MicroProfileThreadIdType)-1;
			}
			MicroProfilePrintf(CB, Handle, "%" PRIu64 ",", (uint64_t)ThreadId);
		}
		else
		{
			MicroProfilePrintf(CB, Handle, "-1,");
		}
	}
	MicroProfilePrintf(CB, Handle, "];\n\n");
	// Counter history: only integer counters flagged DETAILED get history arrays.
	for(int i = 0; i < (int)S.nNumCounters; ++i)
	{
		bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
		if(0 != (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DETAILED) && !IsDouble)
		{
			int64_t nCounterMax = S.nCounterMax[i];
			int64_t nCounterMin = S.nCounterMin[i];
			uint32_t nBaseIndex = S.nCounterHistoryPut;
			MicroProfilePrintf(CB, Handle, "\nS.CounterHistoryArray%d =[", i);
			for(uint32_t j = 0; j < MICROPROFILE_GRAPH_HISTORY; ++j)
			{
				uint32_t nHistoryIndex = (nBaseIndex + j) % MICROPROFILE_GRAPH_HISTORY;
				int64_t nValue = MicroProfileClamp(S.nCounterHistory[nHistoryIndex][i], nCounterMin, nCounterMax);
				MicroProfilePrintf(CB, Handle, "%lld,", nValue);
			}
			MicroProfilePrintf(CB, Handle, "];\n");
			int64_t nCounterHeightBase = nCounterMax;
			int64_t nCounterOffset = 0;
			if(nCounterMin < 0)
			{
				// Shift the range so the percentage array starts at zero.
				nCounterHeightBase = nCounterMax - nCounterMin;
				nCounterOffset = -nCounterMin;
			}
			double fRcp = nCounterHeightBase ? (1.0 / nCounterHeightBase) : 0;
			MicroProfilePrintf(CB, Handle, "\nS.CounterHistoryArrayPrc%d =[", i);
			for(uint32_t j = 0; j < MICROPROFILE_GRAPH_HISTORY; ++j)
			{
				uint32_t nHistoryIndex = (nBaseIndex + j) % MICROPROFILE_GRAPH_HISTORY;
				int64_t nValue = MicroProfileClamp(S.nCounterHistory[nHistoryIndex][i], nCounterMin, nCounterMax);
				float fPrc = (nValue + nCounterOffset) * fRcp;
				MicroProfilePrintf(CB, Handle, "%f,", fPrc);
			}
			MicroProfilePrintf(CB, Handle, "];\n");
			MicroProfilePrintf(CB, Handle, "S.CounterHistory%d = MakeCounterHistory(%d, S.CounterHistoryArray%d, S.CounterHistoryArrayPrc%d)\n", i, i, i, i);
		}
		else
		{
			MicroProfilePrintf(CB, Handle, "S.CounterHistory%d;\n", i);
		}
	}
	MicroProfilePrintf(CB, Handle, "\nS.CounterInfo = [");
	for(int i = 0; i < (int)S.nNumCounters; ++i)
	{
		bool IsDouble = (S.CounterInfo[i].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
		float fCounterPrc = 0.f;
		float fBoxPrc = 1.f;
		double dCounter, dLimit, dMax, dMin;
		char Formatted[64];
		char FormattedLimit[64];
		if(!IsDouble)
		{
			uint64_t nCounter = S.Counters[i].load();
			uint64_t nLimit = S.CounterInfo[i].nLimit;
			fCounterPrc = 0.f;
			if(nLimit)
			{
				fCounterPrc = (float)nCounter / nLimit;
				if(fCounterPrc > 1.f)
				{
					// Over the limit: shrink the box and pin the bar at 100%.
					fBoxPrc = 1.f / fCounterPrc;
					fCounterPrc = 1.f;
				}
			}
			MicroProfileFormatCounter(S.CounterInfo[i].eFormat, nCounter, Formatted, sizeof(Formatted) - 1);
			MicroProfileFormatCounter(S.CounterInfo[i].eFormat, S.CounterInfo[i].nLimit, FormattedLimit, sizeof(FormattedLimit) - 1);
			dCounter = (double)nCounter;
			dMin = (double)S.nCounterMin[i];
			dMax = (double)S.nCounterMax[i];
			dLimit = (double)nLimit;
		}
		else
		{
			dCounter = S.CountersDouble[i].load();
			dLimit = S.CounterInfo[i].dLimit;
			fCounterPrc = 0.f;
			if(dLimit > 0.f)
			{
				fCounterPrc = (float)(dCounter / dLimit);
				if(fCounterPrc > 1.f)
				{
					fBoxPrc = 1.f / fCounterPrc;
					fCounterPrc = 1.f;
				}
			}
			MicroProfileFormatCounterDouble(S.CounterInfo[i].eFormat, dCounter, Formatted, sizeof(Formatted) - 1);
			MicroProfileFormatCounterDouble(S.CounterInfo[i].eFormat, S.CounterInfo[i].dLimit, FormattedLimit, sizeof(FormattedLimit) - 1);
			dMin = (double)S.dCounterMin[i];
			dMax = (double)S.dCounterMax[i];
		}
		MicroProfilePrintf(CB,
						   Handle,
						   "MakeCounter(%d, %d, %d, %d, %d, '%s', %f, %f, %f, '%s', %f, '%s', %f, %f, %d, S.CounterHistory%d),",
						   i,
						   S.CounterInfo[i].nParent,
						   S.CounterInfo[i].nSibling,
						   S.CounterInfo[i].nFirstChild,
						   S.CounterInfo[i].nLevel,
						   S.CounterInfo[i].pName,
						   dCounter,
						   dMin,
						   dMax,
						   Formatted,
						   dLimit,
						   FormattedLimit,
						   fCounterPrc,
						   fBoxPrc,
						   S.CounterInfo[i].eFormat == MICROPROFILE_COUNTER_FORMAT_BYTES ? 1 : 0,
						   i);
	}
	MicroProfilePrintf(CB, Handle, "];\n\n");
	const int64_t nTickStart = S.Frames[nFirstFrame].nFrameStartCpu;
	const int64_t nTickEnd = S.Frames[nLastFrame].nFrameStartCpu;
	int64_t nTickStartGpu = S.Frames[nFirstFrame].nFrameStartGpu;
	int64_t nTickReferenceCpu, nTickReferenceGpu;
	int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
	int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
	int nTickReference = 0;
	if(MicroProfileGetGpuTickReference(&nTickReferenceCpu, &nTickReferenceGpu))
	{
		// Re-express the CPU start tick on the GPU clock using the reference pair.
		nTickStartGpu = (nTickStart - nTickReferenceCpu) * nTicksPerSecondGpu / nTicksPerSecondCpu + nTickReferenceGpu;
		nTickReference = 1;
	}
	uprintf("dumping %d frames\n", nNumFrames);
	uprintf("dumping frame %d to %d\n", nFirstFrame, nLastFrame);
	// Per-timer marker counts, filled while the ti arrays are written and
	// reported in the trailing HTML comment.
	uint32_t* nTimerCounter = (uint32_t*)alloca(sizeof(uint32_t) * S.nTotalTimers);
	memset(nTimerCounter, 0, sizeof(uint32_t) * S.nTotalTimers);
	{
		MicroProfilePrintf(CB, Handle, " //Timeline begin\n");
		MicroProfileThreadLog* pLog = &S.TimelineLog;
		uint32_t nFrameIndexFirst = (nFirstFrame) % MICROPROFILE_MAX_FRAME_HISTORY;
		uint32_t nFrameIndexLast = (nFirstFrame + nNumFrames) % MICROPROFILE_MAX_FRAME_HISTORY;
		{
			// find the frame that has an active marker the furthest distance from the selected range
			int nDelta = 0;
			int nOffset = 0;
			for(uint32_t i = nFrameIndexFirst; i != nFrameIndexLast; i = (i + 1) % MICROPROFILE_MAX_FRAME_HISTORY)
			{
				int D = (int)S.Frames[i].nTimelineFrameMax - nOffset;
				nDelta = MicroProfileMax(D, nDelta);
				nOffset++;
			}
			// Step back nDelta frames without relying on unsigned wrap-around
			// (which is only correct when the history size divides 2^32).
			nFrameIndexFirst = (nFirstFrame + MICROPROFILE_MAX_FRAME_HISTORY - ((uint32_t)nDelta % MICROPROFILE_MAX_FRAME_HISTORY)) % MICROPROFILE_MAX_FRAME_HISTORY;
		}
		uint32_t nLogStart = S.Frames[nFrameIndexFirst].nLogStartTimeline;
		uint32_t nLogEnd = S.Frames[nFrameIndexLast].nLogStartTimeline;
		float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
#define pp(...) MicroProfilePrintf(CB, Handle, __VA_ARGS__)
		if(nLogStart != nLogEnd)
		{
			uint32_t nLogType;
			float fTime;
			int f = 0;
			pp("S.TimelineColorArray=[");
			for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
			{
				uint64_t v = pLog->Log[k];
				uint64_t nIndex = MicroProfileLogGetTimerIndex(v);
				uint64_t nTick = MicroProfileLogGetTick(v);
				(void)nTick;
				nLogType = MicroProfileLogGetType(v);
				switch(nLogType)
				{
				case MP_LOG_ENTER:
					break;
				case MP_LOG_LEAVE:
					pp("%c'%s'", f++ ? ',' : ' ', "#ff8080");
					break;
				case MP_LOG_EXTENDED:
				case MP_LOG_EXTENDED_NO_DATA:
					uint32_t payload = MicroProfileLogGetExtendedPayload(v);
					if(nIndex == ETOKEN_CUSTOM_COLOR)
					{
						uint32_t nColor = payload;
						pp("%c'#%02x%02x%02x'", f++ ? ',' : ' ', MICROPROFILE_UNPACK_RED(nColor) & 0xff, MICROPROFILE_UNPACK_GREEN(nColor) & 0xff, MICROPROFILE_UNPACK_BLUE(nColor) & 0xff);
					}
					// Skip the data entries attached to this entry.
					k += MicroProfileLogGetDataSize(v);
					break;
				}
			}
			pp("];\n");
			f = 0;
			pp("S.TimelineIdArray=[");
			for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
			{
				uint64_t v = pLog->Log[k];
				uint64_t nIndex = MicroProfileLogGetTimerIndex(v);
				uint64_t nTick = MicroProfileLogGetTick(v);
				(void)nTick;
				nLogType = MicroProfileLogGetType(v);
				switch(nLogType)
				{
				case MP_LOG_ENTER:
				case MP_LOG_LEAVE:
				case MP_LOG_EXTENDED_NO_DATA:
					break;
				case MP_LOG_EXTENDED:
					if(nIndex == ETOKEN_CUSTOM_ID)
					{
						pp("%c%d", f++ ? ',' : ' ', (uint32_t)nTick);
					}
					k += MicroProfileLogGetDataSize(v);
					break;
				}
			}
			pp("];\n");
			f = 0;
			pp("S.TimelineArray=[");
			for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
			{
				uint64_t v = pLog->Log[k];
				nLogType = MicroProfileLogGetType(v);
				switch(nLogType)
				{
				case MP_LOG_ENTER:
				case MP_LOG_LEAVE:
					fTime = MicroProfileLogTickDifference(nTickStart, v) * fToMs;
					pp("%c%f", f++ ? ',' : ' ', fTime);
					break;
				case MP_LOG_EXTENDED:
					k += MicroProfileLogGetDataSize(v);
					break;
				case MP_LOG_EXTENDED_NO_DATA:
					break;
				}
			}
			pp("];\n");
			pp("S.TimelineNames=[");
			f = 0;
			// Scratch buffer used only when a name wraps around the end of the log.
			char String[MICROPROFILE_MAX_STRING + 1];
			for(uint32_t k = nLogStart; k != nLogEnd;)
			{
				uint64_t v = pLog->Log[k];
				nLogType = MicroProfileLogGetType(v);
				uint64_t nIndex = MicroProfileLogGetTimerIndex(v);
				uint64_t nTick = MicroProfileLogGetTick(v);
				(void)nTick;
				switch(nLogType)
				{
				case MP_LOG_ENTER:
				case MP_LOG_LEAVE:
					if(nIndex == ETOKEN_CUSTOM_NAME && nLogType == MP_LOG_LEAVE)
					{
						// pp(f++ ? ",''" : "''");
					}
					k = (k + 1) % MICROPROFILE_BUFFER_SIZE;
					break;
				case MP_LOG_EXTENDED_NO_DATA:
					k = (k + 1) % MICROPROFILE_BUFFER_SIZE;
					break;
				case MP_LOG_EXTENDED:
					uint32_t nSize = MicroProfileLogGetDataSize(v);
					if(nIndex == ETOKEN_CUSTOM_ID)
					{
						// The name occupies the nSize entries following k.
						uint32_t nFirstData = (k + 1) % MICROPROFILE_BUFFER_SIZE;
						char* pSource = (char*)&pLog->Log[nFirstData];
						const char* pOut = nullptr;
						if(nSize == 0)
						{
							pOut = "";
						}
						else if(nFirstData + nSize <= MICROPROFILE_BUFFER_SIZE)
						{
							// All data entries are contiguous: print in place.
							pOut = pSource;
						}
						else
						{
							// The data wraps past the end of the ring buffer:
							// reassemble it entry by entry into String.
							pOut = &String[0];
							char* pDest = &String[0];
							const uint32_t nMaxEntries = (uint32_t)(MICROPROFILE_MAX_STRING / sizeof(uint64_t));
							MP_ASSERT(nSize <= nMaxEntries);
							const uint32_t nCopyEntries = MicroProfileMin(nSize, nMaxEntries);
							uint32_t Index = nFirstData;
							for(uint32_t l = 0; l < nCopyEntries; ++l)
							{
								// Copy the bytes stored in the entry itself (its
								// address), and advance the destination.
								memcpy(pDest, &pLog->Log[Index], sizeof(uint64_t));
								pDest += sizeof(uint64_t);
								Index = (Index + 1) % MICROPROFILE_BUFFER_SIZE;
							}
							*pDest = '\0';
						}
						if(f++)
						{
							pp(",'%s'", pOut);
						}
						else
						{
							pp("'%s'", pOut);
						}
					}
					k = (k + 1 + nSize) % MICROPROFILE_BUFFER_SIZE;
					break;
				}
			}
			pp("];\n");
		}
		MicroProfilePrintf(CB, Handle, " //Timeline end\n");
	}
	// Per frame, per log: three parallel arrays - ts (times), tt (types), ti (indices).
	MicroProfilePrintf(CB, Handle, "S.Frames = Array(%d);\n", nNumFrames);
	for(uint32_t i = 0; i < nNumFrames; ++i)
	{
		uint32_t nFrameIndex = (nFirstFrame + i) % MICROPROFILE_MAX_FRAME_HISTORY;
		uint32_t nFrameIndexNext = (nFrameIndex + 1) % MICROPROFILE_MAX_FRAME_HISTORY;
		for(uint32_t j = 0; j < S.nNumLogs; ++j)
		{
			MicroProfileThreadLog* pLog = S.Pool[j];
			if(!pLog)
			{
				// Still emit the (empty) arrays: S.ts%d / S.tt%d / S.ti%d below
				// reference one entry per log index.
				MicroProfilePrintf(CB, Handle, "S.ts_%d_%d = [", i, j);
				MicroProfilePrintf(CB, Handle, "];\n");
				MicroProfilePrintf(CB, Handle, "S.tt_%d_%d = [", i, j);
				MicroProfilePrintf(CB, Handle, "];\n");
				MicroProfilePrintf(CB, Handle, "S.ti_%d_%d = [", i, j);
				MicroProfilePrintf(CB, Handle, "];\n");
				continue;
			}
			int64_t nStartTickBase = pLog->nGpu ? nTickStartGpu : nTickStart;
			uint32_t nLogStart = S.Frames[nFrameIndex].nLogStart[j];
			uint32_t nLogEnd = S.Frames[nFrameIndexNext].nLogStart[j];
			uint32_t nLogType;
			float fToMs;
			uint64_t nStartTick;
			float fToMsCpu = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
			float fToMsBase = MicroProfileTickToMsMultiplier(pLog->nGpu ? nTicksPerSecondGpu : nTicksPerSecondCpu);
			MicroProfilePrintf(CB, Handle, "S.ts_%d_%d = [", i, j);
			if(nLogStart != nLogEnd)
			{
				int f = 0;
				for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
				{
					float fTime;
					MicroProfileLogEntry v = pLog->Log[k];
					nLogType = MicroProfileLogGetType(v);
					fToMs = fToMsBase;
					nStartTick = nStartTickBase;
					switch(nLogType)
					{
					case MP_LOG_EXTENDED:
					{
						fTime = 0.f;
						k += MicroProfileLogGetDataSize(v);
						break;
					}
					case MP_LOG_EXTENDED_NO_DATA:
					{
						uint32_t nTimerIndex = (uint32_t)MicroProfileLogGetTimerIndex(v);
						if(nTimerIndex == ETOKEN_GPU_CPU_TIMESTAMP)
						{
							// This timestamp is on the CPU clock even in a GPU log.
							fToMs = fToMsCpu;
							nStartTick = nTickStart;
							fTime = MicroProfileLogTickDifference(nStartTick, v) * fToMs;
						}
						else
						{
							fTime = 0.f;
						}
						break;
					}
					default:
						fTime = MicroProfileLogTickDifference(nStartTick, pLog->Log[k]) * fToMs;
					}
					MicroProfilePrintf(CB, Handle, f++ ? ",%f" : "%f", fTime);
				}
			}
			MicroProfilePrintf(CB, Handle, "];\n");
			MicroProfilePrintf(CB, Handle, "S.tt_%d_%d = [", i, j);
			if(nLogStart != nLogEnd)
			{
				for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
				{
					uint64_t v = pLog->Log[k];
					const uint32_t nRawType = (uint32_t)MicroProfileLogGetType(v);
					uint32_t nLogType2 = nRawType;
					if(nRawType > MP_LOG_ENTER)
						nLogType2 |= (MicroProfileLogGetExtendedToken(v))
									 << 2; // pack extended token here.. this way all code can check against ENTER/LEAVE, and only the ext code needs to care about the top bits.
					MicroProfilePrintf(CB, Handle, k == nLogStart ? "%d" : ",%d", nLogType2);
					// Decide on the raw type: after packing, the value no longer
					// equals MP_LOG_EXTENDED for non-zero tokens, which made this
					// array skip differently from ts/ti and fall out of step.
					if(nRawType == MP_LOG_EXTENDED)
						k += MicroProfileLogGetDataSize(v);
				}
			}
			MicroProfilePrintf(CB, Handle, "];\n");
			MicroProfilePrintf(CB, Handle, "S.ti_%d_%d = [", i, j);
			if(nLogStart != nLogEnd)
			{
				for(uint32_t k = nLogStart; k != nLogEnd; k = (k + 1) % MICROPROFILE_BUFFER_SIZE)
				{
					uint64_t v = pLog->Log[k];
					nLogType = MicroProfileLogGetType(v);
					const char* pFormat = k == nLogStart ? "%d" : ",%d";
					if(nLogType == MP_LOG_ENTER || nLogType == MP_LOG_LEAVE)
					{
						uint32_t nTimerIndex = (uint32_t)MicroProfileLogGetTimerIndex(pLog->Log[k]);
						if(ETOKEN_CSTR_PTR == nTimerIndex)
						{
							MP_ASSERT(k + 1 != nLogEnd);
							uint64_t v1 = pLog->Log[(k + 1) % MICROPROFILE_BUFFER_SIZE];
							const char* pString = (const char*)MicroProfileLogGetExtendedPayloadNoDataPtr(v1);
							uintptr_t value = 0;
							if(!MicroProfileHashTableGet(&StringsHashTable, (uint64_t)pString, &value))
							{
								MP_BREAK(); // should be covered earlier.
							}
							MicroProfilePrintf(CB, Handle, pFormat, (uint32_t)value);
						}
						else
						{
							if(nTimerIndex < S.nTotalTimers)
							{
								nTimerCounter[nTimerIndex]++;
							}
							MicroProfilePrintf(CB, Handle, pFormat, nTimerIndex);
						}
					}
					else
					{
						uint64_t ExtendedToken = MicroProfileLogGetExtendedToken(v);
						uint64_t PayloadNoData = MicroProfileLogGetExtendedPayloadNoData(v);
						switch(ExtendedToken)
						{
						case ETOKEN_GPU_CPU_SOURCE_THREAD:
							MicroProfilePrintf(CB, Handle, pFormat, (uint32_t)PayloadNoData);
							break;
						default:
							MicroProfilePrintf(CB, Handle, pFormat, -1);
						}
						if(nLogType == MP_LOG_EXTENDED)
							k += MicroProfileLogGetDataSize(v);
					}
				}
			}
			MicroProfilePrintf(CB, Handle, "];\n");
		}
		MicroProfilePrintf(CB, Handle, "S.ts%d = [", i);
		for(uint32_t j = 0; j < S.nNumLogs; ++j)
		{
			MicroProfilePrintf(CB, Handle, "S.ts_%d_%d,", i, j);
		}
		MicroProfilePrintf(CB, Handle, "];\n");
		MicroProfilePrintf(CB, Handle, "S.tt%d = [", i);
		for(uint32_t j = 0; j < S.nNumLogs; ++j)
		{
			MicroProfilePrintf(CB, Handle, "S.tt_%d_%d,", i, j);
		}
		MicroProfilePrintf(CB, Handle, "];\n");
		MicroProfilePrintf(CB, Handle, "S.ti%d = [", i);
		for(uint32_t j = 0; j < S.nNumLogs; ++j)
		{
			MicroProfilePrintf(CB, Handle, "S.ti_%d_%d,", i, j);
		}
		MicroProfilePrintf(CB, Handle, "];\n");
		int64_t nFrameStart = S.Frames[nFrameIndex].nFrameStartCpu;
		int64_t nFrameEnd = S.Frames[nFrameIndexNext].nFrameStartCpu;
		float fToMs = MicroProfileTickToMsMultiplier(nTicksPerSecondCpu);
		float fFrameMs = MicroProfileLogTickDifference(nTickStart, nFrameStart) * fToMs;
		float fFrameEndMs = MicroProfileLogTickDifference(nTickStart, nFrameEnd) * fToMs;
		float fFrameGpuMs = 0;
		float fFrameGpuEndMs = 0;
		if(nTickReference)
		{
			fFrameGpuMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndex].nFrameStartGpu) * fToMsGPU;
			fFrameGpuEndMs = MicroProfileLogTickDifference(nTickStartGpu, S.Frames[nFrameIndexNext].nFrameStartGpu) * fToMsGPU;
		}
		MicroProfilePrintf(CB, Handle, "S.Frames[%d] = MakeFrame(%d, %f, %f, %f, %f, S.ts%d, S.tt%d, S.ti%d);\n", i, 0, fFrameMs, fFrameEndMs, fFrameGpuMs, fFrameGpuEndMs, i, i, i);
	}
	// Context switch data covering the dumped tick range.
	uint32_t nContextSwitchStart = 0;
	uint32_t nContextSwitchEnd = 0;
	MicroProfileContextSwitchSearch(&nContextSwitchStart, &nContextSwitchEnd, nTickStart, nTickEnd);
	uprintf("CONTEXT SWITCH SEARCH .... %d %d %d .... %lld, %lld\n", nContextSwitchStart, nContextSwitchEnd, nContextSwitchEnd - nContextSwitchStart, nTickStart, nTickEnd);
	uint32_t nWrittenBefore = S.nWebServerDataSent;
	MicroProfilePrintf(CB, Handle, "S.CSwitchThreadInOutCpu = [");
	for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j + 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
	{
		MicroProfileContextSwitch CS = S.ContextSwitch[j];
		int nCpu = CS.nCpu;
		MicroProfilePrintf(CB, Handle, "%d,%d,%d,", CS.nThreadIn, CS.nThreadOut, nCpu);
	}
	MicroProfilePrintf(CB, Handle, "];\n");
	MicroProfilePrintf(CB, Handle, "S.CSwitchTime = [");
	float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
	for(uint32_t j = nContextSwitchStart; j != nContextSwitchEnd; j = (j + 1) % MICROPROFILE_CONTEXT_SWITCH_BUFFER_SIZE)
	{
		MicroProfileContextSwitch CS = S.ContextSwitch[j];
		float fTime = MicroProfileLogTickDifference(nTickStart, CS.nTicks) * fToMsCpu;
		MicroProfilePrintf(CB, Handle, "%f,", fTime);
	}
	MicroProfilePrintf(CB, Handle, "];\n");
	MicroProfilePrintf(CB, Handle, "S.CSwitchThreads = {");
	MicroProfileThreadInfo* pThreadInfo = nullptr;
	uint32_t nNumThreads = MicroProfileGetThreadInfoArray(&pThreadInfo);
	for(uint32_t i = 0; i < nNumThreads; ++i)
	{
		const char* p1 = pThreadInfo[i].pThreadModule ? pThreadInfo[i].pThreadModule : "?";
		const char* p2 = pThreadInfo[i].pProcessModule ? pThreadInfo[i].pProcessModule : "?";
		MicroProfilePrintf(CB,
						   Handle,
						   "%" PRId64 ":{\'tid\':%" PRId64 ",\'pid\':%" PRId64 ",\'t\':\'%s\',\'p\':\'%s\'},",
						   (uint64_t)pThreadInfo[i].tid,
						   (uint64_t)pThreadInfo[i].tid,
						   (uint64_t)pThreadInfo[i].pid,
						   p1,
						   p2);
	}
	MicroProfilePrintf(CB, Handle, "};\n");
	MicroProfilePrintf(CB, Handle, "S.CoreEfficiencyClass = [");
	for(uint32_t i = 0; i < MICROPROFILE_MAX_CPU_CORES; ++i)
	{
		MicroProfilePrintf(CB, Handle, "%d,", S.CoreEfficiencyClass[i]);
	}
	MicroProfilePrintf(CB, Handle, "];\n");
	{
		MicroProfilePrintf(CB, Handle, "//String Table\n");
		MicroProfilePrintf(CB, Handle, "S.StringTable = {}\n");
		// dump string table
		MicroProfileHashTableIterator beg = MicroProfileGetHashTableIteratorBegin(&StringsHashTable);
		MicroProfileHashTableIterator end = MicroProfileGetHashTableIteratorEnd(&StringsHashTable);
		while(beg != end)
		{
			uint64_t Key = beg->Key;
			uint64_t Value = beg->Value;
			// Narrow for %d so the following %s reads the right argument.
			MicroProfilePrintf(CB, Handle, "S.StringTable[%d] = '%s';\n", (uint32_t)Value, (const char*)Key);
			beg++;
		}
	}
	uint32_t nWrittenAfter = S.nWebServerDataSent;
	MicroProfilePrintf(CB, Handle, "//CSwitch Size %d\n", nWrittenAfter - nWrittenBefore);
	for(size_t i = 0; i < g_MicroProfileHtml_end_count; ++i)
	{
		CB(Handle, g_MicroProfileHtml_end_sizes[i] - 1, g_MicroProfileHtml_end[i]);
	}
	// Trailing HTML comment: marker counts per group and per timer, most used first.
	uint32_t* nGroupCounter = (uint32_t*)alloca(sizeof(uint32_t) * S.nGroupCount);
	memset(nGroupCounter, 0, sizeof(uint32_t) * S.nGroupCount);
	for(uint32_t i = 0; i < S.nTotalTimers; ++i)
	{
		uint32_t nGroupIndex = S.TimerInfo[i].nGroupIndex;
		nGroupCounter[nGroupIndex] += nTimerCounter[i];
	}
	uint32_t* nGroupCounterSort = (uint32_t*)alloca(sizeof(uint32_t) * S.nGroupCount);
	uint32_t* nTimerCounterSort = (uint32_t*)alloca(sizeof(uint32_t) * S.nTotalTimers);
	for(uint32_t i = 0; i < S.nGroupCount; ++i)
	{
		nGroupCounterSort[i] = i;
	}
	for(uint32_t i = 0; i < S.nTotalTimers; ++i)
	{
		nTimerCounterSort[i] = i;
	}
	std::sort(nGroupCounterSort, nGroupCounterSort + S.nGroupCount, [nGroupCounter](const uint32_t l, const uint32_t r) { return nGroupCounter[l] > nGroupCounter[r]; });
	std::sort(nTimerCounterSort, nTimerCounterSort + S.nTotalTimers, [nTimerCounter](const uint32_t l, const uint32_t r) { return nTimerCounter[l] > nTimerCounter[r]; });
	MicroProfilePrintf(CB, Handle, "\n<!--\nMarker Per Group\n");
	for(uint32_t i = 0; i < S.nGroupCount; ++i)
	{
		uint32_t idx = nGroupCounterSort[i];
		MicroProfilePrintf(CB, Handle, "%8d:%s\n", nGroupCounter[idx], S.GroupInfo[idx].pName);
	}
	MicroProfilePrintf(CB, Handle, "Marker Per Timer\n");
	for(uint32_t i = 0; i < S.nTotalTimers; ++i)
	{
		uint32_t idx = nTimerCounterSort[i];
		MicroProfilePrintf(CB, Handle, "%8d:%s(%s)\n", nTimerCounter[idx], S.TimerInfo[idx].pName, S.GroupInfo[S.TimerInfo[idx].nGroupIndex].pName);
	}
	MicroProfilePrintf(CB, Handle, "\n-->\n");
	// Resume timers: restore the state saved at the top.
	memcpy(S.nActiveGroups, nActiveGroup, sizeof(S.nActiveGroups));
	S.AnyActive = AnyActive;
#if MICROPROFILE_DEBUG
	int64_t nTicksEnd = MP_TICK();
	float fMs = fToMsCpu * (nTicksEnd - S.nPauseTicks);
	uprintf("html dump took %6.2fms\n", fMs);
#endif
#undef pp
	S.nPauseTicks = 0;
}
// Write callback that targets a C stdio stream.
// Handle is the FILE* to write to; nSize bytes from pData are written as a
// single fwrite item. The fwrite result is not inspected.
void MicroProfileWriteFile(void* Handle, size_t nSize, const char* pData)
{
	FILE* pFile = (FILE*)Handle;
	fwrite(pData, nSize, 1, pFile);
}
// Performs the file dumps requested in S.nDumpFileNextFrame while holding the
// profiler mutex: bit 0 writes "<S.HtmlDumpPath>.html", bit 1 writes the CSV
// dump (legacy or current format depending on MICROPROFILE_LEGACY_CSV).
void MicroProfileDumpToFile()
{
	std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
	if(S.nDumpFileNextFrame & 1)
	{
		char Path[MICROPROFILE_MAX_PATH];
		// Bound the write by the destination buffer, not by the source field:
		// the two sizes are unrelated and the larger source would overflow Path.
		int Length = snprintf(Path, sizeof(Path), "%s.html", S.HtmlDumpPath);
		if(Length > 0 && Length < (int)sizeof(Path))
		{
			FILE* F = fopen(Path, "w");
			if(F)
			{
				MicroProfileDumpHtml(MicroProfileWriteFile, F, S.DumpFrameCount, S.HtmlDumpPath);
				fclose(F);
			}
		}
	}
	if(S.nDumpFileNextFrame & 2)
	{
#if MICROPROFILE_LEGACY_CSV
		MicroProfileDumpCsvLegacy();
#else
		MicroProfileDumpCsv(S.DumpFrameCount);
#endif
	}
}
// Sends the bytes currently staged in S.WebServerBuffer (S.WebServerPut of
// them) on Socket and marks the staging buffer as empty. The result of send()
// is not inspected.
void MicroProfileFlushSocket(MpSocket Socket)
{
	const auto nStagedBytes = S.WebServerPut;
	send(Socket, &S.WebServerBuffer[0], nStagedBytes, 0);
	S.WebServerPut = 0;
}
// Write callback that targets a socket; Handle points at the MpSocket.
// Payloads of at most half the staging buffer are appended to
// S.WebServerBuffer, which is flushed once it grows past half full. Larger
// payloads flush whatever is staged and are then sent directly.
// Every call adds nSize to S.nWebServerDataSent.
void MicroProfileWriteSocket(void* Handle, size_t nSize, const char* pData)
{
	const size_t nHalfBuffer = MICROPROFILE_WEBSERVER_SOCKET_BUFFER_SIZE / 2;
	S.nWebServerDataSent += nSize;
	MpSocket Socket = *(MpSocket*)Handle;
	if(nSize <= nHalfBuffer)
	{
		// Small write: stage it, flush only when the buffer is past half full.
		memcpy(&S.WebServerBuffer[S.WebServerPut], pData, nSize);
		S.WebServerPut += (uint32_t)nSize;
		if(S.WebServerPut > nHalfBuffer)
		{
			MicroProfileFlushSocket(Socket);
		}
		return;
	}
	// Large write: keep ordering by flushing staged bytes first.
	MicroProfileFlushSocket(Socket);
	send(Socket, pData, (int)nSize, 0);
}
#if MICROPROFILE_MINIZ
#ifndef MICROPROFILE_COMPRESS_BUFFER_SIZE
#define MICROPROFILE_COMPRESS_BUFFER_SIZE (256 << 10)
#endif
#define MICROPROFILE_COMPRESS_CHUNK (MICROPROFILE_COMPRESS_BUFFER_SIZE / 2)
// State for streaming deflate-compressed output to a socket.
// Used as the Handle of MicroProfileCompressedWriteSocket.
struct MicroProfileCompressedSocketState
{
// Compressed output staging buffer; drained to the socket by MicroProfileCompressedSocketFlush.
unsigned char DeflateOut[MICROPROFILE_COMPRESS_CHUNK];
// Uncompressed input staging buffer that Stream.next_in / avail_in point into.
unsigned char DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
// miniz deflate stream.
mz_stream Stream;
// Destination socket.
MpSocket Socket;
// Total uncompressed bytes accepted by MicroProfileCompressedWriteSocket.
uint32_t nSize;
// Total compressed bytes handed to send().
uint32_t nCompressedSize;
// Number of output flushes triggered from the write loop.
uint32_t nFlushes;
// Number of input bytes moved to the front of DeflateIn when compacting.
uint32_t nMemmoveBytes;
};
// Sends whatever compressed bytes are pending in DeflateOut (everything the
// stream has produced so far, i.e. CHUNK - avail_out bytes), adds that amount
// to nCompressedSize, and rewinds the stream's output window to the start of
// DeflateOut.
void MicroProfileCompressedSocketFlush(MicroProfileCompressedSocketState* pState)
{
	mz_stream& Deflater = pState->Stream;
	unsigned char* pOutBegin = &pState->DeflateOut[0];
	unsigned char* pOutEnd = &pState->DeflateOut[MICROPROFILE_COMPRESS_CHUNK - Deflater.avail_out];
	const ptrdiff_t nPending = pOutEnd - pOutBegin;
	if(nPending != 0)
	{
		send(pState->Socket, (const char*)pOutBegin, nPending, 0);
		pState->nCompressedSize += nPending;
	}
	Deflater.avail_out = MICROPROFILE_COMPRESS_CHUNK;
	Deflater.next_out = pOutBegin;
}
// Prepares pState for a new compressed transfer on Socket: clears the
// statistics, zeroes the deflate stream, points it at the empty DeflateIn /
// DeflateOut buffers and initialises deflate at the default compression level.
// The result of mz_deflateInit is not inspected.
void MicroProfileCompressedSocketStart(MicroProfileCompressedSocketState* pState, MpSocket Socket)
{
	pState->Socket = Socket;
	pState->nSize = 0;
	pState->nCompressedSize = 0;
	pState->nFlushes = 0;
	pState->nMemmoveBytes = 0;

	mz_stream* pStream = &pState->Stream;
	memset(pStream, 0, sizeof(*pStream));
	pStream->next_in = &pState->DeflateIn[0];
	pStream->avail_in = 0;
	pStream->next_out = &pState->DeflateOut[0];
	pStream->avail_out = MICROPROFILE_COMPRESS_CHUNK;
	mz_deflateInit(pStream, Z_DEFAULT_COMPRESSION);
}
// Finishes the deflate stream, sends all remaining compressed bytes and
// releases the stream.
void MicroProfileCompressedSocketFinish(MicroProfileCompressedSocketState* pState)
{
	mz_stream& Stream = pState->Stream;
	int r;
	// Finishing may need more output space than one chunk (pending input plus
	// the compressor's internal state). While deflate reports MZ_OK there is
	// still output to produce: drain the buffer and call again. A single call
	// would silently truncate the stream in that case.
	do
	{
		MicroProfileCompressedSocketFlush(pState);
		r = mz_deflate(&Stream, MZ_FINISH);
	} while(r == MZ_OK);
	MP_ASSERT(r == MZ_STREAM_END);
	MicroProfileCompressedSocketFlush(pState);
	r = mz_deflateEnd(&Stream);
	MP_ASSERT(r == MZ_OK);
	(void)r; // only read by MP_ASSERT, which may compile to nothing
}
// Write callback that feeds nSize bytes from pData into the compressed socket stream.
// Handle is a MicroProfileCompressedSocketState*.
// Small writes are only appended to DeflateIn; compression runs once the staging buffer
// cannot take the whole write.
void MicroProfileCompressedWriteSocket(void* Handle, size_t nSize, const char* pData)
{
MicroProfileCompressedSocketState* pState = (MicroProfileCompressedSocketState*)Handle;
mz_stream& Stream = pState->Stream;
// End of the input bytes the compressor has not consumed yet (the append position).
const unsigned char* pDeflateInEnd = Stream.next_in + Stream.avail_in;
const unsigned char* pDeflateInStart = &pState->DeflateIn[0];
// One past the last byte of the input staging buffer.
const unsigned char* pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
pState->nSize += (uint32_t)nSize;
// Fast path: the write fits in the free tail of DeflateIn, so just stage it and return.
if((ptrdiff_t)nSize <= pDeflateInRealEnd - pDeflateInEnd)
{
memcpy((void*)pDeflateInEnd, pData, nSize);
Stream.avail_in += (uint32_t)nSize;
MP_ASSERT(Stream.next_in + Stream.avail_in <= pDeflateInRealEnd);
return;
}
// Slow path: stage as much as fits, compress, and repeat until all input is consumed.
int Flush = 0;
while(nSize)
{
pDeflateInEnd = Stream.next_in + Stream.avail_in;
if(Flush)
{
// The previous iteration asked for a flush: send compressed output to the socket.
pState->nFlushes++;
MicroProfileCompressedSocketFlush(pState);
pDeflateInRealEnd = &pState->DeflateIn[MICROPROFILE_COMPRESS_CHUNK];
if(pDeflateInEnd == pDeflateInRealEnd)
{
// The append position hit the end of DeflateIn: move the unconsumed input to the
// front of the buffer so new data can be staged behind it.
if(Stream.avail_in)
{
MP_ASSERT(pDeflateInStart != Stream.next_in);
memmove((void*)pDeflateInStart, Stream.next_in, Stream.avail_in);
pState->nMemmoveBytes += Stream.avail_in;
}
Stream.next_in = pDeflateInStart;
pDeflateInEnd = Stream.next_in + Stream.avail_in;
}
}
size_t nSpace = pDeflateInRealEnd - pDeflateInEnd;
size_t nBytes = MicroProfileMin(nSpace, nSize);
MP_ASSERT(nBytes + pDeflateInEnd <= pDeflateInRealEnd);
memcpy((void*)pDeflateInEnd, pData, nBytes);
Stream.avail_in += (uint32_t)nBytes;
nSize -= nBytes;
pData += nBytes;
int r = mz_deflate(&Stream, MZ_NO_FLUSH);
// Request a flush next iteration when the compressor reported MZ_BUF_ERROR, nothing
// could be staged, or the output buffer is full.
Flush = r == MZ_BUF_ERROR || nBytes == 0 || Stream.avail_out == 0 ? 1 : 0;
MP_ASSERT(r == MZ_BUF_ERROR || r == MZ_OK);
if(r == MZ_BUF_ERROR)
{
r = mz_deflate(&Stream, MZ_SYNC_FLUSH);
}
}
}
#endif
#ifndef MicroProfileSetNonBlocking // fcntl doesn't work on some unix-like platforms.
// Switches Socket between blocking (NonBlocking == 0) and non-blocking (NonBlocking != 0)
// mode, using ioctlsocket(FIONBIO) on Windows and the O_NONBLOCK file status flag elsewhere.
// Return values of the underlying calls are not inspected.
void MicroProfileSetNonBlocking(MpSocket Socket, int NonBlocking)
{
#ifdef _WIN32
    u_long nMode = 0;
    if(NonBlocking)
    {
        nMode = 1;
    }
    ioctlsocket(Socket, FIONBIO, &nMode);
#else
    const int nCurrentFlags = fcntl(Socket, F_GETFL);
    const int nWantedFlags = NonBlocking ? (nCurrentFlags | O_NONBLOCK) : (nCurrentFlags & (~O_NONBLOCK));
    fcntl(Socket, F_SETFL, nWantedFlags);
#endif
}
#endif
// Creates the non-blocking TCP listener socket for the embedded web server.
// Tries to bind to S.nWebServerPort and, if that fails, to the following 19 ports;
// S.nWebServerPort is updated to the port that was bound. On Windows, Winsock is
// initialised first; if that fails S.ListenerSocket is set to (MpSocket)-1.
void MicroProfileWebServerStart()
{
#ifdef _WIN32
    WSADATA wsa;
    if(WSAStartup(MAKEWORD(2, 2), &wsa))
    {
        S.ListenerSocket = (MpSocket)-1;
        return;
    }
#endif
    // Protocol 6 is TCP.
    S.ListenerSocket = socket(PF_INET, SOCK_STREAM, 6);
    MP_ASSERT(!MP_INVALID_SOCKET(S.ListenerSocket));
    if(MP_INVALID_SOCKET(S.ListenerSocket))
    {
        // Without asserts there is nothing useful to configure on an invalid socket.
        return;
    }
    MicroProfileSetNonBlocking(S.ListenerSocket, 1);
    {
        int r = 0;
        int on = 1;
#if defined(_WIN32)
        r = setsockopt(S.ListenerSocket, SOL_SOCKET, SO_REUSEADDR, (const char*)&on, sizeof(on));
#else
        r = setsockopt(S.ListenerSocket, SOL_SOCKET, SO_REUSEADDR, (void*)&on, sizeof(on));
#endif
        (void)r;
    }
    int nStartPort = S.nWebServerPort;
    struct sockaddr_in Addr;
    // Zero the whole structure so padding (sin_zero) is not passed to bind() uninitialised.
    memset(&Addr, 0, sizeof(Addr));
    Addr.sin_family = AF_INET;
    Addr.sin_addr.s_addr = INADDR_ANY;
    for(int i = 0; i < 20; ++i)
    {
        Addr.sin_port = htons(nStartPort + i);
        if(0 == bind(S.ListenerSocket, (sockaddr*)&Addr, sizeof(Addr)))
        {
            S.nWebServerPort = (uint32_t)(nStartPort + i);
            break;
        }
    }
    listen(S.ListenerSocket, 8);
}
// Joins the web socket sender thread if it is marked as running, then records that the
// join step has been performed (checked by MicroProfileWebServerStop).
void MicroProfileWebServerJoin()
{
if(S.WebSocketThreadRunning)
{
MicroProfileThreadJoin(&S.WebSocketSendThread);
}
S.WebSocketThreadJoined = 1;
}
// Closes the listener socket. Asserts that MicroProfileWebServerJoin has been called
// first. On Windows this also releases Winsock via WSACleanup.
void MicroProfileWebServerStop()
{
MP_ASSERT(S.WebSocketThreadJoined);
#ifdef _WIN32
closesocket(S.ListenerSocket);
WSACleanup();
#else
close(S.ListenerSocket);
#endif
}
// Result of classifying the path of an HTTP GET request (see MicroProfileParseGet).
enum MicroProfileGetCommand
{
// Path consisting only of digits: dump that many frames (0 selects the default count).
EMICROPROFILE_GET_COMMAND_DUMP,
// Path of the form "r/<start>/<end>": dump an explicit frame range.
EMICROPROFILE_GET_COMMAND_DUMP_RANGE,
// Empty path, or a path starting with 'b' or 'p'.
EMICROPROFILE_GET_COMMAND_LIVE,
// "favicon.ico" or "favicon.png".
EMICROPROFILE_GET_COMMAND_FAVICON,
// "service-worker.js".
EMICROPROFILE_GET_COMMAND_SERVICE_WORKER,
// Anything that could not be parsed.
EMICROPROFILE_GET_COMMAND_UNKNOWN,
};
// Output of MicroProfileParseGet for the dump commands.
struct MicroProfileParseGetResult
{
// Number of frames requested.
uint64_t nFrames;
// First frame of a range dump; set to (uint64_t)-1 for a plain dump.
uint64_t nFrameStart;
};
// Classifies the path part of an HTTP GET request.
//   ""                         -> LIVE
//   "favicon.ico"/"favicon.png"-> FAVICON
//   "service-worker.js"        -> SERVICE_WORKER
//   starts with 'b' or 'p'     -> LIVE (also sets S.nWSWasConnected)
//   "r/<start>/<end>"          -> DUMP_RANGE, pResult gets start and (end - start)
//   digits only                -> DUMP, pResult->nFrames is the number (default if 0)
// Everything else yields UNKNOWN. pResult is only written for the two dump commands.
MicroProfileGetCommand MicroProfileParseGet(const char* pGet, MicroProfileParseGetResult* pResult)
{
    if('\0' == pGet[0])
    {
        return EMICROPROFILE_GET_COMMAND_LIVE;
    }
    if(0 == strcmp(pGet, "favicon.ico") || 0 == strcmp(pGet, "favicon.png"))
    {
        return EMICROPROFILE_GET_COMMAND_FAVICON;
    }
    if(0 == strcmp(pGet, "service-worker.js"))
    {
        return EMICROPROFILE_GET_COMMAND_SERVICE_WORKER;
    }
    switch(pGet[0])
    {
    case 'b':
    case 'p':
        S.nWSWasConnected = 1; // do not load default when url has one specified.
        return EMICROPROFILE_GET_COMMAND_LIVE;
    case 'r': // range, parsed by hand: r/<start>/<end>
    {
        if('/' != pGet[1])
        {
            return EMICROPROFILE_GET_COMMAND_UNKNOWN;
        }
        const char* pFirstNumber = pGet + 2;
        char* pStop = nullptr;
        uint64_t nFrameStart = strtoll(pFirstNumber, &pStop, 10);
        // The first number must be non-empty and followed by the separator.
        if(pStop == pFirstNumber || *pStop != '/')
        {
            return EMICROPROFILE_GET_COMMAND_UNKNOWN;
        }
        const char* pSecondNumber = pStop + 1;
        uint64_t nFrameEnd = strtoll(pSecondNumber, &pStop, 10);
        if(pStop == pSecondNumber || nFrameEnd <= nFrameStart)
        {
            return EMICROPROFILE_GET_COMMAND_UNKNOWN;
        }
        pResult->nFrames = nFrameEnd - nFrameStart;
        pResult->nFrameStart = nFrameStart;
        return EMICROPROFILE_GET_COMMAND_DUMP_RANGE;
    }
    default:
        break;
    }
    // Remaining form: a plain frame count, so every character has to be a digit.
    for(const char* pChar = pGet; *pChar != '\0'; ++pChar)
    {
        if(*pChar < '0' || *pChar > '9')
        {
            return EMICROPROFILE_GET_COMMAND_UNKNOWN;
        }
    }
    int nFrames = atoi(pGet);
    pResult->nFrameStart = (uint64_t)-1;
    if(0 == nFrames)
    {
        pResult->nFrames = MICROPROFILE_WEBSERVER_DEFAULT_FRAMES;
    }
    else
    {
        pResult->nFrames = nFrames;
    }
    return EMICROPROFILE_GET_COMMAND_DUMP;
}
// Base64-encodes nLen bytes from pIn into pOut using the standard alphabet with '='
// padding. Writes exactly 4 * ceil(nLen / 3) characters and does NOT append a
// terminating NUL; pOut must be large enough.
void MicroProfileBase64Encode(char* pOut, const uint8_t* pIn, uint32_t nLen)
{
    static const char kAlphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    char* pWrite = pOut;
    for(uint32_t nPos = 0; nPos < nLen; nPos += 3)
    {
        // Gather up to three input bytes; missing bytes contribute zero bits.
        const bool bHasSecond = nPos + 1 < nLen;
        const bool bHasThird = nPos + 2 < nLen;
        const uint32_t nFirst = pIn[nPos];
        const uint32_t nSecond = bHasSecond ? pIn[nPos + 1] : 0;
        const uint32_t nThird = bHasThird ? pIn[nPos + 2] : 0;

        pWrite[0] = kAlphabet[(nFirst & 0xfc) >> 2];
        pWrite[1] = kAlphabet[((nFirst & 0x3) << 4) | ((nSecond & 0xF0) >> 4)];
        pWrite[2] = bHasSecond ? kAlphabet[((nSecond & 0x0F) << 2) | ((nThird & 0xC0) >> 6)] : '=';
        pWrite[3] = bHasThird ? kAlphabet[nThird & 0x3F] : '=';
        pWrite += 4;
    }
}
// begin: SHA-1 in C
// ftp://ftp.funet.fi/pub/crypt/hash/sha/sha1.c
// SHA-1 in C
// By Steve Reid <steve@edmweb.com>
// 100% Public Domain
// Running state of one SHA-1 computation.
typedef struct
{
// Five 32-bit chaining values; initialised by MicroProfile_SHA1_Init.
uint32_t state[5];
// Message length in bits: count[0] holds the low 32 bits, count[1] the high 32 bits.
uint32_t count[2];
// Bytes of the current, not yet complete, 64-byte block.
unsigned char buffer[64];
} MicroProfile_SHA1_CTX;
#include <string.h>
#ifndef _WIN32
#include <netinet/in.h>
#endif
// Forward declaration of the block transform defined below.
static void MicroProfile_SHA1_Transform(uint32_t[5], const unsigned char[64]);
// Rotate a 32-bit value left by `bits`.
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
// Word i of the input block, converted in place with htonl.
#define blk0(i) (block->l[i] = htonl(block->l[i]))
// Word i of the expanded message schedule, kept in a 16-word circular buffer.
#define blk(i) (block->l[i & 15] = rol(block->l[(i + 13) & 15] ^ block->l[(i + 8) & 15] ^ block->l[(i + 2) & 15] ^ block->l[i & 15], 1))
// R0..R4 are the per-step operations used by MicroProfile_SHA1_Transform.
// R0 differs from R1 only in using blk0 (raw input words) instead of blk.
#define R0(v, w, x, y, z, i) \
z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + rol(v, 5); \
w = rol(w, 30);
#define R1(v, w, x, y, z, i) \
z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + rol(v, 5); \
w = rol(w, 30);
#define R2(v, w, x, y, z, i) \
z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + rol(v, 5); \
w = rol(w, 30);
#define R3(v, w, x, y, z, i) \
z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + rol(v, 5); \
w = rol(w, 30);
#define R4(v, w, x, y, z, i) \
z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + rol(v, 5); \
w = rol(w, 30);
// Hash a single 512-bit block. This is the core of the algorithm.
// state:  the five chaining values, updated in place.
// buffer: the 64-byte block to absorb. NOTE: the block is accessed through a non-const
//         cast and its words are rewritten in place by the blk0/blk macros, so the
//         caller's buffer contents are modified despite the const qualifier.
static void MicroProfile_SHA1_Transform(uint32_t state[5], const unsigned char buffer[64])
{
uint32_t a, b, c, d, e;
// View of the 64-byte block as sixteen 32-bit words.
typedef union
{
unsigned char c[64];
uint32_t l[16];
} CHAR64LONG16;
CHAR64LONG16* block;
block = (CHAR64LONG16*)buffer;
// Copy context->state[] to working vars
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
// 4 rounds of 20 operations each. Loop unrolled.
R0(a, b, c, d, e, 0);
R0(e, a, b, c, d, 1);
R0(d, e, a, b, c, 2);
R0(c, d, e, a, b, 3);
R0(b, c, d, e, a, 4);
R0(a, b, c, d, e, 5);
R0(e, a, b, c, d, 6);
R0(d, e, a, b, c, 7);
R0(c, d, e, a, b, 8);
R0(b, c, d, e, a, 9);
R0(a, b, c, d, e, 10);
R0(e, a, b, c, d, 11);
R0(d, e, a, b, c, 12);
R0(c, d, e, a, b, 13);
R0(b, c, d, e, a, 14);
R0(a, b, c, d, e, 15);
R1(e, a, b, c, d, 16);
R1(d, e, a, b, c, 17);
R1(c, d, e, a, b, 18);
R1(b, c, d, e, a, 19);
R2(a, b, c, d, e, 20);
R2(e, a, b, c, d, 21);
R2(d, e, a, b, c, 22);
R2(c, d, e, a, b, 23);
R2(b, c, d, e, a, 24);
R2(a, b, c, d, e, 25);
R2(e, a, b, c, d, 26);
R2(d, e, a, b, c, 27);
R2(c, d, e, a, b, 28);
R2(b, c, d, e, a, 29);
R2(a, b, c, d, e, 30);
R2(e, a, b, c, d, 31);
R2(d, e, a, b, c, 32);
R2(c, d, e, a, b, 33);
R2(b, c, d, e, a, 34);
R2(a, b, c, d, e, 35);
R2(e, a, b, c, d, 36);
R2(d, e, a, b, c, 37);
R2(c, d, e, a, b, 38);
R2(b, c, d, e, a, 39);
R3(a, b, c, d, e, 40);
R3(e, a, b, c, d, 41);
R3(d, e, a, b, c, 42);
R3(c, d, e, a, b, 43);
R3(b, c, d, e, a, 44);
R3(a, b, c, d, e, 45);
R3(e, a, b, c, d, 46);
R3(d, e, a, b, c, 47);
R3(c, d, e, a, b, 48);
R3(b, c, d, e, a, 49);
R3(a, b, c, d, e, 50);
R3(e, a, b, c, d, 51);
R3(d, e, a, b, c, 52);
R3(c, d, e, a, b, 53);
R3(b, c, d, e, a, 54);
R3(a, b, c, d, e, 55);
R3(e, a, b, c, d, 56);
R3(d, e, a, b, c, 57);
R3(c, d, e, a, b, 58);
R3(b, c, d, e, a, 59);
R4(a, b, c, d, e, 60);
R4(e, a, b, c, d, 61);
R4(d, e, a, b, c, 62);
R4(c, d, e, a, b, 63);
R4(b, c, d, e, a, 64);
R4(a, b, c, d, e, 65);
R4(e, a, b, c, d, 66);
R4(d, e, a, b, c, 67);
R4(c, d, e, a, b, 68);
R4(b, c, d, e, a, 69);
R4(a, b, c, d, e, 70);
R4(e, a, b, c, d, 71);
R4(d, e, a, b, c, 72);
R4(c, d, e, a, b, 73);
R4(b, c, d, e, a, 74);
R4(a, b, c, d, e, 75);
R4(e, a, b, c, d, 76);
R4(d, e, a, b, c, 77);
R4(c, d, e, a, b, 78);
R4(b, c, d, e, a, 79);
// Add the working vars back into context.state[]
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
// Wipe variables
a = b = c = d = e = 0;
}
// Resets a SHA-1 context: loads the five initial chaining values and clears the
// 64-bit message length counter.
void MicroProfile_SHA1_Init(MicroProfile_SHA1_CTX* context)
{
    // SHA1 initialization constants
    static const uint32_t kInitialState[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 };
    for(int nWord = 0; nWord < 5; ++nWord)
    {
        context->state[nWord] = kInitialState[nWord];
    }
    context->count[1] = 0;
    context->count[0] = 0;
}
// Run your data through this.
// Absorbs len bytes from data into the hash: updates the bit counter, runs the block
// transform for every completed 64-byte block and keeps the remainder in context->buffer.
void MicroProfile_SHA1_Update(MicroProfile_SHA1_CTX* context, const unsigned char* data, unsigned int len)
{
unsigned int i, j;
// j = number of bytes already buffered (bit count / 8, modulo 64).
j = (context->count[0] >> 3) & 63;
// Add len * 8 bits to the low word; carry into the high word on wrap-around.
if((context->count[0] += len << 3) < (len << 3))
context->count[1]++;
// Bits of len * 8 that do not fit in the low 32-bit word.
context->count[1] += (len >> 29);
// i = bytes needed to complete the current block.
i = 64 - j;
while(len >= i)
{
memcpy(&context->buffer[j], data, i);
MicroProfile_SHA1_Transform(context->state, context->buffer);
data += i;
len -= i;
// Subsequent blocks start at the beginning of the buffer and need all 64 bytes.
i = 64;
j = 0;
}
// Keep the incomplete tail for the next call.
memcpy(&context->buffer[j], data, len);
}
// Add padding and return the message digest.
// Writes the 20-byte digest (most significant byte of each state word first) to `digest`
// and wipes the context afterwards, so the context cannot be reused without
// MicroProfile_SHA1_Init.
void MicroProfile_SHA1_Final(unsigned char digest[20], MicroProfile_SHA1_CTX* context)
{
    // Capture the message length before the padding below changes count[]:
    // high word first, each word most significant byte first (endian independent).
    unsigned char LengthBytes[8];
    for(uint32_t nByte = 0; nByte < 8; ++nByte)
    {
        const uint32_t nWord = context->count[nByte < 4 ? 1 : 0];
        const uint32_t nShift = (3 - (nByte & 3)) * 8;
        LengthBytes[nByte] = (unsigned char)((nWord >> nShift) & 255);
    }
    // Append the mandatory 0x80 byte, then zero bytes until 56 bytes of the block are used.
    MicroProfile_SHA1_Update(context, (unsigned char*)"\200", 1);
    while((context->count[0] & 504) != 448)
    {
        MicroProfile_SHA1_Update(context, (unsigned char*)"\0", 1);
    }
    MicroProfile_SHA1_Update(context, LengthBytes, 8); // Should cause a SHA1Transform()
    for(uint32_t nByte = 0; nByte < 20; ++nByte)
    {
        const uint32_t nShift = (3 - (nByte & 3)) * 8;
        digest[nByte] = (unsigned char)((context->state[nByte >> 2] >> nShift) & 255);
    }
    // Wipe variables
    memset(context->buffer, 0, 64);
    memset(context->state, 0, 20);
    memset(context->count, 0, 8);
    memset(&LengthBytes, 0, 8);
}
#undef rol
#undef blk0
#undef blk
#undef R0
#undef R1
#undef R2
#undef R3
#undef R4
// end: SHA-1 in C
// Forward declarations of websocket helpers that are used before their definitions.
void MicroProfileWebSocketSendState(MpSocket C);
void MicroProfileWebSocketSendEnabled(MpSocket C);
void MicroProfileWSPrintStart(MpSocket C);
void MicroProfileWSPrintf(const char* pFmt, ...);
void MicroProfileWSPrintEnd();
void MicroProfileWSFlush();
bool MicroProfileWebSocketReceive(MpSocket C);
// Element kinds stored in the top 8 bits of a packed websocket id
// (see MicroProfileWebSocketIdPack / MicroProfileWebSocketCommand).
enum
{
TYPE_NONE = 0,
TYPE_TIMER = 1,
TYPE_GROUP = 2,
TYPE_CATEGORY = 3,
TYPE_SETTING = 4,
TYPE_COUNTER = 5,
};
// Element values for TYPE_SETTING commands; each one toggles the named setting
// in MicroProfileWebSocketCommand.
enum
{
SETTING_FORCE_ENABLE = 0,
SETTING_CONTEXT_SWITCH_TRACE = 1,
SETTING_PLATFORM_MARKERS = 2,
};
// Message ids written into the "k" field of JSON messages sent over the websocket
// (MSG_PRESETS is used that way by MicroProfileWebSocketSendPresets).
enum
{
MSG_TIMER_TREE = 1,
MSG_ENABLED = 2,
MSG_FRAME = 3,
MSG_LOADSETTINGS = 4,
MSG_PRESETS = 5,
MSG_CURRENTSETTINGS = 6,
MSG_COUNTERS = 7,
MSG_FUNCTION_RESULTS = 8,
MSG_INACTIVE_FRAME = 9,
MSG_FUNCTION_NAMES = 10,
MSG_INSTRUMENT_ERROR = 11,
MSG_QUERY_INDEX = 12,
// MSG_MODULE_NAME = 12,
};
// View modes; the websocket 'v' message stores one of these (as a single digit)
// in S.nWSViewMode. VIEW_SIZE is the number of modes.
enum
{
VIEW_GRAPH_SPLIT = 0,
VIEW_GRAPH_PERCENTILE = 1,
VIEW_GRAPH_THREAD_GROUP = 2,
VIEW_BAR = 3,
VIEW_BAR_ALL = 4,
VIEW_BAR_SINGLE = 5,
VIEW_COUNTERS = 6,
VIEW_SIZE = 7,
};
// Debug helper: polls every web socket with a zero-timeout select() and prints, per
// socket, its handle followed by 'e'/'_' (error set), 'r'/' ' (readable) and 'w'/' '
// (writable). Afterwards it queries SO_ERROR on the sockets and asserts if one reports
// a pending error.
void MicroProfileSocketDumpState()
{
fd_set Read, Write, Error;
FD_ZERO(&Read);
FD_ZERO(&Write);
FD_ZERO(&Error);
// First argument for select(): one more than the highest socket seen.
MpSocket LastSocket = 1;
for(uint32_t i = 0; i < S.nNumWebSockets; ++i)
{
LastSocket = MicroProfileMax(LastSocket, S.WebSockets[i] + 1);
FD_SET(S.WebSockets[i], &Read);
FD_SET(S.WebSockets[i], &Write);
FD_SET(S.WebSockets[i], &Error);
}
// Zero timeout: select() only polls and returns immediately.
timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 0;
if(-1 == select(LastSocket, &Read, &Write, &Error, &tv))
{
MP_ASSERT(0);
}
for(uint32_t i = 0; i < S.nNumWebSockets; i++)
{
MpSocket s = S.WebSockets[i];
uprintf("%" PRId64 " ", (uint64_t)s);
if(FD_ISSET(s, &Error))
{
uprintf("e");
}
else
{
uprintf("_");
}
if(FD_ISSET(s, &Read))
{
uprintf("r");
}
else
{
uprintf(" ");
}
if(FD_ISSET(s, &Write))
{
uprintf("w");
}
else
{
uprintf(" ");
}
}
uprintf("\n");
// NOTE(review): this loop starts at index 1 while the loops above start at 0, so
// S.WebSockets[0] is never checked for SO_ERROR - confirm whether that is intentional.
for(uint32_t i = 1; i < S.nNumWebSockets; i++)
{
MpSocket s = S.WebSockets[i];
int error_code;
socklen_t error_code_size = sizeof(error_code);
int r = getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&error_code, &error_code_size);
MP_ASSERT(r >= 0);
if(error_code != 0)
{
#ifdef _WIN32
char buffer[1024];
strerror_s(buffer, sizeof(buffer) - 1, error_code);
fprintf(stderr, "socket error: %d %s\n", (int)s, buffer);
#else
fprintf(stderr, "socket error: %d %s\n", (int)s, strerror(error_code));
#endif
MP_ASSERT(0);
}
}
}
bool MicroProfileSocketSend2(MpSocket Connection, const void* pMessage, int nLen);
// Thread entry point that drains the websocket send ring buffer (S.WSBuf) to
// S.WebSockets[0] until S.nMicroProfileShutdown is set.
// Each pass sends one contiguous span: from the read position up to the write position,
// or up to the end of the buffer when the data wraps. A failed send sets S.nSocketFail,
// after which the thread idles in 100 ms sleeps until the flag is cleared elsewhere.
void* MicroProfileSocketSenderThread(void*)
{
    MicroProfileOnThreadCreate("MicroProfileSocketSenderThread");
    for(;;)
    {
        if(S.nMicroProfileShutdown)
        {
            break;
        }
        if(S.nSocketFail)
        {
            MicroProfileSleep(100);
            continue;
        }
        const uint32_t nCapacity = MICROPROFILE_WEBSOCKET_BUFFER_SIZE;
        const uint32_t nReadPos = S.WSBuf.nSendGet.load();
        const uint32_t nWritePos = S.WSBuf.nSendPut.load();
        // Wrapped data is sent up to the end of the buffer; the rest follows next pass.
        const uint32_t nPending = nReadPos > nWritePos ? nCapacity - nReadPos : nWritePos - nReadPos;
        if(0 == nPending)
        {
            MicroProfileSleep(20);
        }
        else
        {
            MICROPROFILE_SCOPE(g_MicroProfileSendLoop);
            MICROPROFILE_COUNTER_LOCAL_ADD_ATOMIC(g_MicroProfileBytesPerFlip, nPending);
            if(MicroProfileSocketSend2(S.WebSockets[0], &S.WSBuf.SendBuffer[nReadPos], nPending))
            {
                S.WSBuf.nSendGet.store((nReadPos + nPending) % MICROPROFILE_WEBSOCKET_BUFFER_SIZE);
            }
            else
            {
                S.nSocketFail = 1;
            }
        }
    }
    MicroProfileOnThreadExit();
    return 0;
}
// Queues nLen bytes from pMessage into the websocket send ring buffer (S.WSBuf).
// Copies as much as currently fits, always leaving one slot unused, and sleeps 20 ms
// whenever the buffer is full. Gives up silently once S.nSocketFail is set.
// The Connection argument is not used by this function.
void MicroProfileSocketSend(MpSocket Connection, const void* pMessage, int nLen)
{
    if(S.nSocketFail || nLen <= 0)
    {
        return;
    }
    MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSocketSend", MP_GREEN4);
    const char* pCursor = (const char*)pMessage;
    while(nLen != 0)
    {
        MP_ASSERT(nLen > 0);
        const uint32_t nCapacity = MICROPROFILE_WEBSOCKET_BUFFER_SIZE;
        const uint32_t nReadPos = S.WSBuf.nSendGet.load();
        const uint32_t nWritePos = S.WSBuf.nSendPut.load();
        uint32_t nChunk = 0;
        if(nWritePos < nReadPos)
        {
            // Free span lies between write and read position, minus the reserved slot.
            nChunk = nReadPos - nWritePos - 1;
        }
        else if(0 == nReadPos)
        {
            // Writing up to the very end would make put wrap onto get; keep one slot free.
            nChunk = nCapacity - nWritePos - 1;
        }
        else
        {
            nChunk = nCapacity - nWritePos;
        }
        MP_ASSERT((int)nChunk >= 0);
        nChunk = MicroProfileMin(nLen, (int)nChunk);
        if(0 == nChunk)
        {
            if(S.nSocketFail)
            {
                return;
            }
            MicroProfileSleep(20);
            continue;
        }
        memcpy(&S.WSBuf.SendBuffer[nWritePos], pCursor, nChunk);
        pCursor += nChunk;
        nLen -= nChunk;
        S.WSBuf.nSendPut.store((nWritePos + nChunk) % MICROPROFILE_WEBSOCKET_BUFFER_SIZE);
    }
}
// Sends nLen bytes from pMessage directly on Connection, retrying (with a 20 ms sleep)
// while the socket reports that the operation would block.
// Returns true when everything was sent; false when nLen <= 0, S.nSocketFail is set,
// the socket has a pending error, or send() fails for any other reason.
//
// The would-block check is platform specific: Winsock reports errors through
// WSAGetLastError() and does not set errno, so errno is only consulted on other platforms.
bool MicroProfileSocketSend2(MpSocket Connection, const void* pMessage, int nLen)
{
    if(S.nSocketFail || nLen <= 0)
    {
        return false;
    }
    // MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSocketSend2", 0);
#ifndef _WIN32
    // Bail out early if the socket already has an error queued. error_code is initialised
    // and the getsockopt result checked so a failed query never reads garbage.
    int error_code = 0;
    socklen_t error_code_size = sizeof(error_code);
    if(0 != getsockopt(Connection, SOL_SOCKET, SO_ERROR, &error_code, &error_code_size) || error_code != 0)
    {
        return false;
    }
#endif
    while(nLen)
    {
        const int s = send(Connection, (const char*)pMessage, nLen, 0);
        if(s < 0)
        {
#ifdef _WIN32
            const bool bWouldBlock = WSAGetLastError() == WSAEWOULDBLOCK;
#else
            const int error = errno;
            const bool bWouldBlock = error == EAGAIN || error == EWOULDBLOCK;
#endif
            if(bWouldBlock)
            {
                MicroProfileSleep(20);
                continue;
            }
            return false;
        }
        nLen -= s;
        pMessage = (const char*)pMessage + s;
    }
    return true;
}
// Packs an element kind and an element index into one 32-bit id:
// `type` occupies the top 8 bits, `element` the low 24 bits.
uint32_t MicroProfileWebSocketIdPack(uint32_t type, uint32_t element)
{
    MP_ASSERT(type < 255);
    MP_ASSERT(element < 0xffffff);
    const uint32_t nTypeBits = type << 24;
    return nTypeBits | element;
}
// Splits a packed id into its parts: `type` receives the top 8 bits and `element`
// the low 24 bits of nPacked.
void MicroProfileWebSocketIdUnpack(uint32_t nPacked, uint32_t& type, uint32_t& element)
{
    const uint32_t kElementMask = (1u << 24) - 1;
    const uint32_t nTopByte = (nPacked >> 24) & 0xff;
    type = nTopByte;
    element = nPacked & kElementMask;
}
// First byte of a websocket frame header: FIN flag, three reserved bits and a 4-bit
// opcode, accessible either as bit-fields or as the raw byte `v`.
// NOTE(review): the bit-field order (opcode first) presumably relies on the compiler
// allocating bit-fields starting from the least significant bit - confirm for new targets.
struct MicroProfileWebSocketHeader0
{
union
{
struct
{
uint8_t opcode : 4;
uint8_t RSV3 : 1;
uint8_t RSV2 : 1;
uint8_t RSV1 : 1;
uint8_t FIN : 1;
};
uint8_t v;
};
};
// Second byte of a websocket frame header: MASK flag and 7-bit payload length, where the
// values 126 and 127 announce a 2-byte or 8-byte extended length. Also accessible as the
// raw byte `v`.
struct MicroProfileWebSocketHeader1
{
union
{
struct
{
uint8_t payload : 7;
uint8_t MASK : 1;
};
uint8_t v;
};
};
// Queues one unmasked websocket frame (FIN = 1, opcode = 1) carrying nLen bytes of
// pMessage. The payload length is encoded inline when <= 125, as a 2-byte big-endian
// value when <= 0xffff, and as an 8-byte big-endian value otherwise.
// pMessage is asserted to be S.WSBuf.pBuffer, which has header space preallocated in
// front of it. Always returns true.
bool MicroProfileWebSocketSend(MpSocket Connection, const char* pMessage, uint64_t nLen)
{
MicroProfileWebSocketHeader0 h0;
MicroProfileWebSocketHeader1 h1;
h0.v = 0;
h1.v = 0;
h0.opcode = 1;
h0.FIN = 1;
uint32_t nExtraSizeBytes = 0;
uint8_t nExtraSize[8];
if(nLen > 125)
{
if(nLen > 0xffff)
{
nExtraSizeBytes = 8;
h1.payload = 127;
}
else
{
h1.payload = 126;
nExtraSizeBytes = 2;
}
// Store the length most significant byte first.
uint64_t nCount = nLen;
for(uint32_t i = 0; i < nExtraSizeBytes; ++i)
{
nExtraSize[nExtraSizeBytes - i - 1] = nCount & 0xff;
nCount >>= 8;
}
// Decode the bytes again to verify the encoding.
// NOTE(review): nSize is 32-bit, so this check can only hold for nLen < 4 GiB.
uint32_t nSize = 0;
for(uint32_t i = 0; i < nExtraSizeBytes; i++)
{
nSize <<= 8;
nSize += nExtraSize[i];
}
MP_ASSERT(nSize == nLen); // verify
}
else
{
h1.payload = nLen;
}
MP_ASSERT(pMessage == S.WSBuf.pBuffer); // space for header is preallocated here
MP_ASSERT(pMessage == S.WSBuf.pBufferAllocation + 20); // space for header is preallocated here
MP_ASSERT(nExtraSizeBytes < 18);
// Write the header into the reserved bytes directly in front of the payload.
char* pTmp = (char*)(pMessage - nExtraSizeBytes - 2);
memcpy(pTmp + 2, &nExtraSize[0], nExtraSizeBytes);
pTmp[1] = *(char*)&h1;
pTmp[0] = *(char*)&h0;
// MicroProfileSocketSend(Connection, pTmp, nExtraSizeBytes + 2 + nLen);
// The enabled path below queues header bytes and payload as separate pieces instead of
// sending the contiguous buffer prepared above.
#if 1
MicroProfileSocketSend(Connection, &h0, 1);
MicroProfileSocketSend(Connection, &h1, 1);
if(nExtraSizeBytes)
{
MicroProfileSocketSend(Connection, &nExtraSize[0], nExtraSizeBytes);
}
MicroProfileSocketSend(Connection, pMessage, nLen);
#endif
return true;
}
// Empties the three intrusive "enabled over websocket" lists (timers, counters, groups).
// Each list is linked through nWSNext with -1 as terminator; removed entries get
// nWSNext = -2, which marks them as not enabled. Finally flags the enabled state as dirty.
void MicroProfileWebSocketClearTimers()
{
    for(int nCurrent = S.WebSocketTimers; nCurrent > -1; nCurrent = S.WebSocketTimers)
    {
        const int nFollowing = S.TimerInfo[nCurrent].nWSNext;
        S.TimerInfo[nCurrent].nWSNext = -2;
        S.WebSocketTimers = nFollowing;
    }
    MP_ASSERT(S.WebSocketTimers == -1);

    for(int nCurrent = S.WebSocketCounters; nCurrent > -1; nCurrent = S.WebSocketCounters)
    {
        const int nFollowing = S.CounterInfo[nCurrent].nWSNext;
        S.CounterInfo[nCurrent].nWSNext = -2;
        S.WebSocketCounters = nFollowing;
    }
    MP_ASSERT(S.WebSocketCounters == -1);

    for(int nCurrent = S.WebSocketGroups; nCurrent > -1; nCurrent = S.WebSocketGroups)
    {
        const int nFollowing = S.GroupInfo[nCurrent].nWSNext;
        S.GroupInfo[nCurrent].nWSNext = -2;
        S.WebSocketGroups = nFollowing;
    }
    MP_ASSERT(S.WebSocketGroups == -1);

    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Toggles membership of timer nTimer in the websocket timer list (S.WebSocketTimers).
// A timer with nWSNext >= -1 is currently in the list and gets unlinked (nWSNext = -2);
// otherwise it is appended at the tail. Out-of-range indices are ignored.
void MicroProfileWebSocketToggleTimer(uint32_t nTimer)
{
    if(nTimer >= S.nTotalTimers)
    {
        return;
    }
    const int nWanted = (int)nTimer;
    // Walk the links until one refers to this timer or to the list end.
    int* pLink = &S.WebSocketTimers;
    while(*pLink > -1 && *pLink != nWanted)
    {
        MP_ASSERT(*pLink < (int)S.nTotalTimers && *pLink >= 0);
        pLink = &S.TimerInfo[*pLink].nWSNext;
    }
    auto& Entry = S.TimerInfo[nTimer];
    if(Entry.nWSNext < -1)
    {
        // Not listed yet: append at the tail.
        MP_ASSERT(*pLink == -1);
        Entry.nWSNext = -1;
        *pLink = nWanted;
    }
    else
    {
        // Listed: unlink and mark as disabled.
        MP_ASSERT(*pLink == nWanted);
        *pLink = Entry.nWSNext;
        Entry.nWSNext = -2;
    }
    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Toggles membership of counter nCounter in the websocket counter list
// (S.WebSocketCounters). A counter with nWSNext >= -1 is currently in the list and gets
// unlinked (nWSNext = -2); otherwise it is appended at the tail. Out-of-range indices
// are ignored.
void MicroProfileWebSocketToggleCounter(uint32_t nCounter)
{
    if(nCounter >= S.nNumCounters)
    {
        return;
    }
    const int nWanted = (int)nCounter;
    // Walk the links until one refers to this counter or to the list end.
    int* pLink = &S.WebSocketCounters;
    while(*pLink > -1 && *pLink != nWanted)
    {
        MP_ASSERT(*pLink < (int)S.nNumCounters && *pLink >= 0);
        pLink = &S.CounterInfo[*pLink].nWSNext;
    }
    auto& Entry = S.CounterInfo[nCounter];
    if(Entry.nWSNext < -1)
    {
        // Not listed yet: append at the tail.
        MP_ASSERT(*pLink == -1);
        Entry.nWSNext = -1;
        *pLink = nWanted;
    }
    else
    {
        // Listed: unlink and mark as disabled.
        MP_ASSERT(*pLink == nWanted);
        *pLink = Entry.nWSNext;
        Entry.nWSNext = -2;
    }
    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Toggles membership of group nGroup in the websocket group list (S.WebSocketGroups).
// A group with nWSNext >= -1 is currently in the list and gets unlinked (nWSNext = -2);
// otherwise it is appended at the tail. Out-of-range indices are ignored.
void MicroProfileWebSocketToggleGroup(uint32_t nGroup)
{
    if(nGroup >= S.nGroupCount)
    {
        return;
    }
    const int nWanted = (int)nGroup;
    // Walk the links until one refers to this group or to the list end.
    int* pLink = &S.WebSocketGroups;
    while(*pLink > -1 && *pLink != nWanted)
    {
        MP_ASSERT(*pLink < (int)S.nGroupCount && *pLink >= 0);
        pLink = &S.GroupInfo[*pLink].nWSNext;
    }
    auto& Entry = S.GroupInfo[nGroup];
    if(Entry.nWSNext < -1)
    {
        // Not listed yet: append at the tail.
        MP_ASSERT(*pLink == -1);
        Entry.nWSNext = -1;
        *pLink = nWanted;
    }
    else
    {
        // Listed: unlink and mark as disabled.
        MP_ASSERT(*pLink == nWanted);
        *pLink = Entry.nWSNext;
        Entry.nWSNext = -2;
    }
    S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
}
// Returns true when nTimer is a valid timer index and the timer is in the websocket
// timer list (its nWSNext is not the "disabled" marker -2).
bool MicroProfileWebSocketTimerEnabled(uint32_t nTimer)
{
    const bool bValidIndex = nTimer < S.nTotalTimers;
    return bValidIndex && S.TimerInfo[nTimer].nWSNext > -2;
}
// Returns true when nCounter is a valid counter index and the counter is in the
// websocket counter list (its nWSNext is not the "disabled" marker -2).
bool MicroProfileWebSocketCounterEnabled(uint32_t nCounter)
{
    const bool bValidIndex = nCounter < S.nNumCounters;
    return bValidIndex && S.CounterInfo[nCounter].nWSNext > -2;
}
// Executes a packed toggle command received from the web client. The command is split
// into an element kind and an element index (see MicroProfileWebSocketIdUnpack):
// settings are toggled here, timers/groups/categories/counters are forwarded to their
// toggle functions, TYPE_NONE is ignored and unknown kinds are logged.
void MicroProfileWebSocketCommand(uint32_t nCommand)
{
    uint32_t nKind, nIndex;
    MicroProfileWebSocketIdUnpack(nCommand, nKind, nIndex);
    if(TYPE_NONE == nKind)
    {
        return;
    }
    if(TYPE_SETTING == nKind)
    {
        if(SETTING_FORCE_ENABLE == nIndex)
        {
            MicroProfileSetEnableAllGroups(!MicroProfileGetEnableAllGroups());
        }
        else if(SETTING_CONTEXT_SWITCH_TRACE == nIndex)
        {
            if(S.bContextSwitchRunning)
            {
                MicroProfileStopContextSwitchTrace();
            }
            else
            {
                MicroProfileStartContextSwitchTrace();
            }
        }
        else if(SETTING_PLATFORM_MARKERS == nIndex)
        {
            MicroProfilePlatformMarkersSetEnabled(!MicroProfilePlatformMarkersGetEnabled());
        }
        // Any setting command, known or not, marks the enabled state dirty.
        S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
        return;
    }
    if(TYPE_TIMER == nKind)
    {
        MicroProfileWebSocketToggleTimer(nIndex);
    }
    else if(TYPE_GROUP == nKind)
    {
        MicroProfileToggleGroup(nIndex);
    }
    else if(TYPE_CATEGORY == nKind)
    {
        MicroProfileToggleCategory(nIndex);
    }
    else if(TYPE_COUNTER == nKind)
    {
        MicroProfileWebSocketToggleCounter(nIndex);
    }
    else
    {
        uprintf("unknown type %d\n", nKind);
    }
}
// Magic number and version constants for the binary preset file header declared below.
#define MICROPROFILE_PRESET_HEADER_MAGIC2 0x28586813
#define MICROPROFILE_PRESET_HEADER_VERSION2 0x00000200
// File-level header of a binary preset file.
// NOTE(review): the text-based parser in MicroProfileParseSettings does not read this
// structure - confirm whether it is still used elsewhere.
struct MicroProfileSettingsFileHeader
{
uint32_t nMagic;
uint32_t nVersion;
uint32_t nNumHeaders;
uint32_t nHeadersOffset;
uint32_t nMaxJsonSize;
uint32_t nMaxNameSize;
};
// Per-preset record: offset and size of the preset's JSON blob and of its name.
// NOTE(review): presumably the offsets are relative to the start of the file - confirm.
struct MicroProfileSettingsHeader
{
uint32_t nJsonOffset;
uint32_t nJsonSize;
uint32_t nNameOffset;
uint32_t nNameSize;
};
// Parses a text settings file and invokes CB once per preset entry.
//
// File format: a sequence of entries `<name> <json>` where <name> consists of letters,
// digits and '_' and must be followed by whitespace, and <json> is a brace-balanced
// object starting with '{' that must also be followed by whitespace (so the last entry
// needs a trailing newline). '#' starts a comment that runs to the end of the line.
//
// CB is called as CB(pName, nNameLen, pJson, nJsonLen); both strings are NUL-terminated
// views into a temporary buffer that is only valid during the call. Returning false from
// CB stops the iteration. Parsing also stops silently at the first malformed entry.
// The whole operation runs under the MicroProfile mutex.
template <typename T>
void MicroProfileParseSettings(const char* pFileName, T CB)
{
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
    FILE* F = fopen(pFileName, "rb");
    if(!F)
    {
        return;
    }
    // Determine the file size; treat any seek/tell failure as an unreadable file.
    long nFileSize = -1;
    if(0 == fseek(F, 0, SEEK_END))
    {
        nFileSize = ftell(F);
    }
    if(nFileSize < 0 || 0 != fseek(F, 0, SEEK_SET))
    {
        uprintf("failed to read settings file\n");
        fclose(F);
        return;
    }
    // Small files are read onto the stack, larger ones into a heap allocation.
    char* pFile = 0;
    char* pAlloc = 0;
    if(nFileSize > (32 << 10))
    {
        pFile = pAlloc = (char*)MP_ALLOC(nFileSize + 1, 1);
    }
    else
    {
        pFile = (char*)alloca(nFileSize + 1);
    }
    if(!pFile || 1 != fread(pFile, nFileSize, 1, F))
    {
        uprintf("failed to read settings file\n");
        fclose(F);
        // Release the heap buffer on the error path as well.
        if(pAlloc)
            MP_FREE(pAlloc);
        return;
    }
    fclose(F);
    // Terminator at pEnd: makes it safe to inspect *pEnd and to hand out C strings.
    pFile[nFileSize] = '\0';
    char* pPos = pFile;
    char* pEnd = pFile + nFileSize;
    while(pPos != pEnd)
    {
        const char* pName = 0;
        int nNameLen = 0;
        const char* pJson = 0;
        int nJsonLen = 0;
        int Failed = 0;
        // Skips whitespace and '#' comments. <ctype.h> functions require values in the
        // unsigned char range, hence the casts.
        auto SkipWhite = [&](char* pPos, const char* pEnd)
        {
            while(pPos != pEnd)
            {
                if(isspace((unsigned char)*pPos))
                {
                    pPos++;
                }
                else if('#' == *pPos)
                {
                    while(pPos != pEnd && *pPos != '\n')
                    {
                        ++pPos;
                    }
                }
                else
                {
                    break;
                }
            }
            return pPos;
        };
        // Parses an identifier and NUL-terminates it by overwriting the following whitespace.
        auto ParseName = [&](char* pPos, char* pEnd, const char** ppName, int* pLen)
        {
            pPos = SkipWhite(pPos, pEnd);
            int nLen = 0;
            *ppName = pPos;
            while(pPos != pEnd && (isalpha((unsigned char)*pPos) || isdigit((unsigned char)*pPos) || *pPos == '_'))
            {
                nLen++;
                pPos++;
            }
            *pLen = nLen;
            if(pPos == pEnd || !isspace((unsigned char)*pPos))
            {
                Failed = 1;
                return pEnd;
            }
            *pPos++ = '\0';
            return pPos;
        };
        // Parses a brace-balanced object and NUL-terminates it by overwriting the
        // following whitespace. Braces inside JSON strings are counted as well.
        auto ParseJson = [&](char* pPos, char* pEnd, const char** pJson, int* pLen) -> char*
        {
            pPos = SkipWhite(pPos, pEnd);
            if(pPos == pEnd || *pPos != '{')
            {
                Failed = 1;
                return pPos;
            }
            *pJson = pPos++;
            int nLen = 1;
            int nDepth = 1;
            while(pPos != pEnd && nDepth != 0)
            {
                nLen++;
                char nChar = *pPos++;
                if(nChar == '{')
                {
                    nDepth++;
                }
                else if(nChar == '}')
                {
                    nDepth--;
                }
            }
            if(pPos == pEnd || !isspace((unsigned char)*pPos))
            {
                Failed = 1;
                return pEnd;
            }
            *pLen = nLen;
            *pPos++ = '\0';
            return pPos;
        };
        pPos = ParseName(pPos, pEnd, &pName, &nNameLen);
        pPos = ParseJson(pPos, pEnd, &pJson, &nJsonLen);
        if(Failed)
        {
            break;
        }
        if(!CB(pName, nNameLen, pJson, nJsonLen))
        {
            break;
        }
    }
    if(pAlloc)
        MP_FREE(pAlloc);
}
// Saves the preset pSettingsName with the JSON text pJsonSettings.
// All entries of the current settings file (S.pSettings) are copied to a temporary file
// (S.pSettingsTemp); an entry whose name matches case-insensitively is replaced, and the
// preset is appended if no entry matched. The temporary file then replaces the settings
// file.
// Returns false if the temporary file cannot be opened or the replace step reports
// failure, true otherwise. (A custom MICROPROFILE_MOVE_FILE is assumed to succeed.)
bool MicroProfileSavePresets(const char* pSettingsName, const char* pJsonSettings)
{
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
    FILE* F = fopen(S.pSettingsTemp, "w");
    if(!F)
    {
        return false;
    }
    bool bWritten = false;
    MicroProfileParseSettings(S.pSettings,
                              [&](const char* pName, uint32_t nNameSize, const char* pJson, uint32_t nJsonSize) -> bool
                              {
                                  fwrite(pName, nNameSize, 1, F);
                                  fputc(' ', F);
                                  if(0 != MP_STRCASECMP(pSettingsName, pName))
                                  {
                                      // Unrelated preset: copy through unchanged.
                                      fwrite(pJson, nJsonSize, 1, F);
                                  }
                                  else
                                  {
                                      bWritten = true;
                                      fwrite(pJsonSettings, strlen(pJsonSettings), 1, F);
                                  }
                                  fputc('\n', F);
                                  return true;
                              });
    if(!bWritten)
    {
        // No existing entry with this name: append a new one.
        fwrite(pSettingsName, strlen(pSettingsName), 1, F);
        fputc(' ', F);
        fwrite(pJsonSettings, strlen(pJsonSettings), 1, F);
        fputc('\n', F);
    }
    fflush(F);
    fclose(F);
    bool bMoved = true;
#ifdef MICROPROFILE_MOVE_FILE
    MICROPROFILE_MOVE_FILE(S.pSettingsTemp, S.pSettings);
#elif defined(_WIN32)
    bMoved = 0 != MoveFileExA(S.pSettingsTemp, S.pSettings, MOVEFILE_REPLACE_EXISTING);
#else
    bMoved = 0 == rename(S.pSettingsTemp, S.pSettings);
#endif
    return bMoved;
}
// Emits pJson as a double-quoted string through MicroProfileWSPrintf, writing every '"'
// in the input as \". The input is temporarily modified in place (a quote is replaced by
// NUL while the preceding piece is printed, then restored) and must be NUL-terminated,
// because pieces are located with strchr and printed with %s.
// NOTE(review): backslashes are not escaped, and the closing quote is only emitted by
// the branch that finds no further '"' - confirm inputs never end in a quote or are empty.
void MicroProfileWriteJsonString(const char* pJson, uint32_t nJsonLen)
{
char* pCur = (char*)pJson;
char* pEnd = pCur + nJsonLen;
// Opening quote; the extra pCur argument is not referenced by the format string.
MicroProfileWSPrintf("\"", pCur);
while(pCur != pEnd)
{
char* pTag = strchr(pCur, '\"');
if(pTag)
{
// Print the text up to the quote, followed by an escaped quote.
*pTag = '\0';
MicroProfileWSPrintf("%s\\\"", pCur);
*pTag = '\"';
pCur = pTag + 1;
}
else
{
// No more quotes: print the rest and the closing quote.
MicroProfileWSPrintf("%s\"", pCur);
pCur = pEnd;
}
}
};
// Sends the list of available presets to the client as one MSG_PRESETS JSON message:
// "p" holds the writable presets from S.pSettings (always including an empty "Default"),
// "r" holds the read-only presets from S.pSettingsReadOnly. Each preset's JSON is sent as
// an escaped string. Runs under the MicroProfile mutex.
void MicroProfileWebSocketSendPresets(MpSocket Connection)
{
    std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
    uprintf("sending presets ... \n");
    MicroProfileWSPrintStart(Connection);
    MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{", MSG_PRESETS);
    MicroProfileWSPrintf("\"p\":{\"Default\":\"{}\"");

    // Writable presets always follow the "Default" entry, so each one is comma-prefixed.
    auto EmitWritable = [](const char* pName, uint32_t nNameLen, const char* pJson, uint32_t nJsonLen)
    {
        MicroProfileWSPrintf(",\"%s\":", pName);
        MicroProfileWriteJsonString(pJson, nJsonLen);
        return true;
    };
    MicroProfileParseSettings(S.pSettings, EmitWritable);

    MicroProfileWSPrintf("},\"r\":{");

    // Read-only presets: the first entry is prefixed with a space, later ones with a comma.
    bool bSeparatorNeeded = false;
    auto EmitReadOnly = [&bSeparatorNeeded](const char* pName, uint32_t nNameLen, const char* pJson, uint32_t nJsonLen)
    {
        MicroProfileWSPrintf("%c\"%s\":", bSeparatorNeeded ? ',' : ' ', pName);
        MicroProfileWriteJsonString(pJson, nJsonLen);
        bSeparatorNeeded = true;
        return true;
    };
    MicroProfileParseSettings(S.pSettingsReadOnly, EmitReadOnly);

    MicroProfileWSPrintf("}}}");
    MicroProfileWSFlush();
    MicroProfileWSPrintEnd();
}
#define LOAD_PRESET_DEFAULT 0x1
#define LOAD_PRESET_READONLY 0x2
void MicroProfileLoadPresets(const char* pSettingsName, uint32_t nLoadPresetType)
{
std::lock_guard<std::recursive_mutex> Lock(MicroProfileGetMutex());
const char* pPresetFiles[] = { S.pSettings, S.pSettingsReadOnly };
for(uint32_t i = 0; i < 2; ++i)
{
if(nLoadPresetType & (1u << i))
{
const char* pPresetFile = pPresetFiles[i];
bool bReadOnly = (1u << i) == LOAD_PRESET_READONLY;
bool bSuccess = false;
MicroProfileParseSettings(pPresetFile,
[&bSuccess, bReadOnly, pSettingsName](const char* pName, uint32_t l0, const char* pJson, uint32_t l1)
{
if(0 == MP_STRCASECMP(pName, pSettingsName))
{
uint32_t nLen = (uint32_t)strlen(pJson) + 1;
if(nLen > S.nJsonSettingsBufferSize)
{
if(S.pJsonSettings)
S.pJsonSettings = nullptr;
S.pJsonSettings = (char*)MP_ALLOC(nLen, 1);
S.nJsonSettingsBufferSize = nLen;
}
S.pJsonSettingsName = pSettingsName;
memcpy(S.pJsonSettings, pJson, nLen);
S.nJsonSettingsPending = 1;
S.bJsonSettingsReadOnly = bReadOnly ? 1 : 0;
bSuccess = true;
return false;
}
return true;
});
if(bSuccess)
return;
}
}
}
// Receives one websocket frame from Connection, unmasks its payload and dispatches it as
// a single-character command (first payload byte) with optional arguments.
//
// Returns true when a frame was received and handled (including unknown commands), and
// false when the connection should be dropped: a recv failure or short header read, a
// close frame (first header byte 0x88), reserved bits set, an oversized length, or an
// allocation failure.
//
// The payload buffer is a function-local static that grows on demand and is reused
// across calls, so this function is not reentrant.
bool MicroProfileWebSocketReceive(MpSocket Connection)
{
    // 0                   1                   2                   3
    // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    // +-+-+-+-+-------+-+-------------+-------------------------------+
    // |F|R|R|R| opcode|M| Payload len |    Extended payload length    |
    // |I|S|S|S|  (4)  |A|     (7)     |             (16/64)           |
    // |N|V|V|V|       |S|             |   (if payload len==126/127)   |
    // | |1|2|3|       |K|             |                               |
    // +-+-+-+-+-------+-+-------------+ - - - - - - - - - - - - - - - +
    // All locals with initialisers are declared before the first `goto fail`.
    int r;
    uint64_t nSize;
    uint64_t nSizeBytes = 0;
    uint64_t nReceived = 0;
    // Zero mask: unmasked frames pass through the XOR below unchanged.
    uint8_t Mask[4] = { 0, 0, 0, 0 };
    static unsigned char* Bytes = 0;
    static uint64_t BytesAllocated = 0;
    MicroProfileWebSocketHeader0 h0;
    MicroProfileWebSocketHeader1 h1;
    static_assert(sizeof(h0) == 1, "");
    static_assert(sizeof(h1) == 1, "");
    r = recv(Connection, (char*)&h0, 1, 0);
    if(1 != r)
        goto fail;
    r = recv(Connection, (char*)&h1, 1, 0);
    if(1 != r)
        goto fail;
    if(h0.v == 0x88)
    {
        // FIN + opcode 8: close frame.
        goto fail;
    }
    if(h0.RSV1 != 0 || h0.RSV2 != 0 || h0.RSV3 != 0)
        goto fail;
    nSize = h1.payload;
    nSizeBytes = 0;
    switch(nSize)
    {
    case 126:
        nSizeBytes = 2;
        break;
    case 127:
        nSizeBytes = 8;
        break;
    default:
        break;
    }
    if(nSizeBytes)
    {
        // Extended payload length, most significant byte first.
        nSize = 0;
        uint8_t BytesMessage[8];
        r = recv(Connection, (char*)&BytesMessage[0], (int)nSizeBytes, 0);
        if((int)nSizeBytes != r)
            goto fail;
        for(uint32_t i = 0; i < nSizeBytes; i++)
        {
            nSize <<= 8;
            nSize += BytesMessage[i];
        }
    }
    // The length is passed to recv() as an int below; reject anything that does not fit
    // (this also rules out overflow of nSize + 1).
    if(nSize > 0x7fffffff)
        goto fail;
    if(h1.MASK)
    {
        r = recv(Connection, (char*)&Mask[0], 4, 0);
        if(4 != r)
            goto fail;
    }
    MICROPROFILE_COUNTER_LOCAL_ADD_ATOMIC(g_MicroProfileBytesPerFlip, nSize);
    if(nSize + 1 > BytesAllocated)
    {
        // Grow through a temporary so the old buffer is not lost if realloc fails.
        unsigned char* pNewBytes = (unsigned char*)MP_REALLOC(Bytes, nSize + 1);
        if(!pNewBytes)
            goto fail;
        Bytes = pNewBytes;
        BytesAllocated = nSize + 1;
    }
    // A single recv() may return only part of the payload; loop until it is complete.
    while(nReceived < nSize)
    {
        r = recv(Connection, (char*)Bytes + nReceived, (int)(nSize - nReceived), 0);
        if(r <= 0)
            goto fail;
        nReceived += (uint64_t)r;
    }
    for(uint32_t i = 0; i < nSize; ++i)
        Bytes[i] ^= Mask[i & 3];
    // Terminate so the payload can be handled as a C string.
    Bytes[nSize] = '\0';
    switch(Bytes[0])
    {
    case 'a': // set aggregate flip count
    {
        S.nAggregateFlip = strtoll((const char*)&Bytes[1], nullptr, 10);
    }
    break;
    case 's': // save preset: "s<name>,<json>"
    {
        char* pJson = strchr((char*)Bytes, ',');
        if(pJson && *pJson != '\0')
        {
            *pJson = '\0';
            MicroProfileSavePresets((const char*)Bytes + 1, (const char*)pJson + 1);
        }
        break;
    }
    case 'l': // load preset from the writable settings file
    {
        MicroProfileLoadPresets((const char*)Bytes + 1, LOAD_PRESET_DEFAULT);
        break;
    }
    case 'm': // load preset from the read-only settings file
    {
        MicroProfileLoadPresets((const char*)Bytes + 1, LOAD_PRESET_READONLY);
        break;
    }
    case 'd': // disable everything: clear websocket lists and wanted groups
    {
        MicroProfileWebSocketClearTimers();
        memset(&S.nActiveGroupsWanted, 0, sizeof(S.nActiveGroupsWanted));
        S.nWebSocketDirty |= MICROPROFILE_WEBSOCKET_DIRTY_ENABLED;
        break;
    }
    case 'c': // packed toggle command, see MicroProfileWebSocketCommand
    {
        char* pStr = (char*)Bytes + 1;
        char* pEnd = pStr + nSize - 1;
        uint32_t Message = strtol(pStr, &pEnd, 10);
        MicroProfileWebSocketCommand(Message);
    }
    break;
    case 'f':
        MicroProfileToggleFrozen();
        break;
    case 'v': // view mode as a single digit
        S.nWSViewMode = (int)Bytes[1] - '0';
        break;
    case 'r':
        uprintf("got clear message\n");
        S.nAggregateClear = 1;
        break;
    case 'x':
        MicroProfileWebSocketClearTimers();
        break;
#if MICROPROFILE_DYNAMIC_INSTRUMENT
    case 'D': // instrumentation without loading queryable symbols.
    {
        uprintf("got INSTRUMENT Message: %s\n", (const char*)&Bytes[0]);
        char* pGet = (char*)&Bytes[1];
        uint32_t nNumArguments = 0;
#ifdef _WIN32
        r = sscanf_s(pGet, "%d", &nNumArguments);
#else
        r = sscanf(pGet, "%d", &nNumArguments);
#endif
        if(r != 1)
        {
            uprintf("failed to parse..\n");
            break;
        }
        // Skip the argument count and any spaces around it.
        while(' ' == *pGet || (*pGet >= '0' && *pGet <= '9'))
        {
            pGet++;
        }
        // Bound the stack allocations below.
        if(nNumArguments > 200)
            nNumArguments = 200;
        uint32_t nParsedArguments = 0;
        const char* pModule = 0;
        const char* pSymbol = 0;
        const char** pModules = (const char**)(alloca(sizeof(const char*) * nNumArguments));
        const char** pSymbols = (const char**)(alloca(sizeof(const char*) * nNumArguments));
        // Returns the next '!'-terminated token (terminating it in place), or 0 when
        // no further '!' is found.
        auto Next = [&pGet]() -> const char*
        {
            if(!pGet)
                return 0;
            const char* pRet = pGet;
            pGet = (char*)strchr(pRet, '!');
            if(!pGet)
            {
                return 0;
            }
            *pGet++ = '\0';
            return (const char*)pRet;
        };
        do
        {
            pModule = Next();
            pSymbol = Next();
            if(pModule && pSymbol)
            {
                pModules[nParsedArguments] = pModule;
                pSymbols[nParsedArguments] = pSymbol;
                uprintf("found symbol %s ::: %s \n", pModule, pSymbol);
                nParsedArguments++;
                if(nParsedArguments == nNumArguments)
                {
                    break;
                }
            }
        } while(pGet);
        MicroProfileInstrumentWithoutSymbols(pModules, pSymbols, nParsedArguments);
        break;
    }
    case 'I':
    case 'i':
    {
        uprintf("got Message: %s\n", (const char*)&Bytes[0]);
        void* p = 0;
        uint32_t nColor = 0x0;
        int nMinBytes = 0;
        int nMaxCalls = 0;
        int nCharsRead = 0;
#ifdef _WIN32
        r = sscanf_s((const char*)&Bytes[1], "%p %x %d %d%n", &p, &nColor, &nMinBytes, &nMaxCalls, &nCharsRead);
#else
        r = sscanf((const char*)&Bytes[1], "%p %x %d %d%n", &p, &nColor, &nMinBytes, &nMaxCalls, &nCharsRead);
#endif
        if(r == 4)
        {
            const char* pModule = (const char*)&Bytes[1];
            // int nNumChars = stbsp_snprintf(0, 0, "%p %x", p, nColor);
            pModule += nCharsRead;
            while(*pModule != ' ' && *pModule != '\0')
                ++pModule;
            if(*pModule == '\0')
                break;
            pModule++;
            const char* pName = pModule;
            while(*pName != '!' && *pName != '\0')
            {
                pName++;
            }
            if(*pName == '!')
            {
                // name and module separately
                *(char*)pName = '\0';
                pName++;
            }
            else
            {
                // name only
                pName = pModule;
                pModule = "";
            }
            uprintf("scanning for ptr %p %x mod:'%s' name'%s'\n", p, nColor, pModule, pName);
            if(Bytes[0] == 'I')
            {
                MicroProfileInstrumentFunctionsCalled(p, pModule, pName, nMinBytes, nMaxCalls);
            }
            else
            {
                MicroProfileInstrumentFunction(p, pModule, pName, nColor);
            }
        }
    }
    break;
    case 'S':
        uprintf("loading symbols...\n");
        MicroProfileSymbolInitialize(true);
        break;
    case 'q':
        MicroProfileSymbolQueryFunctions(Connection, 1 + (const char*)Bytes);
        break;
    case 'L':
        uprintf("LOAD MODULE: '%s'\n", 1 + (const char*)Bytes);
        MicroProfileSymbolInitialize(true, 1 + (const char*)Bytes);
        break;
#else
    // Instrumentation commands are accepted but ignored when the feature is compiled out.
    case 'D':
    case 'I':
    case 'i':
    case 'S':
    case 'q':
    case 'L':
        break;
#endif
    default:
        uprintf("got unknown message size %lld: '%s'\n", (long long)nSize, Bytes);
    }
    return true;
fail:
    return false;
}
void MicroProfileWebSocketSendPresets(MpSocket Connection);
// Completes the WebSocket upgrade on a freshly accepted connection.
// Replies with the "101 Switching Protocols" header whose accept token is
// base64(SHA1(pWebSocketKey + fixed GUID)), stores the socket in S.WebSockets, clears the
// "already sent" bookkeeping and pushes the initial state and presets to the client.
// pWebSocketKey: NUL-terminated value of the request's Sec-WebSocket-Key header.
void MicroProfileWebSocketHandshake(MpSocket Connection, char* pWebSocketKey)
{
    // begin with an empty send ring and no recorded socket failure
    S.WSBuf.nSendPut.store(0);
    S.WSBuf.nSendGet.store(0);
    S.nSocketFail = 0;

    const char* pMagicGuid = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11";
    const char* pReplyHeader = "HTTP/1.1 101 Switching Protocols\r\n"
                               "Upgrade: websocket\r\n"
                               "Connection: Upgrade\r\n"
                               "Sec-WebSocket-Accept: ";

    // hash the concatenation of client key and GUID
    char KeyAndGuid[512];
    int nLen = stbsp_snprintf(KeyAndGuid, sizeof(KeyAndGuid) - 1, "%s%s", pWebSocketKey, pMagicGuid);
    uint8_t Digest[20];
    MicroProfile_SHA1_CTX Sha;
    MicroProfile_SHA1_Init(&Sha);
    MicroProfile_SHA1_Update(&Sha, (unsigned char*)KeyAndGuid, nLen);
    MicroProfile_SHA1_Final((unsigned char*)&Digest[0], &Sha);

    // base64 of the digest, zero filled up front so the result is NUL-terminated
    char AcceptKey[(2 + sizeof(Digest) / 3) * 4] = { 0 };
    MicroProfileBase64Encode(&AcceptKey[0], &Digest[0], sizeof(Digest));

    char Reply[11024];
    nLen = stbsp_snprintf(Reply, sizeof(Reply) - 1, "%s%s\r\n\r\n", pReplyHeader, AcceptKey);
    MP_ASSERT(nLen >= 0);
    MicroProfileSocketSend(Connection, Reply, nLen);

    // register the connection and forget what the previous client had been told
    S.WebSockets[S.nNumWebSockets++] = Connection;
    S.WSCategoriesSent = 0;
    S.WSGroupsSent = 0;
    S.WSTimersSent = 0;
    S.WSCountersSent = 0;
    S.nJsonSettingsPending = 0;
#if MICROPROFILE_DYNAMIC_INSTRUMENT
    S.WSFunctionsInstrumentedSent = 0;
    S.WSSymbolModulesSent = 0;
    {
        // refresh the module list and log how long it took
        const uint64_t nTickBegin = MP_TICK();
        MicroProfileSymbolUpdateModuleList();
        const uint64_t nTickEnd = MP_TICK();
        float fElapsedMs = float(MicroProfileTickToMsMultiplierCpu()) * (nTickEnd - nTickBegin);
        (void)fElapsedMs;
        uprintf("update module list time %6.2fms\n", fElapsedMs);
    }
#endif
    MicroProfileWebSocketSendState(Connection);
    MicroProfileWebSocketSendPresets(Connection);

    if(!S.nWSWasConnected)
    {
        // very first client: load the default preset
        S.nWSWasConnected = 1;
        MicroProfileLoadPresets("Default", LOAD_PRESET_DEFAULT | LOAD_PRESET_READONLY);
        return;
    }

    // reconnect: resend query progress (if instrumenting) and the current settings blob
#if MICROPROFILE_DYNAMIC_INSTRUMENT
    MicroProfileWSPrintStart(Connection);
    MicroProfileWSPrintf("{\"k\":\"%d\",\"qp\":%d}", MSG_QUERY_INDEX, S.nQueryProcessed);
    MicroProfileWSFlush();
    MicroProfileWSPrintEnd();
#endif
    if(S.pJsonSettings)
    {
        const char* pSettingsName = S.pJsonSettingsName ? S.pJsonSettingsName : "";
        MicroProfileWSPrintStart(Connection);
        MicroProfileWSPrintf(
            "{\"k\":\"%d\",\"ro\":%d,\"name\":\"%s\",\"v\":%s}", MSG_CURRENTSETTINGS, S.bJsonSettingsReadOnly ? 1 : 0, pSettingsName, S.pJsonSettings);
        MicroProfileWSFlush();
        MicroProfileWSPrintEnd();
    }
}
// Sends the current value of every counter as one MSG_COUNTERS JSON array.
// Does nothing unless the client is in the counters view. A print session must already be
// open, because this only appends to the websocket buffer and flushes it.
void MicroProfileWebSocketSendCounters()
{
    MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileWebSocketSendCounters", MP_GREEN4);
    if(S.nWSViewMode != VIEW_COUNTERS)
    {
        return;
    }
    MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":[", MSG_COUNTERS);
    for(uint32_t nIndex = 0; nIndex < S.nNumCounters; ++nIndex)
    {
        // the first element is preceded by a space, all later ones by a comma
        const char cSeparator = nIndex == 0 ? ' ' : ',';
        if(0 != (S.CounterInfo[nIndex].nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE))
        {
            double dValue = S.CountersDouble[nIndex].load();
            MicroProfileWSPrintf("%c%f", cSeparator, dValue);
        }
        else
        {
            uint64_t nValue = S.Counters[nIndex].load();
            MicroProfileWSPrintf("%c%lld", cSeparator, nValue);
        }
    }
    MicroProfileWSPrintf("]}");
    MicroProfileWSFlush();
}
#if MICROPROFILE_DYNAMIC_INSTRUMENT
void MicroProfileSymbolSendModuleState()
{
if(S.WSSymbolModulesSent != S.SymbolNumModules || S.nSymbolsDirty.load()) // todo: tag when modulestate is updated.
{
S.nSymbolsDirty.exchange(0);
MicroProfileWSPrintf(",\"M\":[");
bool bFirst = true;
for(int i = 0; i < S.SymbolNumModules; ++i)
{
MicroProfileSymbolModule& M = S.SymbolModules[i];
const char* pModuleName = (const char*)M.pBaseString;
uint64_t nAddrBegin = M.Regions[0].nBegin;
// intptr_t nProgress = M.nProgress;
intptr_t nProgressTarget = M.nProgressTarget;
nProgressTarget = MicroProfileMax(intptr_t(1), M.nProgressTarget);
// nProgress = MicroProfileMin(nProgressTarget, M.nProgress);
float fLoadPrc = M.nProgress / float(nProgressTarget);
uint64_t nNumSymbols = M.nSymbolsLoaded;
#define FMT "{\"n\":\"%s\",\"a\":\"%llx\",\"s\":\"%lld\", \"p\":%f, \"d\":%d}"
MicroProfileWSPrintf(bFirst ? FMT : ("," FMT), pModuleName, nAddrBegin, nNumSymbols, fLoadPrc, M.bDownloading ? 1 : 0);
#undef FMT
bFirst = false;
}
MicroProfileWSPrintf("]");
S.WSSymbolModulesSent = S.SymbolNumModules;
}
}
#endif
// Sends the per-frame websocket traffic for one connection.
// When a new frame is available (S.nFrameCurrent differs from S.WebSocketFrameLast[0]) or the
// profiler is frozen, a MSG_FRAME message is built: frame time, frame id, aggregate count,
// frozen flag, view mode, per-group timings and - only for a genuinely new frame - the values
// of all subscribed timers and counters. Otherwise a short MSG_INACTIVE_FRAME message is sent.
// With dynamic instrumentation enabled, symbol query results, function names and errors are
// sent afterwards in both cases.
void MicroProfileWebSocketSendFrame(MpSocket Connection)
{
if(S.nFrameCurrent != S.WebSocketFrameLast[0] || S.nFrozen)
{
// make sure newly registered categories/groups/timers/counters are known to the client first
MicroProfileWebSocketSendState(Connection);
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileWebSocketSendFrame", MP_GREEN4);
MicroProfileWSPrintStart(Connection);
float fTickToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
float fTickToMsGpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondGpu());
MicroProfileFrameState* pFrameCurrent = &S.Frames[S.nFrameCurrent];
MicroProfileFrameState* pFrameNext = &S.Frames[S.nFrameNext];
// frame duration = cpu start of the next frame minus cpu start of the current one
uint64_t nFrameTicks = pFrameNext->nFrameStartCpu - pFrameCurrent->nFrameStartCpu;
uint64_t nFrame = pFrameCurrent->nFrameId;
double fTime = nFrameTicks * fTickToMsCpu;
MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"t\":%f,\"f\":%lld,\"a\":%d,\"fr\":%d,\"m\":%d", MSG_FRAME, fTime, nFrame, MicroProfileGetCurrentAggregateFrames(), S.nFrozen, S.nWSViewMode);
#if MICROPROFILE_DYNAMIC_INSTRUMENT
// symbol loader status, followed by the module list when it changed
MicroProfileWSPrintf(",\"s\":{\"n\":%d,\"f\":%d,\"r\":%d,\"l\":%d,\"q\":%d}",
S.SymbolNumModules,
S.SymbolState.nModuleLoadsFinished.load(),
S.SymbolState.nModuleLoadsRequested.load(),
S.SymbolState.nSymbolsLoaded.load(),
S.pPendingQuery ? 1 : 0);
MicroProfileSymbolSendModuleState();
#endif
// Writes [ticks ms, exclusive ms, count] for every group whose exclusive tick count is non-zero.
auto WriteTickArray = [fTickToMsCpu, fTickToMsGpu](MicroProfile::GroupTime* pFrameGroup)
{
MicroProfileWSPrintf("[");
int f = 0;
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
{
uint64_t nTicksExcl = pFrameGroup[i].nTicksExclusive;
if(nTicksExcl)
{
uint64_t nTicks = pFrameGroup[i].nTicks;
float fCount = (float)pFrameGroup[i].nCount;
// pick the tick scale matching the group's token type
float fToMs = S.GroupInfo[i].Type == MicroProfileTokenTypeCpu ? fTickToMsCpu : fTickToMsGpu;
MicroProfileWSPrintf("%c[%f,%f,%f]", f ? ',' : ' ', nTicks * fToMs, nTicksExcl * fToMs, fCount);
f = 1;
}
}
MicroProfileWSPrintf("]");
};
// Writes the packed group id for the same groups WriteTickArray emits (same non-zero filter),
// so the two arrays line up element for element.
auto WriteIndexArray = [](MicroProfile::GroupTime* pFrameGroup)
{
MicroProfileWSPrintf("[");
int f = 0;
for(uint32_t i = 0; i < MICROPROFILE_MAX_GROUPS; ++i)
{
uint64_t nTicksExcl = pFrameGroup[i].nTicksExclusive;
if(nTicksExcl)
{
uint32_t id = MicroProfileWebSocketIdPack(TYPE_GROUP, i);
MicroProfileWSPrintf("%c%d", f ? ',' : ' ', id);
f = 1;
}
}
MicroProfileWSPrintf("]");
};
// "g"/"gi": group timings and ids for the whole frame
MicroProfileWSPrintf(",\"g\":");
WriteTickArray(S.FrameGroup);
MicroProfileWSPrintf(",\"gi\":");
WriteIndexArray(S.FrameGroup);
if(S.nWSViewMode == VIEW_GRAPH_THREAD_GROUP)
{
// "gt": the same two arrays again, once per thread flagged in the FrameGroupThreadValid bitmask
MicroProfileWSPrintf(",\"gt\":[");
int f = 0;
for(uint32_t i = 0; i < MICROPROFILE_MAX_THREADS; ++i)
{
if(0 != (S.FrameGroupThreadValid[i / 32] & (1 << (i % 32))))
{
if(!f)
MicroProfileWSPrintf("{");
else
MicroProfileWSPrintf(",{");
MicroProfileThreadLog* pLog = S.Pool[i];
MicroProfileWSPrintf("\"i\":%d,\"n\":\"%s\",\"g\":", i, pLog->ThreadName);
WriteTickArray(&S.FrameGroupThread[i][0]);
MicroProfileWSPrintf(",\"gi\":");
WriteIndexArray(&S.FrameGroupThread[i][0]);
MicroProfileWSPrintf("}");
f = 1;
}
}
MicroProfileWSPrintf("]");
}
// timer/counter values are only attached for a new frame, not for repeats while frozen
if(S.nFrameCurrent != S.WebSocketFrameLast[0])
{
MicroProfileWSPrintf(",\"x\":{\"t\":{");
// walk the linked list of timers (head S.WebSocketTimers, links via nWSNext, -1 terminates)
int nTimer = S.WebSocketTimers;
// uprintf("T : ");
while(nTimer >= 0)
{
MicroProfileTimerInfo& TI = S.TimerInfo[nTimer];
float fTickToMs = TI.Type == MicroProfileTokenTypeGpu ? fTickToMsGpu : fTickToMsCpu;
uint32_t id = MicroProfileWebSocketIdPack(TYPE_TIMER, nTimer);
fTime = fTickToMs * S.Frame[nTimer].nTicks;
float fCount = (float)S.Frame[nTimer].nCount;
float fTimeExcl = fTickToMs * S.FrameExclusive[nTimer];
// uprintf("%4.2f, ", fTimeExcl);
// timers in an inactive group are reported as all zeros
if(!MicroProfileGroupActive(TI.nGroupIndex))
{
fTime = fCount = fTimeExcl = 0.f;
}
// advance before printing so the trailing separator knows whether this was the last entry
nTimer = TI.nWSNext;
MicroProfileWSPrintf("\"%d\":[%f,%f,%f]%c", id, fTime, fTimeExcl, fCount, nTimer == -1 ? ' ' : ',');
}
MicroProfileWSPrintf("}, \"c\":{");
// same walk for counters (head S.WebSocketCounters)
int nCounter = S.WebSocketCounters;
while(nCounter >= 0)
{
MicroProfileCounterInfo& CI = S.CounterInfo[nCounter];
bool IsDouble = (CI.nFlags & MICROPROFILE_COUNTER_FLAG_DOUBLE) != 0;
uint32_t id = MicroProfileWebSocketIdPack(TYPE_COUNTER, nCounter);
int nCounterNext = CI.nWSNext;
if(IsDouble)
{
double value = S.CountersDouble[nCounter].load();
MicroProfileWSPrintf("\"%d\":%f%c", id, value, nCounterNext < 0 ? ' ' : ',');
}
else
{
uint64_t value = S.Counters[nCounter].load();
MicroProfileWSPrintf("\"%d\":%lld%c", id, value, nCounterNext < 0 ? ' ' : ',');
}
nCounter = nCounterNext;
}
// "g" inside "x" is always written as an empty object
MicroProfileWSPrintf("}, \"g\":{");
// uprintf("\n");
MicroProfileWSPrintf("}}");
}
MicroProfileWSPrintf("}}");
MicroProfileWSFlush();
MicroProfileWebSocketSendCounters();
MicroProfileWSPrintEnd();
// remember which frame was sent so it is not sent twice
S.WebSocketFrameLast[0] = S.nFrameCurrent;
}
else
{
// nothing new: send only frozen flag and view mode (plus symbol status when instrumenting)
MicroProfileWSPrintStart(Connection);
MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"fr\":%d,\"m\":%d", MSG_INACTIVE_FRAME, S.nFrozen, S.nWSViewMode);
#if MICROPROFILE_DYNAMIC_INSTRUMENT
MicroProfileWSPrintf(",\"s\":{\"n\":%d,\"f\":%d,\"r\":%d,\"l\":%d,\"q\":%d}",
S.SymbolNumModules,
S.SymbolState.nModuleLoadsFinished.load(),
S.SymbolState.nModuleLoadsRequested.load(),
S.SymbolState.nSymbolsLoaded.load(),
S.pPendingQuery ? 1 : 0);
#endif
MicroProfileWSPrintf("}}");
MicroProfileWSFlush();
MicroProfileWebSocketSendCounters();
MicroProfileWSPrintEnd();
}
#if MICROPROFILE_DYNAMIC_INSTRUMENT
MicroProfileSymbolQuerySendResult(Connection);
MicroProfileSymbolSendFunctionNames(Connection);
MicroProfileSymbolSendErrors(Connection);
#endif
}
// Services every connected websocket once: polls them with select() (zero timeout), receives
// pending client messages, sends pending settings / enabled-state / frame data to writable
// sockets, and tears down connections that reported a failure. Returns immediately when no
// websocket is connected.
void MicroProfileWebSocketFrame()
{
if(!S.nNumWebSockets)
{
return;
}
MICROPROFILE_SCOPEI("MicroProfile", "Websocket-update", MP_GREEN4);
fd_set Read, Write, Error;
FD_ZERO(&Read);
FD_ZERO(&Write);
FD_ZERO(&Error);
// first argument for select(): one more than the largest socket value seen (at least 1)
MpSocket LastSocket = 1;
for(uint32_t i = 0; i < S.nNumWebSockets; ++i)
{
LastSocket = MicroProfileMax(LastSocket, S.WebSockets[i] + 1);
FD_SET(S.WebSockets[i], &Read);
FD_SET(S.WebSockets[i], &Write);
FD_SET(S.WebSockets[i], &Error);
}
// zero timeout: select() only reports the current state and does not block
timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 0;
// NOTE(review): when select() fails and MP_ASSERT compiles to nothing, the fd_sets are still
// inspected below - confirm that is acceptable.
if(-1 == select(LastSocket, &Read, &Write, &Error, &tv))
{
MP_ASSERT(0);
}
// i only advances when the socket at slot i is kept; removal moves the last socket into slot i
for(uint32_t i = 0; i < S.nNumWebSockets;)
{
MpSocket s = S.WebSockets[i];
bool bConnected = true;
if(FD_ISSET(s, &Error))
{
MP_ASSERT(0); // todo, remove & fix.
}
if(FD_ISSET(s, &Read))
{
// a false return from the receive path marks the connection for removal
bConnected = MicroProfileWebSocketReceive(s);
}
if(FD_ISSET(s, &Write))
{
if(S.nJsonSettingsPending)
{
// push the pending settings blob to the client, then clear the pending flag
MicroProfileWSPrintStart(s);
MicroProfileWSPrintf(
"{\"k\":\"%d\",\"ro\":%d,\"name\":\"%s\",\"v\":%s}", MSG_LOADSETTINGS, S.bJsonSettingsReadOnly ? 1 : 0, S.pJsonSettingsName ? S.pJsonSettingsName : "", S.pJsonSettings);
MicroProfileWSFlush();
MicroProfileWSPrintEnd();
S.nJsonSettingsPending = 0;
}
if(S.nWebSocketDirty)
{
// enabled state changed: apply it and tell the client
MicroProfileFlipEnabled();
MicroProfileWebSocketSendEnabled(s);
S.nWebSocketDirty = 0;
}
MicroProfileWebSocketSendFrame(s);
}
// S.nSocketFail set during the work above also drops the connection; the flag is then reset
if(S.nSocketFail)
{
bConnected = false;
}
S.nSocketFail = 0;
if(!bConnected)
{
// NOTE(review): PRId64 is paired with a uint64_t argument here.
uprintf("removing socket %" PRId64 "\n", (uint64_t)s);
// shut down the sending direction (SHUT_WR; the literal 1 is used on Windows)
#ifndef _WIN32
shutdown(S.WebSockets[i], SHUT_WR);
#else
shutdown(S.WebSockets[i], 1);
#endif
// drain whatever the peer still sends until recv() returns <= 0, then close
char tmp[128];
int r = 1;
while(r > 0)
{
r = recv(S.WebSockets[i], tmp, sizeof(tmp), 0);
}
#ifdef _WIN32
closesocket(S.WebSockets[i]);
#else
close(S.WebSockets[i]);
#endif
// swap-remove: the last socket takes this slot and is examined on the next iteration
--S.nNumWebSockets;
S.WebSockets[i] = S.WebSockets[S.nNumWebSockets];
uprintf("done removing\n");
}
else
{
++i;
}
}
// count S.nWasFrozen down by one per call until it reaches zero
if(S.nWasFrozen)
{
S.nWasFrozen--;
}
}
// Opens a websocket print session: subsequent MicroProfileWSPrintf/MicroProfileWSFlush calls
// target socket C until MicroProfileWSPrintEnd is called.
void MicroProfileWSPrintStart(MpSocket C)
{
    // no session may be open and nothing may be left unflushed in the buffer
    MP_ASSERT(0 == S.WSBuf.Socket);
    MP_ASSERT(0 == S.WSBuf.nPut);
    S.WSBuf.Socket = C;
}
// Grows the websocket print buffer.
// The new allocation is the larger of (bytes already written + twice the requested minimum
// with slack) and (1.5x the current size, but at least MICROPROFILE_WEBSOCKET_BUFFER_SIZE).
// The first 20 bytes of the allocation are kept in front of pBuffer and are not counted in
// nBufferSize.
void MicroProfileResizeWSBuf(uint32_t nMinSize = 0)
{
    const uint32_t nRequired = S.WSBuf.nPut + 2 * (nMinSize + 2 + 20);
    const uint32_t nGrown = MicroProfileMax(S.WSBuf.nBufferSize * 3 / 2, (uint32_t)MICROPROFILE_WEBSOCKET_BUFFER_SIZE);
    const uint32_t nAllocSize = MicroProfileMax(nRequired, nGrown);
    char* pAllocation = (char*)MICROPROFILE_REALLOC(S.WSBuf.pBufferAllocation, nAllocSize);
    S.WSBuf.pBufferAllocation = pAllocation;
    S.WSBuf.pBuffer = pAllocation + 20;
    S.WSBuf.nBufferSize = nAllocSize - 20;
}
// stb_sprintf output callback: accounts for the `len` bytes just written at `buf` (which must
// be the current write position), grows the buffer when fewer than STB_SPRINTF_MIN + 2 bytes
// would remain, and returns the position where formatting continues. `user` is not used.
char* MicroProfileWSPrintfCallback(const char* buf, void* user, int len)
{
    (void)user;
    MP_ASSERT(S.WSBuf.nPut == buf - S.WSBuf.pBuffer);
    S.WSBuf.nPut += len;
    const bool bNeedsGrow = S.WSBuf.nPut + STB_SPRINTF_MIN + 2 >= S.WSBuf.nBufferSize;
    if(bNeedsGrow)
    {
        MicroProfileResizeWSBuf(S.WSBuf.nPut + STB_SPRINTF_MIN);
    }
    // re-read pBuffer: the resize above may have moved it
    return &S.WSBuf.pBuffer[S.WSBuf.nPut];
}
// printf-style append to the websocket buffer. Allocates the buffer on first use; the data is
// only sent once MicroProfileWSFlush is called.
void MicroProfileWSPrintf(const char* pFmt, ...)
{
    if(0 == S.WSBuf.nBufferSize)
    {
        // first use: allocate the buffer
        MicroProfileResizeWSBuf(STB_SPRINTF_MIN * 2);
    }
    // the callback keeps at least STB_SPRINTF_MIN bytes free after every chunk
    MP_ASSERT(S.WSBuf.nPut + STB_SPRINTF_MIN < S.WSBuf.nBufferSize);
    char* pWriteCursor = S.WSBuf.pBuffer + S.WSBuf.nPut;
    va_list ap;
    va_start(ap, pFmt);
    stbsp_vsprintfcb(MicroProfileWSPrintfCallback, 0, pWriteCursor, pFmt, ap);
    va_end(ap);
}
// Closes the print session opened by MicroProfileWSPrintStart. Everything written must have
// been flushed already.
void MicroProfileWSPrintEnd()
{
    MP_ASSERT(0 == S.WSBuf.nPut);
    S.WSBuf.Socket = 0;
}
// Sends the buffered text as one websocket message on the session's socket and resets the
// write position. Requires an open session with at least one byte buffered.
void MicroProfileWSFlush()
{
    MP_ASSERT(0 != S.WSBuf.Socket);
    MP_ASSERT(0 != S.WSBuf.nPut);
    MicroProfileWebSocketSend(S.WSBuf.Socket, S.WSBuf.pBuffer, S.WSBuf.nPut);
    S.WSBuf.nPut = 0;
}
// Sends one MSG_ENABLED message reporting whether the entry with packed id `id` is enabled.
// bEnabled is normalised to 0 or 1. A print session must be open.
void MicroProfileWebSocketSendEnabledMessage(uint32_t id, int bEnabled)
{
    const int nEnabledFlag = bEnabled ? 1 : 0;
    MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"id\":%d,\"e\":%d}}", MSG_ENABLED, id, nEnabledFlag);
    MicroProfileWSFlush();
}
// Sends the enabled state of every category, group, timer and counter, followed by the three
// global settings (force enable, context switch trace, platform markers), to socket C.
void MicroProfileWebSocketSendEnabled(MpSocket C)
{
    MICROPROFILE_SCOPEI("MicroProfile", "Websocket-SendEnabled", MP_GREEN4);
    MicroProfileWSPrintStart(C);
    for(uint32_t nCategory = 0; nCategory < S.nCategoryCount; ++nCategory)
    {
        const uint32_t id = MicroProfileWebSocketIdPack(TYPE_CATEGORY, nCategory);
        MicroProfileWebSocketSendEnabledMessage(id, MicroProfileCategoryEnabled(nCategory));
    }
    for(uint32_t nGroup = 0; nGroup < S.nGroupCount; ++nGroup)
    {
        const uint32_t id = MicroProfileWebSocketIdPack(TYPE_GROUP, nGroup);
        MicroProfileWebSocketSendEnabledMessage(id, MicroProfileGroupEnabled(nGroup));
    }
    for(uint32_t nTimer = 0; nTimer < S.nTotalTimers; ++nTimer)
    {
        const uint32_t id = MicroProfileWebSocketIdPack(TYPE_TIMER, nTimer);
        MicroProfileWebSocketSendEnabledMessage(id, MicroProfileWebSocketTimerEnabled(nTimer));
    }
    for(uint32_t nCounter = 0; nCounter < S.nNumCounters; ++nCounter)
    {
        const uint32_t id = MicroProfileWebSocketIdPack(TYPE_COUNTER, nCounter);
        MicroProfileWebSocketSendEnabledMessage(id, MicroProfileWebSocketCounterEnabled(nCounter));
    }
    // global settings
    const uint32_t idForceEnable = MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_FORCE_ENABLE);
    MicroProfileWebSocketSendEnabledMessage(idForceEnable, MicroProfileGetEnableAllGroups());
    const uint32_t idContextSwitch = MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_CONTEXT_SWITCH_TRACE);
    MicroProfileWebSocketSendEnabledMessage(idContextSwitch, S.bContextSwitchRunning);
    const uint32_t idPlatformMarkers = MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_PLATFORM_MARKERS);
    MicroProfileWebSocketSendEnabledMessage(idPlatformMarkers, MicroProfilePlatformMarkersGetEnabled());
    MicroProfileWSPrintEnd();
}
// Sends one MSG_TIMER_TREE node describing a category, group, timer or setting.
// id/parent are packed websocket ids, pName is written verbatim into the JSON string,
// nColor == 0x42 is sent as an empty color string, anything else as #rrggbb.
// A print session must be open.
void MicroProfileWebSocketSendEntry(uint32_t id, uint32_t parent, const char* pName, int nEnabled, uint32_t nColor, uint32_t nType)
{
    MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"id\":%d,\"pid\":%d,", MSG_TIMER_TREE, id, parent);
    MicroProfileWSPrintf("\"name\":\"%s\",", pName);
    MicroProfileWSPrintf("\"e\":%d,", nEnabled);
    MicroProfileWSPrintf("\"type\":%d,", nType);
    if(nColor != 0x42)
    {
        MicroProfileWSPrintf("\"color\":\"#%02x%02x%02x\"", MICROPROFILE_UNPACK_RED(nColor) & 0xff, MICROPROFILE_UNPACK_GREEN(nColor) & 0xff, MICROPROFILE_UNPACK_BLUE(nColor) & 0xff);
    }
    else
    {
        // 0x42 stands for "no color"
        MicroProfileWSPrintf("\"color\":\"\"");
    }
    MicroProfileWSPrintf("}}");
    MicroProfileWSFlush();
}
// Sends one MSG_TIMER_TREE node describing a counter: packed id, packed parent id, name,
// enabled flag, limit and display format. A print session must be open.
void MicroProfileWebSocketSendCounterEntry(uint32_t id, uint32_t parent, const char* pName, int nEnabled, int64_t nLimit, int nFormat)
{
    // header with ids
    MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"id\":%d,\"pid\":%d,", MSG_TIMER_TREE, id, parent);
    // counter attributes, one field per call
    MicroProfileWSPrintf("\"name\":\"%s\",", pName);
    MicroProfileWSPrintf("\"e\":%d,", nEnabled);
    MicroProfileWSPrintf("\"limit\":%lld,", nLimit);
    MicroProfileWSPrintf("\"format\":%d", nFormat);
    // close "v" and the message, then send it
    MicroProfileWSPrintf("}}");
    MicroProfileWSFlush();
}
// Sends the timer tree entries the client has not seen yet.
// Compares the counts of categories/groups/timers/counters with what was already sent on this
// connection (S.WS*Sent); when any differ, the root "All" node, every new entry and the
// settings nodes are sent to socket C, and the sent counters are brought up to date.
void MicroProfileWebSocketSendState(MpSocket C)
{
    if(S.WSCategoriesSent != S.nCategoryCount || S.WSGroupsSent != S.nGroupCount || S.WSTimersSent != S.nTotalTimers || S.WSCountersSent != S.nNumCounters)
    {
        MicroProfileWSPrintStart(C);
        // root node; categories hang below it
        uint32_t root = MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_FORCE_ENABLE);
        MicroProfileWebSocketSendEntry(root, 0, "All", MicroProfileGetEnableAllGroups(), (uint32_t)-1, 0);
        for(uint32_t i = S.WSCategoriesSent; i < S.nCategoryCount; ++i)
        {
            MicroProfileCategory& CI = S.CategoryInfo[i];
            uint32_t id = MicroProfileWebSocketIdPack(TYPE_CATEGORY, i);
            uint32_t parent = root;
            MicroProfileWebSocketSendEntry(id, parent, CI.pName, MicroProfileCategoryEnabled(i), 0xffffffff, 0);
        }
        // groups are parented to their category
        for(uint32_t i = S.WSGroupsSent; i < S.nGroupCount; ++i)
        {
            MicroProfileGroupInfo& GI = S.GroupInfo[i];
            uint32_t id = MicroProfileWebSocketIdPack(TYPE_GROUP, i);
            uint32_t parent = MicroProfileWebSocketIdPack(TYPE_CATEGORY, GI.nCategory);
            MicroProfileWebSocketSendEntry(id, parent, GI.pName, MicroProfileGroupEnabled(i), GI.nColor, GI.Type);
        }
        // timers are parented to their group
        for(uint32_t i = S.WSTimersSent; i < S.nTotalTimers; ++i)
        {
            MicroProfileTimerInfo& TI = S.TimerInfo[i];
            uint32_t id = MicroProfileWebSocketIdPack(TYPE_TIMER, i);
            uint32_t parent = MicroProfileWebSocketIdPack(TYPE_GROUP, TI.nGroupIndex);
            MicroProfileWebSocketSendEntry(id, parent, TI.pName, MicroProfileWebSocketTimerEnabled(i), TI.nColor, TI.Type);
        }
        // counters form their own tree; nParent == -1 marks a top-level counter
        for(uint32_t i = S.WSCountersSent; i < S.nNumCounters; ++i)
        {
            MicroProfileCounterInfo& CI = S.CounterInfo[i];
            uint32_t id = MicroProfileWebSocketIdPack(TYPE_COUNTER, i);
            uint32_t parent = CI.nParent == -1 ? 0u : MicroProfileWebSocketIdPack(TYPE_COUNTER, CI.nParent);
            MicroProfileWebSocketSendCounterEntry(id, parent, CI.pName, MicroProfileWebSocketCounterEnabled(i), CI.nLimit, CI.eFormat);
        }
#if MICROPROFILE_CONTEXT_SWITCH_TRACE
        MicroProfileWebSocketSendEntry(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_CONTEXT_SWITCH_TRACE), 0, "Context Switch Trace", S.bContextSwitchRunning, (uint32_t)-1, 0);
#endif
#if MICROPROFILE_PLATFORM_MARKERS
        // report the platform-marker state itself (as MicroProfileWebSocketSendEnabled does) and
        // pass the nType argument explicitly
        MicroProfileWebSocketSendEntry(MicroProfileWebSocketIdPack(TYPE_SETTING, SETTING_PLATFORM_MARKERS), 0, "Platform Markers", MicroProfilePlatformMarkersGetEnabled(), (uint32_t)-1, 0);
#endif
        MicroProfileWSPrintEnd();
        S.WSCategoriesSent = S.nCategoryCount;
        S.WSGroupsSent = S.nGroupCount;
        S.WSTimersSent = S.nTotalTimers;
        S.WSCountersSent = S.nNumCounters;
    }
}
bool MicroProfileWebServerUpdate()
{
MICROPROFILE_SCOPEI("MicroProfile", "Webserver-update", MP_GREEN4);
MpSocket Connection = accept(S.ListenerSocket, 0, 0);
bool bServed = false;
MicroProfileWebSocketFrame();
if(!MP_INVALID_SOCKET(Connection))
{
std::lock_guard<std::recursive_mutex> Lock(MicroProfileMutex());
char Req[8192];
int nReceived = recv(Connection, Req, sizeof(Req) - 1, 0);
if(nReceived > 0)
{
Req[nReceived] = '\0';
uprintf("req received\n%s", Req);
#define MICROPROFILE_HTML_PNG_HEADER "HTTP/1.0 200 OK\r\nContent-Type: image/png\r\n\r\n"
#define MICROPROFILE_HTML_JS_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/javascript\r\n\r\n"
#if MICROPROFILE_MINIZ
// Expires: Tue, 01 Jan 2199 16:00:00 GMT\r\n
#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\nContent-Encoding: deflate\r\n\r\n"
#else
#define MICROPROFILE_HTML_HEADER "HTTP/1.0 200 OK\r\nContent-Type: text/html\r\n\r\n"
#endif
char* pHttp = strstr(Req, "HTTP/");
char* pGet = strstr(Req, "GET /");
char* pHost = strstr(Req, "Host: ");
char* pWebSocketKey = strstr(Req, "Sec-WebSocket-Key: ");
auto Terminate = [](char* pString)
{
char* pEnd = pString;
while(*pEnd != '\0')
{
if(*pEnd == '\r' || *pEnd == '\n' || *pEnd == ' ')
{
*pEnd = '\0';
return;
}
pEnd++;
}
};
if(pWebSocketKey)
{
if(S.nNumWebSockets) // only allow 1
{
return false;
}
pWebSocketKey += sizeof("Sec-WebSocket-Key: ") - 1;
Terminate(pWebSocketKey);
MicroProfileWebSocketHandshake(Connection, pWebSocketKey);
return false;
}
if(pHost)
{
pHost += sizeof("Host: ") - 1;
Terminate(pHost);
}
if(pHttp && pGet)
{
*pHttp = '\0';
pGet += sizeof("GET /") - 1;
Terminate(pGet);
MicroProfileParseGetResult R;
auto P = MicroProfileParseGet(pGet, &R);
switch(P)
{
case EMICROPROFILE_GET_COMMAND_SERVICE_WORKER:
{
MicroProfileSetNonBlocking(Connection, 1);
uint64_t nTickStart = MP_TICK();
send(Connection, MICROPROFILE_HTML_JS_HEADER, sizeof(MICROPROFILE_HTML_JS_HEADER) - 1, 0);
const char* JsCode = "self.addEventListener(\"fetch\", () => {}); \r\n\r\n";
send(Connection, JsCode, (int)strlen(JsCode), 0);
break;
}
case EMICROPROFILE_GET_COMMAND_FAVICON:
{
MicroProfileSetNonBlocking(Connection, 1);
uint64_t nTickStart = MP_TICK();
send(Connection, MICROPROFILE_HTML_PNG_HEADER, sizeof(MICROPROFILE_HTML_PNG_HEADER) - 1, 0);
extern const uint32_t uprof_512[];
extern const uint32_t uprof_512_len;
const char* pFile = (const char*)&uprof_512[0];
uint32_t nFileSize = uprof_512_len;
send(Connection, pFile, nFileSize, 0);
}
break;
case EMICROPROFILE_GET_COMMAND_LIVE:
{
MicroProfileSetNonBlocking(Connection, 0);
uint64_t nTickStart = MP_TICK();
send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER) - 1, 0);
uint64_t nDataStart = S.nWebServerDataSent;
S.WebServerPut = 0;
#if 0 == MICROPROFILE_MINIZ
MicroProfileDumpHtmlLive(MicroProfileWriteSocket, &Connection);
uint64_t nDataEnd = S.nWebServerDataSent;
uint64_t nTickEnd = MP_TICK();
uint64_t nDiff = (nTickEnd - nTickStart);
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
int nKb = ((nDataEnd-nDataStart)>>10) + 1;
int nCompressedKb = nKb;
MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
MicroProfileFlushSocket(Connection);
#else
MicroProfileCompressedSocketState CompressState;
MicroProfileCompressedSocketStart(&CompressState, Connection);
MicroProfileDumpHtmlLive(MicroProfileCompressedWriteSocket, &CompressState);
S.nWebServerDataSent += CompressState.nSize;
uint64_t nDataEnd = S.nWebServerDataSent;
uint64_t nTickEnd = MP_TICK();
uint64_t nDiff = (nTickEnd - nTickStart);
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
int nKb = ((nDataEnd - nDataStart) >> 10) + 1;
int nCompressedKb = ((CompressState.nCompressedSize) >> 10) + 1;
MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
MicroProfileCompressedSocketFinish(&CompressState);
MicroProfileFlushSocket(Connection);
#endif
uprintf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
(void)nCompressedKb;
}
break;
case EMICROPROFILE_GET_COMMAND_DUMP_RANGE:
case EMICROPROFILE_GET_COMMAND_DUMP:
{
{
MicroProfileSetNonBlocking(Connection, 0);
uint64_t nTickStart = MP_TICK();
send(Connection, MICROPROFILE_HTML_HEADER, sizeof(MICROPROFILE_HTML_HEADER) - 1, 0);
uint64_t nDataStart = S.nWebServerDataSent;
S.WebServerPut = 0;
#if 0 == MICROPROFILE_MINIZ
MicroProfileDumpHtml(MicroProfileWriteSocket, &Connection, R.nFrames, pHost, R.nFrameStart);
uint64_t nDataEnd = S.nWebServerDataSent;
uint64_t nTickEnd = MP_TICK();
uint64_t nDiff = (nTickEnd - nTickStart);
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
int nKb = ((nDataEnd-nDataStart)>>10) + 1;
int nCompressedKb = nKb;
MicroProfilePrintf(MicroProfileWriteSocket, &Connection, "\n<!-- Sent %dkb in %.2fms-->\n\n",nKb, fMs);
MicroProfileFlushSocket(Connection);
#else
MicroProfileCompressedSocketState CompressState;
MicroProfileCompressedSocketStart(&CompressState, Connection);
MicroProfileDumpHtml(MicroProfileCompressedWriteSocket, &CompressState, R.nFrames, pHost, R.nFrameStart);
S.nWebServerDataSent += CompressState.nSize;
uint64_t nDataEnd = S.nWebServerDataSent;
uint64_t nTickEnd = MP_TICK();
uint64_t nDiff = (nTickEnd - nTickStart);
float fMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu()) * nDiff;
int nKb = ((nDataEnd - nDataStart) >> 10) + 1;
int nCompressedKb = ((CompressState.nCompressedSize) >> 10) + 1;
MicroProfilePrintf(MicroProfileCompressedWriteSocket, &CompressState, "\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
MicroProfileCompressedSocketFinish(&CompressState);
MicroProfileFlushSocket(Connection);
#endif
uprintf("\n<!-- Sent %dkb(compressed %dkb) in %.2fms-->\n\n", nKb, nCompressedKb, fMs);
(void)nCompressedKb;
}
}
break;
case EMICROPROFILE_GET_COMMAND_UNKNOWN:
{
uprintf("unknown get command %s\n", pGet);
}
break;
}
}
}
#ifdef _WIN32
closesocket(Connection);
#else
close(Connection);
#endif
}
return bServed;
}
#endif
#if MICROPROFILE_CONTEXT_SWITCH_TRACE
// Functions that need to be implemented per platform.
// MicroProfileTraceThread is the entry point handed to MicroProfileThreadStart by
// MicroProfileStartContextSwitchTrace.
void* MicroProfileTraceThread(void* unused);
int MicroProfileIsLocalThread(uint32_t nThreadId);
// Starts the context switch trace thread. Does nothing when the trace is already running or
// the profiler is shutting down.
void MicroProfileStartContextSwitchTrace()
{
    if(S.bContextSwitchRunning || S.nMicroProfileShutdown)
    {
        return;
    }
    // set the flags before the thread exists so it observes them from its first instruction
    S.bContextSwitchRunning = true;
    S.bContextSwitchStop = false;
    MicroProfileThreadStart(&S.ContextSwitchThread, MicroProfileTraceThread);
}
// Joins the context switch trace thread, but only when a stop has been requested.
void MicroProfileJoinContextSwitchTrace()
{
    if(!S.bContextSwitchStop)
    {
        return;
    }
    MicroProfileThreadJoin(&S.ContextSwitchThread);
}
// Requests the running context switch trace to stop by raising S.bContextSwitchStop.
// Does not wait for the thread; see MicroProfileJoinContextSwitchTrace.
void MicroProfileStopContextSwitchTrace()
{
    if(!S.bContextSwitchRunning)
    {
        return;
    }
    S.bContextSwitchStop = true;
}
#ifdef _WIN32
#define INITGUID
#include <evntcons.h>
#include <evntrace.h>
#include <strsafe.h>
// Event class GUID that MicroProfileContextSwitchCallback matches against the event header;
// presumably the kernel "Thread" event class - confirm against the ETW documentation.
static GUID g_MicroProfileThreadClassGuid = { 0x3d6fa8d1, 0xfe05, 0x11d0, 0x9d, 0xda, 0x00, 0xc0, 0x4f, 0xd7, 0xba, 0x7c };
// Layout overlaid on PEVENT_TRACE::MofData for events with class type 36 (see
// MicroProfileContextSwitchCallback). Only NewThreadId and OldThreadId are read there; the
// field order and sizes must not be changed because the struct is cast onto raw event data.
struct MicroProfileSCSwitch
{
uint32_t NewThreadId;
uint32_t OldThreadId;
int8_t NewThreadPriority;
int8_t OldThreadPriority;
uint8_t PreviousCState;
int8_t SpareByte;
int8_t OldThreadWaitReason;
int8_t OldThreadWaitMode;
int8_t OldThreadState;
int8_t OldThreadWaitIdealProcessor;
uint32_t NewThreadWaitTime;
uint32_t Reserved;
};
// Per-event trace callback. Picks out events of the thread class with type 36, reads the
// switch payload and records it through MicroProfileContextSwitchPut. Events where both
// thread ids are zero are ignored.
VOID WINAPI MicroProfileContextSwitchCallback(PEVENT_TRACE pEvent)
{
    if(!(pEvent->Header.Guid == g_MicroProfileThreadClassGuid))
    {
        return;
    }
    if(pEvent->Header.Class.Type != 36)
    {
        return;
    }
    MicroProfileSCSwitch* pPayload = (MicroProfileSCSwitch*)pEvent->MofData;
    if(0 == pPayload->NewThreadId && 0 == pPayload->OldThreadId)
    {
        return;
    }
    MicroProfileContextSwitch Entry;
    Entry.nThreadOut = pPayload->OldThreadId;
    Entry.nThreadIn = pPayload->NewThreadId;
    Entry.nCpu = pEvent->BufferContext.ProcessorNumber;
    Entry.nTicks = pEvent->Header.TimeStamp.QuadPart;
    MicroProfileContextSwitchPut(&Entry);
}
// Per-buffer trace callback. Returns TRUE while the trace should keep running and FALSE once
// a stop was requested or the trace is no longer marked as running. Buffer is not inspected.
ULONG WINAPI MicroProfileBufferCallback(PEVENT_TRACE_LOGFILEA Buffer)
{
    if(S.bContextSwitchStop || !S.bContextSwitchRunning)
    {
        return FALSE;
    }
    return TRUE;
}
// EVENT_TRACE_PROPERTIES followed by storage sized for the logger name. Users set
// LoggerNameOffset to sizeof(EVENT_TRACE_PROPERTIES), i.e. the offset of `dummy`.
struct MicroProfileKernelTraceProperties : public EVENT_TRACE_PROPERTIES
{
char dummy[sizeof(KERNEL_LOGGER_NAME)];
};
void MicroProfileContextSwitchShutdownTrace()
{
TRACEHANDLE SessionHandle = 0;
MicroProfileKernelTraceProperties sessionProperties;
ZeroMemory(&sessionProperties, sizeof(sessionProperties));
sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
sessionProperties.Wnode.ClientContext = 1; // QPC clock resolution
sessionProperties.Wnode.Guid = SystemTraceControlGuid;
sessionProperties.BufferSize = 1;
sessionProperties.NumberOfBuffers = 128;
sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH;
sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
sessionProperties.MaximumFileSize = 0;
sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
sessionProperties.LogFileNameOffset = 0;
EVENT_TRACE_LOGFILEA log;
ZeroMemory(&log, sizeof(log));
log.LoggerName = (LPSTR)KERNEL_LOGGER_NAMEA;
log.ProcessTraceMode = 0;
TRACEHANDLE hLog = OpenTraceA(&log);
if(hLog)
{
ControlTrace(SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties, EVENT_TRACE_CONTROL_STOP);
}
CloseTrace(hLog);
}
// Callback signatures accepted by MicroProfileStartWin32Trace: one invoked per trace event,
// one invoked per buffer (assigned to EVENT_TRACE_LOGFILEA::EventCallback / BufferCallback).
typedef VOID(WINAPI* EventCallback)(PEVENT_TRACE);
typedef ULONG(WINAPI* BufferCallback)(PEVENT_TRACE_LOGFILEA);
// Runs a real-time kernel context switch trace on the calling thread.
// Stops any existing kernel logger session, starts a new one with context switch and process
// events enabled, then opens it and processes events (EvtCb per event, BufferCB per buffer)
// until ProcessTrace returns. The session is shut down again before returning.
// Returns false when the session could not be started, true otherwise.
bool MicroProfileStartWin32Trace(EventCallback EvtCb, BufferCallback BufferCB)
{
    MicroProfileContextSwitchShutdownTrace();
    ULONG status = ERROR_SUCCESS;
    TRACEHANDLE SessionHandle = 0;
    MicroProfileKernelTraceProperties sessionProperties;
    ZeroMemory(&sessionProperties, sizeof(sessionProperties));
    sessionProperties.Wnode.BufferSize = sizeof(sessionProperties);
    sessionProperties.Wnode.Flags = WNODE_FLAG_TRACED_GUID;
    sessionProperties.Wnode.ClientContext = 1; // QPC clock resolution
    sessionProperties.Wnode.Guid = SystemTraceControlGuid;
    sessionProperties.BufferSize = 1;
    sessionProperties.NumberOfBuffers = 128;
    sessionProperties.EnableFlags = EVENT_TRACE_FLAG_CSWITCH | EVENT_TRACE_FLAG_PROCESS;
    sessionProperties.LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
    sessionProperties.MaximumFileSize = 0;
    sessionProperties.LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES);
    sessionProperties.LogFileNameOffset = 0;
    // make sure no stale session with this name is left before starting a new one
    StopTrace(NULL, KERNEL_LOGGER_NAME, &sessionProperties);
    status = StartTrace((PTRACEHANDLE)&SessionHandle, KERNEL_LOGGER_NAME, &sessionProperties);
    if(ERROR_SUCCESS != status)
    {
        return false;
    }
    EVENT_TRACE_LOGFILEA log;
    ZeroMemory(&log, sizeof(log));
    // EVENT_TRACE_LOGFILEA / OpenTraceA take an ANSI name; KERNEL_LOGGER_NAME is the TCHAR
    // variant and would be a wide string in UNICODE builds
    log.LoggerName = (LPSTR)KERNEL_LOGGER_NAMEA;
    log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
    log.EventCallback = EvtCb;
    log.BufferCallback = BufferCB;
    TRACEHANDLE hLog = OpenTraceA(&log);
    // processes events until the trace ends or the buffer callback returns FALSE
    ProcessTrace(&hLog, 1, 0, 0);
    CloseTrace(hLog);
    MicroProfileContextSwitchShutdownTrace();
    return true;
}
#include <psapi.h>
#include <tlhelp32.h>
#include <winternl.h>
// Thread information class value passed as the LONG argument of the pNtQIT function type.
#define ThreadQuerySetWin32StartAddress 9
typedef LONG NTSTATUS;
// Signature of the dynamically resolved thread-information query function.
typedef NTSTATUS(WINAPI* pNtQIT)(HANDLE, LONG, PVOID, ULONG, PULONG);
// Success status value (the literal previously contained a stray space: "0x000 00000L").
#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
#undef Process32First
#undef Process32Next
#undef PROCESSENTRY32
#undef Module32First
#undef Module32Next
#undef MODULEENTRY32
// Block of atomic cursors/flags plus a fixed array of context switch records.
// NOTE(review): no user of this struct is visible in this part of the file; the names suggest
// a producer/consumer ring (nPut/nGet) shared with another party - confirm at the use site.
struct MicroProfileWin32ContextSwitchShared
{
std::atomic<int64_t> nPut;
std::atomic<int64_t> nGet;
std::atomic<int64_t> nQuit;
std::atomic<int64_t> nTickTrace;
std::atomic<int64_t> nTickProgram;
enum
{
// number of MicroProfileContextSwitch records that fit in 2 MiB
BUFFER_SIZE = (2 << 20) / sizeof(MicroProfileContextSwitch),
};
MicroProfileContextSwitch Buffer[BUFFER_SIZE];
};
// Fixed-capacity snapshot of the system's processes, their modules and threads, plus an
// interned string table used for the names. Filled through the global g_ThreadInfo.
struct MicroProfileWin32ThreadInfo
{
// One process; its modules occupy M[nModuleStart .. nModuleStart + nNumModules).
struct Process
{
uint32_t pid;
uint32_t nNumModules;
uint32_t nModuleStart;
const char* pProcessModule; // interned executable name
};
// One loaded module: address range [nBase, nEnd) and interned name.
struct Module
{
int64_t nBase;
int64_t nEnd;
const char* pName;
};
// capacities of the arrays below
enum
{
MAX_PROCESSES = 5 * 1024,
MAX_THREADS = 20 * 1024,
MAX_MODULES = 20 * 1024,
MAX_STRINGS = 16 * 1024,
MAX_CHARS = 128 * 1024,
};
uint32_t nNumProcesses;
uint32_t nNumThreads;
uint32_t nStringOffset; // next free byte in StringData
uint32_t nNumStrings;
uint32_t nNumModules;
Process P[MAX_PROCESSES];
Module M[MAX_MODULES];
MicroProfileThreadInfo T[MAX_THREADS];
const char* pStrings[MAX_STRINGS]; // open-addressed hash slots pointing into StringData
char StringData[MAX_CHARS]; // NUL-terminated strings stored back to back
};
// Single global snapshot; cleared with memset at the start of MicroProfileWin32InitThreadInfo2.
static MicroProfileWin32ThreadInfo g_ThreadInfo;
// Interns pString in g_ThreadInfo and returns the stored copy.
// The table is open addressed: the start slot is derived from the first (up to) four bytes of
// the string, and at most MAX_SEARCH consecutive slots are probed. An equal string already in
// the table is returned as is; otherwise the string is appended to StringData.
// Returns a fixed diagnostic string when no slot is found within the probe limit or when
// StringData has no room left for the string.
const char* MicroProfileWin32ThreadInfoAddString(const char* pString)
{
    static const char* const pFailure = "internal hash table fail: should never happen";
    const size_t nLen = strlen(pString);
    // Hash seed: the leading bytes of the string, zero padded. Copy at most up to and including
    // the terminator so strings shorter than three characters are not read out of bounds, and
    // use memcpy rather than a uint32_t* cast to avoid unaligned/aliasing access.
    uint32_t nHash = 0;
    const size_t nHashBytes = (nLen + 1 < sizeof(nHash)) ? nLen + 1 : sizeof(nHash);
    memcpy(&nHash, pString, nHashBytes);
    nHash ^= (nHash >> 16);
    enum
    {
        MAX_SEARCH = 256,
    };
    for(uint32_t i = 0; i < MAX_SEARCH; ++i)
    {
        uint32_t idx = (i + nHash) % MicroProfileWin32ThreadInfo::MAX_STRINGS;
        const char* pExisting = g_ThreadInfo.pStrings[idx];
        if(0 == pExisting)
        {
            // free slot: append the string, provided it still fits into StringData
            const size_t nFree = (size_t)MicroProfileWin32ThreadInfo::MAX_CHARS - g_ThreadInfo.nStringOffset;
            if(nLen + 1 > nFree)
            {
                return pFailure;
            }
            char* pDst = &g_ThreadInfo.StringData[g_ThreadInfo.nStringOffset];
            memcpy(pDst, pString, nLen + 1);
            g_ThreadInfo.pStrings[idx] = pDst;
            g_ThreadInfo.nStringOffset += (uint32_t)(nLen + 1);
            return pDst;
        }
        if(0 == strcmp(pExisting, pString))
        {
            return pExisting;
        }
    }
    return pFailure;
}
// Appends the modules of process P to g_ThreadInfo.M and counts them in P.nNumModules.
//
// P: process entry whose pid selects the snapshot; the caller sets P.nModuleStart and
//    resets P.nNumModules before the call.
// Processes whose module list cannot be snapshotted contribute no modules.
void MicroProfileWin32ExtractModules(MicroProfileWin32ThreadInfo::Process& P)
{
    HANDLE hModuleSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, P.pid);
    // CreateToolhelp32Snapshot reports failure with INVALID_HANDLE_VALUE, not NULL.
    if(hModuleSnapshot == INVALID_HANDLE_VALUE || hModuleSnapshot == NULL)
    {
        return;
    }
    MODULEENTRY32 me;
    memset(&me, 0, sizeof(me));
    // Module32First fails unless dwSize holds the structure size.
    me.dwSize = sizeof(MODULEENTRY32);
    if(Module32First(hModuleSnapshot, &me))
    {
        do
        {
            if(g_ThreadInfo.nNumModules >= MicroProfileWin32ThreadInfo::MAX_MODULES)
            {
                break; // table full, nothing more can be recorded
            }
            auto& M = g_ThreadInfo.M[g_ThreadInfo.nNumModules++];
            P.nNumModules++;
            intptr_t nBase = (intptr_t)me.modBaseAddr;
            intptr_t nEnd = nBase + me.modBaseSize;
            M.nBase = nBase;
            M.nEnd = nEnd;
            M.pName = MicroProfileWin32ThreadInfoAddString(&me.szModule[0]);
        } while(Module32Next(hModuleSnapshot, &me));
    }
    CloseHandle(hModuleSnapshot);
}
void MicroProfileWin32InitThreadInfo2()
{
memset(&g_ThreadInfo, 0, sizeof(g_ThreadInfo));
#if MICROPROFILE_DEBUG
float fToMsCpu = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
#endif
HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPALL, 0);
PROCESSENTRY32 pe32;
THREADENTRY32 te32;
te32.dwSize = sizeof(THREADENTRY32);
pe32.dwSize = sizeof(PROCESSENTRY32);
{
#if MICROPROFILE_DEBUG
int64_t nTickStart = MP_TICK();
#endif
if(Process32First(hSnap, &pe32))
{
do
{
MicroProfileWin32ThreadInfo::Process P;
P.pid = pe32.th32ProcessID;
P.pProcessModule = MicroProfileWin32ThreadInfoAddString(pe32.szExeFile);
g_ThreadInfo.P[g_ThreadInfo.nNumProcesses++] = P;
} while(Process32Next(hSnap, &pe32) && g_ThreadInfo.nNumProcesses < MicroProfileWin32ThreadInfo::MAX_PROCESSES);
}
#if MICROPROFILE_DEBUG
int64_t nTicksEnd = MP_TICK();
float fMs = fToMsCpu * (nTicksEnd - nTickStart);
uprintf("Process iteration %6.2fms processes %d\n", fMs, g_ThreadInfo.nNumProcesses);
#endif
}
{
#if MICROPROFILE_DEBUG
int64_t nTickStart = MP_TICK();
#endif
for(uint32_t i = 0; i < g_ThreadInfo.nNumProcesses; ++i)
{
g_ThreadInfo.P[i].nModuleStart = g_ThreadInfo.nNumModules;
g_ThreadInfo.P[i].nNumModules = 0;
MicroProfileWin32ExtractModules(g_ThreadInfo.P[i]);
}
#if MICROPROFILE_DEBUG
int64_t nTicksEnd = MP_TICK();
float fMs = fToMsCpu * (nTicksEnd - nTickStart);
uprintf("Module iteration %6.2fms NumModules %d\n", fMs, g_ThreadInfo.nNumModules);
#endif
}
pNtQIT NtQueryInformationThread = (pNtQIT)GetProcAddress(GetModuleHandleA("ntdll.dll"), "NtQueryInformationThread");
intptr_t dwStartAddress;
ULONG olen;
uint32_t nThreadsTested = 0;
uint32_t nThreadsSucceeded = 0;
if(Thread32First(hSnap, &te32))
{
#if MICROPROFILE_DEBUG
int64_t nTickStart = MP_TICK();
#endif
do
{
nThreadsTested++;
const char* pModule = "?";
HANDLE hThread = OpenThread(THREAD_QUERY_INFORMATION, FALSE, te32.th32ThreadID);
if(hThread)
{
NTSTATUS ntStatus = NtQueryInformationThread(hThread, (THREADINFOCLASS)ThreadQuerySetWin32StartAddress, &dwStartAddress, sizeof(dwStartAddress), &olen);
if(0 == ntStatus)
{
uint32_t nProcessIndex = (uint32_t)-1;
for(uint32_t i = 0; i < g_ThreadInfo.nNumProcesses; ++i)
{
if(g_ThreadInfo.P[i].pid == te32.th32OwnerProcessID)
{
nProcessIndex = i;
break;
}
}
if(nProcessIndex != (uint32_t)-1)
{
uint32_t nModuleStart = g_ThreadInfo.P[nProcessIndex].nModuleStart;
uint32_t nNumModules = g_ThreadInfo.P[nProcessIndex].nNumModules;
for(uint32_t i = 0; i < nNumModules; ++i)
{
auto& M = g_ThreadInfo.M[nModuleStart + i];
if(M.nBase <= dwStartAddress && M.nEnd >= dwStartAddress)
{
pModule = M.pName;
}
}
}
}
}
if(hThread)
CloseHandle(hThread);
{
MicroProfileThreadInfo T;
T.pid = te32.th32OwnerProcessID;
T.tid = te32.th32ThreadID;
const char* pProcess = "unknown";
for(uint32_t i = 0; i < g_ThreadInfo.nNumProcesses; ++i)
{
if(g_ThreadInfo.P[i].pid == T.pid)
{
pProcess = g_ThreadInfo.P[i].pProcessModule;
break;
}
}
T.pProcessModule = pProcess;
T.pThreadModule = MicroProfileWin32ThreadInfoAddString(pModule);
T.nIsLocal = GetCurrentProcessId() == T.pid ? 1 : 0;
nThreadsSucceeded++;
g_ThreadInfo.T[g_ThreadInfo.nNumThreads++] = T;
}
} while(Thread32Next(hSnap, &te32) && g_ThreadInfo.nNumThreads < MicroProfileWin32ThreadInfo::MAX_THREADS);
#if MICROPROFILE_DEBUG
int64_t nTickEnd = MP_TICK();
float fMs = fToMsCpu * (nTickEnd - nTickStart);
uprintf("Thread iteration %6.2fms Threads %d\n", fMs, g_ThreadInfo.nNumThreads);
#endif
}
}
// Refreshes the cached thread tables on the first call and every time profiling goes
// from "some group active" to "no group active" between two consecutive calls.
void MicroProfileWin32UpdateThreadInfo()
{
    static int s_nPrevActive = 1;
    static int s_nInitialized = 0;

    const int nActive = MicroProfileAnyGroupActive() ? 1 : 0;
    const bool bJustStopped = (1 == s_nPrevActive) && (0 == nActive);
    if(bJustStopped || 0 == s_nInitialized)
    {
        s_nInitialized = 1;
        MicroProfileWin32InitThreadInfo2();
    }
    s_nPrevActive = nActive;
}
// Builds a display name of the form "p:<process> t:<thread module>" for nThreadId.
//
// Returns a pointer to a function-local static buffer, so the text is overwritten by the
// next call. Unknown thread ids yield "?".
const char* MicroProfileThreadNameFromId(MicroProfileThreadIdType nThreadId)
{
    static char s_Name[1024];
    MicroProfileWin32UpdateThreadInfo();

    // Linear search for the first table entry with a matching thread id.
    const MicroProfileThreadInfo* pMatch = 0;
    for(uint32_t i = 0; 0 == pMatch && i < g_ThreadInfo.nNumThreads; ++i)
    {
        if(g_ThreadInfo.T[i].tid == nThreadId)
        {
            pMatch = &g_ThreadInfo.T[i];
        }
    }

    if(pMatch)
    {
        sprintf_s(s_Name, "p:%s t:%s", pMatch->pProcessModule, pMatch->pThreadModule);
    }
    else
    {
        sprintf_s(s_Name, "?");
    }
    return s_Name;
}
// Name prefix of the file mapping that carries the shared context switch buffer;
// MicroProfileTraceThread appends "_<time>" to make the actual mapping name.
#define MICROPROFILE_FILEMAPPING "microprofile-shared"
// The code up to the matching #endif is only compiled when building the collector program.
#ifdef MICROPROFILE_WIN32_COLLECTOR
#define MICROPROFILE_WIN32_CSWITCH_TIMEOUT 15 // seconds to wait before collector exits
// Mapped view of the shared buffer; set in main() before the trace starts.
static MicroProfileWin32ContextSwitchShared* g_pShared = 0;
// Trace event callback of the collector process.
//
// Events from the thread class GUID with type 36 are read as MicroProfileSCSwitch
// payloads and, unless both thread ids are zero, appended to the shared ring buffer
// (counted as a skip when the buffer is full). Whenever the packet counter is a multiple
// of 4096 the callback also publishes its tick, prints a status line and requests
// shutdown when the profiled program's tick is older than the timeout.
VOID WINAPI MicroProfileContextSwitchCallbackCollector(PEVENT_TRACE pEvent)
{
    static int64_t s_nPackets = 0;
    static int64_t s_nSkips = 0;

    const bool bIsCSwitch = pEvent->Header.Guid == g_MicroProfileThreadClassGuid && pEvent->Header.Class.Type == 36;
    if(bIsCSwitch)
    {
        const MicroProfileSCSwitch* pPayload = (MicroProfileSCSwitch*)pEvent->MofData;
        if(pPayload->NewThreadId != 0 || pPayload->OldThreadId != 0)
        {
            MicroProfileContextSwitch Entry;
            Entry.nThreadOut = pPayload->OldThreadId;
            Entry.nThreadIn = pPayload->NewThreadId;
            Entry.nCpu = pEvent->BufferContext.ProcessorNumber;
            Entry.nTicks = pEvent->Header.TimeStamp.QuadPart;

            const int64_t nWrite = g_pShared->nPut.load(std::memory_order_relaxed);
            const int64_t nRead = g_pShared->nGet.load(std::memory_order_relaxed);
            ++s_nPackets;
            if(nWrite - nRead >= MicroProfileWin32ContextSwitchShared::BUFFER_SIZE)
            {
                // consumer has not caught up: drop the record
                ++s_nSkips;
            }
            else
            {
                g_pShared->Buffer[nWrite % MicroProfileWin32ContextSwitchShared::BUFFER_SIZE] = Entry;
                // publish the slot only after it has been filled
                g_pShared->nPut.store(nWrite + 1, std::memory_order_release);
                s_nSkips = 0;
            }
        }
    }

    if(0 != (s_nPackets % (4 << 10)))
    {
        return;
    }

    // Periodic heartbeat / watchdog.
    const int64_t nTickTrace = MP_TICK();
    g_pShared->nTickTrace.store(nTickTrace);
    const int64_t nTickProgram = g_pShared->nTickProgram.load();
    const float fTickToMs = MicroProfileTickToMsMultiplier(MicroProfileTicksPerSecondCpu());
    const float fTime = fabs(fTickToMs * (nTickTrace - nTickProgram));
    printf("\rRead %" PRId64 " CSwitch Packets, Skips %" PRId64 " Time difference %6.3fms ", s_nPackets, s_nSkips, fTime);
    fflush(stdout);
    if(fTime > MICROPROFILE_WIN32_CSWITCH_TIMEOUT * 1000)
    {
        g_pShared->nQuit.store(1);
    }
}
// Buffer callback of the collector's trace session: returns FALSE once the shared quit
// flag is non-zero and TRUE otherwise. The Buffer argument is not inspected.
ULONG WINAPI MicroProfileBufferCallbackCollector(PEVENT_TRACE_LOGFILEA Buffer)
{
    if(g_pShared->nQuit.load())
    {
        return FALSE;
    }
    return TRUE;
}
int main(int argc, char* argv[])
{
if(argc != 2)
{
return 1;
}
printf("using file '%s'\n", argv[1]);
HANDLE hMemory = OpenFileMappingA(FILE_MAP_ALL_ACCESS, FALSE, argv[1]);
if(hMemory == NULL)
{
return 1;
}
g_pShared = (MicroProfileWin32ContextSwitchShared*)MapViewOfFile(hMemory, FILE_MAP_ALL_ACCESS, 0, 0, sizeof(MicroProfileWin32ContextSwitchShared));
if(g_pShared != NULL)
{
MicroProfileStartWin32Trace(MicroProfileContextSwitchCallbackCollector, MicroProfileBufferCallbackCollector);
UnmapViewOfFile(g_pShared);
}
CloseHandle(hMemory);
return 0;
}
#endif
#include <shellapi.h>
void* MicroProfileTraceThread(void* unused)
{
MicroProfileOnThreadCreate("ContextSwitchThread");
MicroProfileContextSwitchShutdownTrace();
if(!MicroProfileStartWin32Trace(MicroProfileContextSwitchCallback, MicroProfileBufferCallback))
{
MicroProfileContextSwitchShutdownTrace();
// not running as admin. try and start other process.
MicroProfileWin32ContextSwitchShared* pShared = 0;
char Filename[512];
time_t t = time(NULL);
snprintf(Filename, sizeof(Filename), "%s_%d", MICROPROFILE_FILEMAPPING, (int)t);
HANDLE hMemory = CreateFileMappingA(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, sizeof(MicroProfileWin32ContextSwitchShared), Filename);
if(hMemory != NULL)
{
pShared = (MicroProfileWin32ContextSwitchShared*)MapViewOfFile(hMemory, FILE_MAP_ALL_ACCESS, 0, 0, sizeof(MicroProfileWin32ContextSwitchShared));
if(pShared != NULL)
{
#ifdef _M_IX86
#define CSWITCH_EXE "microprofile-win32-cswitch_x86.exe"
#else
#define CSWITCH_EXE "microprofile-win32-cswitch_x64.exe"
#endif
pShared->nTickProgram.store(MP_TICK());
pShared->nTickTrace.store(MP_TICK());
HINSTANCE Instance = ShellExecuteA(NULL, "runas", CSWITCH_EXE, Filename, "", SW_SHOWMINNOACTIVE);
int64_t nInstance = (int64_t)Instance;
if(nInstance >= 32)
{
int64_t nPut, nGet;
while(!S.bContextSwitchStop)
{
nPut = pShared->nPut.load(std::memory_order_acquire);
nGet = pShared->nGet.load(std::memory_order_relaxed);
if(nPut == nGet)
{
Sleep(20);
}
else
{
for(int64_t i = nGet; i != nPut; i++)
{
MicroProfileContextSwitchPut(&pShared->Buffer[i % MicroProfileWin32ContextSwitchShared::BUFFER_SIZE]);
}
pShared->nGet.store(nPut, std::memory_order_release);
pShared->nTickProgram.store(MP_TICK());
}
}
pShared->nQuit.store(1);
}
}
UnmapViewOfFile(pShared);
}
CloseHandle(hMemory);
}
S.bContextSwitchRunning = false;
MicroProfileOnThreadExit();
return 0;
}
// Looks up the cached description of thread nThreadId (refreshing the cache first when
// MicroProfileWin32UpdateThreadInfo decides it is stale). When the id is unknown, an
// entry built from the id alone is returned.
MicroProfileThreadInfo MicroProfileGetThreadInfo(MicroProfileThreadIdType nThreadId)
{
    MicroProfileWin32UpdateThreadInfo();

    const MicroProfileThreadInfo* pEntry = g_ThreadInfo.T;
    const MicroProfileThreadInfo* const pEnd = pEntry + g_ThreadInfo.nNumThreads;
    for(; pEntry != pEnd; ++pEntry)
    {
        if(pEntry->tid == nThreadId)
        {
            return *pEntry;
        }
    }
    return MicroProfileThreadInfo((uint32_t)nThreadId, 0, 0);
}
// Rebuilds the thread tables unconditionally, stores the address of the first thread
// entry in *pThreadArray and returns the number of entries.
uint32_t MicroProfileGetThreadInfoArray(MicroProfileThreadInfo** pThreadArray)
{
    MicroProfileWin32InitThreadInfo2();
    const uint32_t nCount = g_ThreadInfo.nNumThreads;
    *pThreadArray = g_ThreadInfo.T;
    return nCount;
}
#elif defined(__APPLE__)
#include <sys/time.h>
// Thread entry point that feeds context switch records into the profiler on macOS.
//
// Reads text lines from the file "mypipe" (expected to be fed by a dtrace script). Each
// usable line contains three 'X'-prefixed decimal fields in this order: cpu, thread id
// and timestamp. Lines that do not contain all three fields are ignored.
//
// unused: ignored.
// Returns: always 0. S.bContextSwitchRunning is cleared before returning.
void* MicroProfileTraceThread(void* unused)
{
    FILE* pFile = fopen("mypipe", "r");
    if(!pFile)
    {
        uprintf("CONTEXT SWITCH FAILED TO OPEN FILE: make sure to run dtrace script\n");
        S.bContextSwitchRunning = false;
        return 0;
    }
    uprintf("STARTING TRACE THREAD\n");
    char* pLine = 0;
    size_t cap = 0;
    // getline returns -1 at end of file or on error, so the result must stay signed.
    ssize_t len = 0;
    struct timeval tv;
    gettimeofday(&tv, NULL);
    uint64_t nsSinceEpoch = ((uint64_t)(tv.tv_sec) * 1000000 + (uint64_t)(tv.tv_usec)) * 1000;
    uint64_t nTickEpoch = MP_TICK();
    uint32_t nLastThread[MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS] = { 0 };
    mach_timebase_info_data_t sTimebaseInfo;
    mach_timebase_info(&sTimebaseInfo);
    S.bContextSwitchRunning = true;
    uint64_t nProcessed = 0;
    uint64_t nProcessedLast = 0;
    while((len = getline(&pLine, &cap, pFile)) > 0 && !S.bContextSwitchStop)
    {
        nProcessed += (uint64_t)len;
        if(nProcessed - nProcessedLast > 10 << 10)
        {
            // report the amount read since the previous report before resetting the marker
            uprintf("processed %llukb %llukb\n", (unsigned long long)((nProcessed - nProcessedLast) >> 10), (unsigned long long)(nProcessed >> 10));
            nProcessedLast = nProcessed;
        }
        // Locate the three field markers; skip the line if any of them is missing.
        char* pX = strchr(pLine, 'X');
        char* pX2 = pX ? strchr(pX + 1, 'X') : 0;
        char* pX3 = pX2 ? strchr(pX2 + 1, 'X') : 0;
        if(!pX3)
        {
            continue;
        }
        int cpu = atoi(pX + 1);
        int thread = atoi(pX2 + 1);
        int64_t timestamp = strtoll(pX3 + 1, 0, 10);
        // convert to ticks.
        // NOTE(review): numer/denom is applied in the ns -> tick direction here; confirm this
        // matches the clock behind MP_TICK on machines where numer != denom.
        uint64_t nDeltaNsSinceEpoch = timestamp - nsSinceEpoch;
        uint64_t nDeltaTickSinceEpoch = sTimebaseInfo.numer * nDeltaNsSinceEpoch / sTimebaseInfo.denom;
        uint64_t nTicks = nDeltaTickSinceEpoch + nTickEpoch;
        // cpu indexes nLastThread, so reject negative values as well as too large ones
        if(cpu >= 0 && cpu < MICROPROFILE_MAX_CONTEXT_SWITCH_THREADS)
        {
            MicroProfileContextSwitch Switch;
            Switch.nThreadOut = nLastThread[cpu];
            Switch.nThreadIn = thread;
            nLastThread[cpu] = thread;
            Switch.nCpu = cpu;
            Switch.nTicks = nTicks;
            MicroProfileContextSwitchPut(&Switch);
        }
    }
    // getline allocates the line buffer; release it together with the stream.
    free(pLine);
    fclose(pFile);
    uprintf("EXITING TRACE THREAD\n");
    S.bContextSwitchRunning = false;
    return 0;
}
// No thread table is kept on this platform: returns an entry built from the id alone.
MicroProfileThreadInfo MicroProfileGetThreadInfo(MicroProfileThreadIdType nThreadId)
{
    return MicroProfileThreadInfo((uint32_t)nThreadId, 0, 0);
}
// No thread table is kept on this platform: clears *pThreadArray and reports zero entries.
uint32_t MicroProfileGetThreadInfoArray(MicroProfileThreadInfo** pThreadArray)
{
    const uint32_t nNoEntries = 0;
    *pThreadArray = nullptr;
    return nNoEntries;
}
#endif
#else
// Fallback used when this branch of the enclosing conditional is compiled: returns an
// entry built from the id alone.
MicroProfileThreadInfo MicroProfileGetThreadInfo(MicroProfileThreadIdType nThreadId)
{
    return MicroProfileThreadInfo((uint32_t)nThreadId, 0, 0);
}
// Fallback used when this branch of the enclosing conditional is compiled: clears
// *pThreadArray and reports zero entries.
uint32_t MicroProfileGetThreadInfoArray(MicroProfileThreadInfo** pThreadArray)
{
    const uint32_t nNoEntries = 0;
    *pThreadArray = nullptr;
    return nNoEntries;
}
// Intentionally empty: in this branch of the enclosing conditional there is no context
// switch trace to stop.
void MicroProfileStopContextSwitchTrace()
{
}
// Intentionally empty: in this branch of the enclosing conditional there is no context
// switch trace thread to join.
void MicroProfileJoinContextSwitchTrace()
{
}
// Intentionally empty: in this branch of the enclosing conditional no context switch
// trace is started.
void MicroProfileStartContextSwitchTrace()
{
}
#endif
#if MICROPROFILE_GPU_TIMERS
// Tears down the active GPU timer backend, if any: runs MicroProfileGpuShutdown, frees the
// backend state and clears every backend callback. Does nothing when no backend is installed.
void MicroProfileGpuShutdownPlatform()
{
    if(!S.pGPU)
    {
        return;
    }

    MicroProfileGpuShutdown();
    MP_FREE(S.pGPU);
    S.pGPU = nullptr;

    // Detach all backend entry points.
    MicroProfileGpuShutdown_Callback = nullptr;
    MicroProfileGpuFlip_Callback = nullptr;
    MicroProfileGetGpuTickReference_Callback = nullptr;
    MicroProfileTicksPerSecondGpu_Callback = nullptr;
    MicroProfileGpuGetTimeStamp_Callback = nullptr;
    MicroProfileGpuInsertTimeStamp_Callback = nullptr;
}
// Installs a GPU timer backend.
//
// eType:            backend tag stored in pGPU->Type.
// pGPU:             backend state; becomes S.pGPU.
// remaining params: backend entry points stored in the matching *_Callback globals.
// Asserts that no backend is installed yet.
void MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType eType,
                                 MicroProfileGpuTimerState* pGPU,
                                 MicroProfileGpuInsertTimeStamp_CB InsertTimeStamp,
                                 MicroProfileGpuGetTimeStamp_CB GetTimeStamp,
                                 MicroProfileTicksPerSecondGpu_CB TicksPerSecond,
                                 MicroProfileGetGpuTickReference_CB GetTickReference,
                                 MicroProfileGpuFlip_CB Flip,
                                 MicroProfileGpuShutdown_CB Shutdown)
{
    MP_ASSERT(S.pGPU == nullptr);

    // Entry points.
    MicroProfileGpuInsertTimeStamp_Callback = InsertTimeStamp;
    MicroProfileGpuGetTimeStamp_Callback = GetTimeStamp;
    MicroProfileTicksPerSecondGpu_Callback = TicksPerSecond;
    MicroProfileGetGpuTickReference_Callback = GetTickReference;
    MicroProfileGpuFlip_Callback = Flip;
    MicroProfileGpuShutdown_Callback = Shutdown;

    // Backend state.
    pGPU->Type = eType;
    S.pGPU = pGPU;
}
#endif
#if MICROPROFILE_GPU_TIMERS_D3D11
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::::'##:::
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####::::'####:::
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##::::.. ##:::
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##:::::: ##:::
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::::: ##:::
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##:::::: ##:::
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######::'######:
//:......::::..::::::::::.......::::::........::::.......:::........::::......:::......::
uint32_t MicroProfileGpuInsertTimeStampD3D11(void* pContext_)
{
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return 0;
MicroProfileD3D11Frame& Frame = pGPU->m_QueryFrames[pGPU->m_nQueryFrame];
uint32_t nStart = Frame.m_nQueryStart;
if(Frame.m_nRateQueryStarted)
{
uint32_t nIndex = (uint32_t)-1;
do
{
nIndex = Frame.m_nQueryCount.load();
if(nIndex + 1 >= Frame.m_nQueryCountMax)
{
return (uint32_t)-1;
}
} while(!Frame.m_nQueryCount.compare_exchange_weak(nIndex, nIndex + 1));
nIndex += nStart;
uint32_t nQueryIndex = nIndex % MICROPROFILE_D3D11_MAX_QUERIES;
ID3D11Query* pQuery = (ID3D11Query*)pGPU->m_pQueries[nQueryIndex];
ID3D11DeviceContext* pContext = (ID3D11DeviceContext*)pContext_;
pContext->End(pQuery);
return nQueryIndex;
}
return (uint32_t)-1;
}
uint64_t MicroProfileGpuGetTimeStampD3D11(uint32_t nIndex)
{
if(nIndex == (uint32_t)-1)
{
return (uint64_t)-1;
}
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return 0;
int64_t nResult = pGPU->m_nQueryResults[nIndex];
MP_ASSERT(nResult != -1);
return nResult;
}
bool MicroProfileGpuGetDataD3D11(void* pQuery, void* pData, uint32_t nDataSize)
{
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return false;
HRESULT hr;
do
{
hr = ((ID3D11DeviceContext*)pGPU->m_pImmediateContext)->GetData((ID3D11Query*)pQuery, pData, nDataSize, 0);
} while(hr == S_FALSE);
switch(hr)
{
case DXGI_ERROR_DEVICE_REMOVED:
case DXGI_ERROR_INVALID_CALL:
case E_INVALIDARG:
MP_BREAK();
return false;
}
return true;
}
uint64_t MicroProfileTicksPerSecondGpuD3D11()
{
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return 1;
return pGPU->m_nQueryFrequency;
}
// Ends the current GPU profiling frame and starts the next one.
//
// pDeviceContext_: ID3D11DeviceContext* used to insert the frame timestamp.
// Returns: the value returned by MicroProfileGpuInsertTimeStamp for the frame timestamp;
//          (uint32_t)-1 when pDeviceContext_ is null; 0 when no D3D11 timer state is installed.
uint32_t MicroProfileGpuFlipD3D11(void* pDeviceContext_)
{
if(!pDeviceContext_)
{
return (uint32_t)-1;
}
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return 0;
ID3D11DeviceContext* pDeviceContext = (ID3D11DeviceContext*)pDeviceContext_;
// The frame timestamp is taken before the current frame is closed, so it is counted in the current frame.
uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStamp(pDeviceContext);
MicroProfileD3D11Frame& CurrentFrame = pGPU->m_QueryFrames[pGPU->m_nQueryFrame];
ID3D11DeviceContext* pImmediateContext = (ID3D11DeviceContext*)pGPU->m_pImmediateContext;
// Close the rate query of the frame that is ending (if it was started).
if(CurrentFrame.m_nRateQueryStarted)
{
pImmediateContext->End((ID3D11Query*)CurrentFrame.m_pRateQuery);
}
uint32_t nNextFrame = (pGPU->m_nQueryFrame + 1) % MICROPROFILE_GPU_FRAME_DELAY;
// New write position of the query ring: just past the queries used by the ending frame.
pGPU->m_nQueryPut = (CurrentFrame.m_nQueryStart + CurrentFrame.m_nQueryCount) % MICROPROFILE_D3D11_MAX_QUERIES;
// The slot about to be reused still holds the oldest frame; collect its results first.
MicroProfileD3D11Frame& OldFrame = pGPU->m_QueryFrames[nNextFrame];
if(OldFrame.m_nRateQueryStarted)
{
// Layout read back from the rate query: frequency followed by a disjoint flag.
struct RateQueryResult
{
uint64_t nFrequency;
BOOL bDisjoint;
};
RateQueryResult Result;
if(MicroProfileGpuGetDataD3D11(OldFrame.m_pRateQuery, &Result, sizeof(Result)))
{
if(pGPU->m_nQueryFrequency != (int64_t)Result.nFrequency)
{
// Only log when a previously known (non-zero) frequency changes.
if(pGPU->m_nQueryFrequency)
{
OutputDebugStringA("Query freq changing");
}
pGPU->m_nQueryFrequency = Result.nFrequency;
}
uint32_t nStart = OldFrame.m_nQueryStart;
uint32_t nCount = OldFrame.m_nQueryCount;
// Read back every timestamp of the old frame; failed reads are stored as -1.
for(uint32_t i = 0; i < nCount; ++i)
{
uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D11_MAX_QUERIES;
if(!MicroProfileGpuGetDataD3D11(pGPU->m_pQueries[nIndex], &pGPU->m_nQueryResults[nIndex], sizeof(uint64_t)))
{
pGPU->m_nQueryResults[nIndex] = -1;
}
}
}
else
{
// The rate query could not be read: mark all timestamps of the old frame as -1.
uint32_t nStart = OldFrame.m_nQueryStart;
uint32_t nCount = OldFrame.m_nQueryCount;
for(uint32_t i = 0; i < nCount; ++i)
{
uint32_t nIndex = (i + nStart) % MICROPROFILE_D3D11_MAX_QUERIES;
pGPU->m_nQueryResults[nIndex] = -1;
}
}
// New read position of the query ring: just past the queries of the old frame.
pGPU->m_nQueryGet = (OldFrame.m_nQueryStart + OldFrame.m_nQueryCount) % MICROPROFILE_D3D11_MAX_QUERIES;
}
pGPU->m_nQueryFrame = nNextFrame;
MicroProfileD3D11Frame& NextFrame = pGPU->m_QueryFrames[nNextFrame];
pImmediateContext->Begin((ID3D11Query*)NextFrame.m_pRateQuery);
NextFrame.m_nQueryStart = pGPU->m_nQueryPut;
NextFrame.m_nQueryCount = 0;
// Compute how many queries the new frame may use from the distance between put and get in the ring.
if(pGPU->m_nQueryPut >= pGPU->m_nQueryGet)
{
NextFrame.m_nQueryCountMax = (MICROPROFILE_D3D11_MAX_QUERIES - pGPU->m_nQueryPut) + pGPU->m_nQueryGet;
}
else
{
NextFrame.m_nQueryCountMax = pGPU->m_nQueryGet - pGPU->m_nQueryPut - 1;
}
// Keep one further slot unused (when any are available).
if(NextFrame.m_nQueryCountMax)
NextFrame.m_nQueryCountMax -= 1;
NextFrame.m_nRateQueryStarted = 1;
return nFrameTimeStamp;
}
void MicroProfileGpuInitD3D11(void* pDevice_, void* pImmediateContext)
{
ID3D11Device* pDevice = (ID3D11Device*)pDevice_;
MicroProfileGpuTimerStateD3D11* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateD3D11);
MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_D3D11,
pGPU,
MicroProfileGpuInsertTimeStampD3D11,
MicroProfileGpuGetTimeStampD3D11,
MicroProfileTicksPerSecondGpuD3D11,
MicroProfileGetGpuTickReferenceD3D11,
MicroProfileGpuFlipD3D11,
MicroProfileGpuShutdownD3D11);
pGPU->m_pImmediateContext = pImmediateContext;
D3D11_QUERY_DESC Desc;
Desc.MiscFlags = 0;
Desc.Query = D3D11_QUERY_TIMESTAMP;
for(uint32_t i = 0; i < MICROPROFILE_D3D11_MAX_QUERIES; ++i)
{
HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&pGPU->m_pQueries[i]);
MP_ASSERT(hr == S_OK);
pGPU->m_nQueryResults[i] = -1;
}
HRESULT hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&pGPU->pSyncQuery);
MP_ASSERT(hr == S_OK);
pGPU->m_nQueryPut = 0;
pGPU->m_nQueryGet = 0;
pGPU->m_nQueryFrame = 0;
pGPU->m_nQueryFrequency = 0;
Desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
{
pGPU->m_QueryFrames[i].m_nQueryStart = 0;
pGPU->m_QueryFrames[i].m_nQueryCount = 0;
pGPU->m_QueryFrames[i].m_nRateQueryStarted = 0;
hr = pDevice->CreateQuery(&Desc, (ID3D11Query**)&pGPU->m_QueryFrames[i].m_pRateQuery);
MP_ASSERT(hr == S_OK);
}
}
void MicroProfileGpuShutdownD3D11()
{
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return;
for(uint32_t i = 0; i < MICROPROFILE_D3D11_MAX_QUERIES; ++i)
{
if(pGPU->m_pQueries[i])
{
ID3D11Query* pQuery = (ID3D11Query*)pGPU->m_pQueries[i];
pQuery->Release();
pGPU->m_pQueries[i] = 0;
}
}
for(uint32_t i = 0; i < MICROPROFILE_GPU_FRAME_DELAY; ++i)
{
if(pGPU->m_QueryFrames[i].m_pRateQuery)
{
ID3D11Query* pQuery = (ID3D11Query*)pGPU->m_QueryFrames[i].m_pRateQuery;
pQuery->Release();
pGPU->m_QueryFrames[i].m_pRateQuery = 0;
}
}
if(pGPU->pSyncQuery)
{
ID3D11Query* pSyncQuery = (ID3D11Query*)pGPU->pSyncQuery;
pSyncQuery->Release();
pGPU->pSyncQuery = 0;
}
}
int MicroProfileGetGpuTickReferenceD3D11(int64_t* pOutCPU, int64_t* pOutGpu)
{
MicroProfileGpuTimerStateD3D11* pGPU = MicroProfileGetGpuTimerStateD3D11();
if(!pGPU)
return 0;
{
MicroProfileD3D11Frame& Frame = pGPU->m_QueryFrames[pGPU->m_nQueryFrame];
if(Frame.m_nRateQueryStarted)
{
ID3D11Query* pSyncQuery = (ID3D11Query*)pGPU->pSyncQuery;
ID3D11DeviceContext* pImmediateContext = (ID3D11DeviceContext*)pGPU->m_pImmediateContext;
pImmediateContext->End(pSyncQuery);
HRESULT hr;
do
{
hr = pImmediateContext->GetData(pSyncQuery, pOutGpu, sizeof(*pOutGpu), 0);
} while(hr == S_FALSE);
*pOutCPU = MP_TICK();
switch(hr)
{
case DXGI_ERROR_DEVICE_REMOVED:
case DXGI_ERROR_INVALID_CALL:
case E_INVALIDARG:
MP_BREAK();
return false;
}
MP_ASSERT(hr == S_OK);
return 1;
}
}
return 0;
}
// Returns the installed GPU timer state as a D3D11 state, or nullptr when none is
// installed or the installed one is tagged with a different backend type.
MicroProfileGpuTimerStateD3D11* MicroProfileGetGpuTimerStateD3D11()
{
    const bool bIsD3D11 = S.pGPU && S.pGPU->Type == MicroProfileGpuTimerStateType_D3D11;
    if(!bIsD3D11)
    {
        return nullptr;
    }
    return (MicroProfileGpuTimerStateD3D11*)S.pGPU;
}
#endif
#if MICROPROFILE_GPU_TIMERS_D3D12
//:'######:::'########::'##::::'##::::'########:::'#######::'########:::::'##::::'#######::
//'##... ##:: ##.... ##: ##:::: ##:::: ##.... ##:'##.... ##: ##.... ##::'####:::'##.... ##:
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##:..::::: ##: ##:::: ##::.. ##:::..::::: ##:
// ##::'####: ########:: ##:::: ##:::: ##:::: ##::'#######:: ##:::: ##:::: ##::::'#######::
// ##::: ##:: ##.....::: ##:::: ##:::: ##:::: ##::...... ##: ##:::: ##:::: ##:::'##::::::::
// ##::: ##:: ##:::::::: ##:::: ##:::: ##:::: ##:'##:::: ##: ##:::: ##:::: ##::: ##::::::::
//. ######::: ##::::::::. #######::::: ########::. #######:: ########:::'######: #########:
//:......::::..::::::::::.......::::::........::::.......:::........::::......::.........::
#include <d3d12.h>
uint32_t MicroProfileGpuInsertTimeStampD3D12(void* pContext)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU || !pContext)
return 0;
ID3D12GraphicsCommandList* pCommandList = (ID3D12GraphicsCommandList*)pContext;
bool IsCopy = D3D12_COMMAND_LIST_TYPE_COPY == pCommandList->GetType();
uint32_t nNode = pGPU->nCurrentNode;
uint32_t nFrame = pGPU->nFrame;
ID3D12QueryHeap* pHeap = IsCopy ? pGPU->NodeState[nNode].pCopyQueueHeap : pGPU->NodeState[nNode].pHeap;
uint32_t nQueryIndex = IsCopy ? ((pGPU->nFrameCountCopyQueueTimeStamps.fetch_add(1) + pGPU->nFrameStartCopyQueueTimeStamps) % MICROPROFILE_D3D12_MAX_QUERIES)
: ((pGPU->nFrameCountTimeStamps.fetch_add(1) + pGPU->nFrameStartTimeStamps) % MICROPROFILE_D3D12_MAX_QUERIES);
pCommandList->EndQuery(pHeap, D3D12_QUERY_TYPE_TIMESTAMP, nQueryIndex);
MP_ASSERT(nQueryIndex <= 0xffff);
uint32_t res = (IsCopy ? 0x80000000 : 0) | ((nFrame << 16) & 0x7fff0000) | (nQueryIndex);
return res;
}
void MicroProfileGpuFetchRange(uint32_t nBegin, int32_t nCount, uint64_t nFrame, int64_t nTimestampOffset)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU || nCount <= 0)
return;
void* pData = 0;
// uprintf("fetch [%d-%d]\n", nBegin, nBegin + nCount);
D3D12_RANGE Range = { sizeof(uint64_t) * nBegin, sizeof(uint64_t) * (nBegin + nCount) };
pGPU->pBuffer->Map(0, &Range, &pData);
memcpy(&pGPU->nResults[nBegin], nBegin + (uint64_t*)pData, nCount * sizeof(uint64_t));
for(int i = 0; i < nCount; ++i)
{
pGPU->nQueryFrames[i + nBegin] = nFrame;
pGPU->nResults[i + nBegin] -= nTimestampOffset;
}
pGPU->pBuffer->Unmap(0, 0);
}
void MicroProfileGpuFetchRangeCopy(uint32_t nBegin, int32_t nCount, uint64_t nFrame, int64_t nTimestampOffset)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU || nCount <= 0)
return;
void* pData = 0;
D3D12_RANGE Range = { sizeof(uint64_t) * nBegin, sizeof(uint64_t) * (nBegin + nCount) };
pGPU->pBufferCopy->Map(0, &Range, &pData);
memcpy(&pGPU->nResultsCopy[nBegin], nBegin + (uint64_t*)pData, nCount * sizeof(uint64_t));
for(int i = 0; i < nCount; ++i)
{
pGPU->nQueryFramesCopy[i + nBegin] = nFrame;
pGPU->nResultsCopy[i + nBegin] -= nTimestampOffset;
}
pGPU->pBufferCopy->Unmap(0, 0);
}
void MicroProfileGpuWaitFenceD3D12(uint32_t nNode, uint64_t nFence)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
return;
auto GetFence = [&]() -> uint64_t
{
uint64_t f0 = pGPU->NodeState[nNode].pFence->GetCompletedValue();
uint64_t f1 = pGPU->NodeState[nNode].pFenceCopy->GetCompletedValue();
return MicroProfileMin(f0, f1);
};
uint64_t nCompletedFrame = GetFence();
// while(nCompletedFrame < nPending)
// while(0 < nPending - nCompletedFrame)
while(0 < (int64_t)(nFence - nCompletedFrame))
{
MICROPROFILE_SCOPEI("Microprofile", "gpu-wait", MP_GREEN4);
Sleep(20); // todo: use event.
nCompletedFrame = GetFence();
if((uint64_t)-1 == nCompletedFrame) // likely device removed.
return;
}
}
// Reads back the timestamps of every pending frame up to and including nFrame.
//
// For each pending frame: waits for its fences, computes a timestamp offset when the
// frame ran on a node other than 0, copies the regular and copy-queue timestamp ranges
// (each split in two when it wraps around the end of the query ring), then advances
// pGPU->nPendingFrame.
// Does nothing when no D3D12 timer state is installed.
void MicroProfileGpuFetchResultsD3D12(uint64_t nFrame)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
return;
uint64_t nPending = pGPU->nPendingFrame;
// The loop condition is the wrap-around safe form of the two comparisons sketched below.
// while(nPending <= nFrame)
// while(0 <= nFrame - nPending)
while(0 <= (int64_t)(nFrame - nPending))
{
// Per-frame bookkeeping lives in a small ring indexed by frame number.
uint32_t nInternal = nPending % MICROPROFILE_D3D_INTERNAL_DELAY;
uint32_t nNode = pGPU->Frames[nInternal].nNode;
MicroProfileGpuWaitFenceD3D12(nNode, nPending);
int64_t nTimestampOffset = 0;
if(nNode != 0)
{
// Adjust timestamp queries from GPU x to be in GPU 0's frame of reference
HRESULT hr;
int64_t nCPU0, nGPU0;
hr = pGPU->NodeState[0].pCommandQueue->GetClockCalibration((uint64_t*)&nGPU0, (uint64_t*)&nCPU0);
MP_ASSERT(hr == S_OK);
int64_t nCPUx, nGPUx;
hr = pGPU->NodeState[nNode].pCommandQueue->GetClockCalibration((uint64_t*)&nGPUx, (uint64_t*)&nCPUx);
MP_ASSERT(hr == S_OK);
// The two calibrations are sampled at different CPU times; convert that CPU gap to GPU
// ticks and remove it from the raw GPU clock difference.
int64_t nFreqCPU = MicroProfileTicksPerSecondCpu();
int64_t nElapsedCPU = nCPUx - nCPU0;
int64_t nElapsedGPU = pGPU->nFrequency * nElapsedCPU / nFreqCPU;
nTimestampOffset = nGPUx - nGPU0 - nElapsedGPU;
}
{
// Regular queue: first call fetches up to the end of the ring, second call fetches the
// wrapped remainder (its count is <= 0, and therefore ignored, when nothing wrapped).
uint32_t nTimeStampBegin = pGPU->Frames[nInternal].nTimeStampBegin;
uint32_t nTimeStampCount = pGPU->Frames[nInternal].nTimeStampCount;
MicroProfileGpuFetchRange(
nTimeStampBegin, (nTimeStampBegin + nTimeStampCount) > MICROPROFILE_D3D12_MAX_QUERIES ? MICROPROFILE_D3D12_MAX_QUERIES - nTimeStampBegin : nTimeStampCount, nPending, nTimestampOffset);
MicroProfileGpuFetchRange(0, (nTimeStampBegin + nTimeStampCount) - MICROPROFILE_D3D12_MAX_QUERIES, nPending, nTimestampOffset);
}
{
// Copy queue: same two-part fetch as above, using the copy-queue range.
uint32_t nTimeStampBegin = pGPU->Frames[nInternal].nTimeStampBeginCopyQueue;
uint32_t nTimeStampCount = pGPU->Frames[nInternal].nTimeStampCountCopyQueue;
MicroProfileGpuFetchRangeCopy(
nTimeStampBegin, (nTimeStampBegin + nTimeStampCount) > MICROPROFILE_D3D12_MAX_QUERIES ? MICROPROFILE_D3D12_MAX_QUERIES - nTimeStampBegin : nTimeStampCount, nPending, nTimestampOffset);
MicroProfileGpuFetchRangeCopy(0, (nTimeStampBegin + nTimeStampCount) - MICROPROFILE_D3D12_MAX_QUERIES, nPending, nTimestampOffset);
}
nPending = ++pGPU->nPendingFrame;
MP_ASSERT(pGPU->nFrame > nPending);
}
}
uint64_t MicroProfileGpuGetTimeStampD3D12(uint32_t nIndex)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
return 0;
uint32_t nFrame = nIndex >> 16;
bool IsCopy = (nFrame & 0x8000) != 0;
nFrame &= 0x7fff;
uint32_t nQueryIndex = nIndex & 0xffff;
uint32_t lala = IsCopy ? pGPU->nQueryFramesCopy[nQueryIndex] : pGPU->nQueryFrames[nQueryIndex];
// uprintf("read TS [%d <- %lld]\n", nQueryIndex, pGPU->nResults[nQueryIndex]);
MP_ASSERT(nIndex == 0 || (0x7fff & lala) == nFrame);
uint64_t r = IsCopy ? pGPU->nResultsCopy[nQueryIndex] : pGPU->nResults[nQueryIndex];
if(r == 0x7fffffffffffffff)
{
MP_BREAK();
}
return r;
}
uint64_t MicroProfileTicksPerSecondGpuD3D12()
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
return 1;
return pGPU->nFrequency;
}
// Ends the current GPU profiling frame on D3D12.
//
// Records a frame timestamp, resolves the timestamps issued this frame (regular and
// copy queue) into the readback buffers, submits the resolve command lists, signals both
// fences with the frame number, stores the frame's ranges, advances the frame counter and
// fetches results of frames that are MICROPROFILE_GPU_FRAME_DELAY behind.
//
// pContext: not used by this function.
// Returns: the handle of the frame timestamp, or 0 when no D3D12 timer state is installed.
uint32_t MicroProfileGpuFlipD3D12(void* pContext)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
return 0;
uint32_t nNode = pGPU->nCurrentNode;
uint32_t nFrameIndex = pGPU->nFrame % MICROPROFILE_D3D_INTERNAL_DELAY;
uint32_t nCount = 0, nStart = 0;
uint32_t nCountCopyQueue = 0, nStartCopyQueue = 0;
// Reuse this frame slot's allocators and command lists.
ID3D12CommandAllocator* pCommandAllocator = pGPU->Frames[nFrameIndex].pCommandAllocator;
ID3D12CommandAllocator* pCommandAllocatorCopy = pGPU->Frames[nFrameIndex].pCommandAllocatorCopy;
pCommandAllocator->Reset();
pCommandAllocatorCopy->Reset();
ID3D12GraphicsCommandList* pCommandList = pGPU->Frames[nFrameIndex].pCommandList[nNode];
pCommandList->Reset(pCommandAllocator, nullptr);
ID3D12GraphicsCommandList* pCommandListCopy = nullptr;
// The frame timestamp is inserted before the counters are sampled, so it is part of this frame's range.
uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStamp(pCommandList);
{
// Regular queue: take this frame's query count and advance the ring start for the next frame.
nCount = pGPU->nFrameCountTimeStamps.exchange(0);
nStart = pGPU->nFrameStartTimeStamps;
pGPU->nFrameStartTimeStamps = (pGPU->nFrameStartTimeStamps + nCount) % MICROPROFILE_D3D12_MAX_QUERIES;
// First resolve covers [nStart, nEnd), clamped to the end of the ring.
uint32_t nEnd = MicroProfileMin(nStart + nCount, (uint32_t)MICROPROFILE_D3D12_MAX_QUERIES);
MP_ASSERT(nStart != nEnd);
uint32_t nSize = nEnd - nStart;
pCommandList->ResolveQueryData(pGPU->NodeState[nNode].pHeap, D3D12_QUERY_TYPE_TIMESTAMP, nStart, nEnd - nStart, pGPU->pBuffer, nStart * sizeof(int64_t));
if(nStart + nCount > MICROPROFILE_D3D12_MAX_QUERIES)
{
// Wrapped part, resolved from index 0.
// NOTE(review): in this branch nEnd equals MICROPROFILE_D3D12_MAX_QUERIES, so the count
// below evaluates to nStart rather than nStart + nCount - MICROPROFILE_D3D12_MAX_QUERIES
// (the count MicroProfileGpuFetchResultsD3D12 fetches) - confirm this is intended.
pCommandList->ResolveQueryData(pGPU->NodeState[nNode].pHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, nEnd + nStart - MICROPROFILE_D3D12_MAX_QUERIES, pGPU->pBuffer, 0);
}
pCommandList->Close();
}
{
// Copy queue: same scheme, but the frame may legitimately contain no copy timestamps.
pCommandListCopy = pGPU->Frames[nFrameIndex].pCommandListCopy[nNode];
pCommandListCopy->Reset(pCommandAllocatorCopy, nullptr);
nCountCopyQueue = pGPU->nFrameCountCopyQueueTimeStamps.exchange(0);
nStartCopyQueue = pGPU->nFrameStartCopyQueueTimeStamps;
pGPU->nFrameStartCopyQueueTimeStamps = (nStartCopyQueue + nCountCopyQueue) % MICROPROFILE_D3D12_MAX_QUERIES;
uint32_t nEnd = MicroProfileMin(nStartCopyQueue + nCountCopyQueue, (uint32_t)MICROPROFILE_D3D12_MAX_QUERIES);
if(nStartCopyQueue != nEnd)
{
uint32_t nSize = nEnd - nStartCopyQueue;
pCommandListCopy->ResolveQueryData(
pGPU->NodeState[nNode].pCopyQueueHeap, D3D12_QUERY_TYPE_TIMESTAMP, nStartCopyQueue, nEnd - nStartCopyQueue, pGPU->pBufferCopy, nStartCopyQueue * sizeof(int64_t));
if(nStartCopyQueue + nCountCopyQueue > MICROPROFILE_D3D12_MAX_QUERIES)
{
// NOTE(review): same wrapped-count expression as in the regular queue above - confirm.
pCommandListCopy->ResolveQueryData(pGPU->NodeState[nNode].pCopyQueueHeap, D3D12_QUERY_TYPE_TIMESTAMP, 0, nEnd + nStartCopyQueue - MICROPROFILE_D3D12_MAX_QUERIES, pGPU->pBufferCopy, 0);
}
}
pCommandListCopy->Close();
}
// Submit the resolve work on the matching queues.
if(pCommandList)
{
ID3D12CommandList* pList = pCommandList;
pGPU->NodeState[nNode].pCommandQueue->ExecuteCommandLists(1, &pList);
}
if(pCommandListCopy)
{
ID3D12CommandList* pList = pCommandListCopy;
pGPU->NodeState[nNode].pCommandQueueCopy->ExecuteCommandLists(1, &pList);
}
// Signal both fences with the frame number; MicroProfileGpuWaitFenceD3D12 waits on these values.
pGPU->NodeState[nNode].pCommandQueue->Signal(pGPU->NodeState[nNode].pFence, pGPU->nFrame);
pGPU->NodeState[nNode].pCommandQueueCopy->Signal(pGPU->NodeState[nNode].pFenceCopy, pGPU->nFrame);
// Remember what this frame used so the results can be fetched later.
pGPU->Frames[nFrameIndex].nTimeStampBegin = nStart;
pGPU->Frames[nFrameIndex].nTimeStampCount = nCount;
pGPU->Frames[nFrameIndex].nTimeStampBeginCopyQueue = nStartCopyQueue;
pGPU->Frames[nFrameIndex].nTimeStampCountCopyQueue = nCountCopyQueue;
pGPU->Frames[nFrameIndex].nNode = nNode;
pGPU->nFrame++;
// fetch from earlier frames
MicroProfileGpuFetchResultsD3D12(pGPU->nFrame - MICROPROFILE_GPU_FRAME_DELAY);
return nFrameTimeStamp;
}
// Creates the D3D12 GPU timer backend and registers it with the profiler.
//
// pDevice_            ID3D12Device* used to create every resource below.
// nNodeCount          number of GPU nodes; must not exceed MICROPROFILE_D3D_MAX_NODE_COUNT.
// pCommandQueues_     array of nNodeCount ID3D12CommandQueue* (direct queues).
// pCommandQueuesCopy_ array of nNodeCount ID3D12CommandQueue* (copy queues).
//
// Per node this creates a timestamp query heap, a copy-queue timestamp query heap and two fences.
// Shared across nodes are two readback buffers (one per queue type), and per frame slot one
// allocator per queue type plus one closed command list per node and queue type.
void MicroProfileGpuInitD3D12(void* pDevice_, uint32_t nNodeCount, void** pCommandQueues_, void** pCommandQueuesCopy_)
{
    MicroProfileGpuTimerStateD3D12* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateD3D12);
    memset(pGPU, 0, sizeof(MicroProfileGpuTimerStateD3D12));
    MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_D3D12,
                                pGPU,
                                MicroProfileGpuInsertTimeStampD3D12,
                                MicroProfileGpuGetTimeStampD3D12,
                                MicroProfileTicksPerSecondGpuD3D12,
                                MicroProfileGetGpuTickReferenceD3D12,
                                MicroProfileGpuFlipD3D12,
                                MicroProfileGpuShutdownD3D12);
    ID3D12Device* pDevice = (ID3D12Device*)pDevice_;
    pGPU->pDevice = pDevice;
    pGPU->nNodeCount = nNodeCount;
    MP_ASSERT(pGPU->nNodeCount <= MICROPROFILE_D3D_MAX_NODE_COUNT);
    for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
    {
        pGPU->NodeState[nNode].pCommandQueue = (ID3D12CommandQueue*)pCommandQueues_[nNode];
        pGPU->NodeState[nNode].pCommandQueueCopy = (ID3D12CommandQueue*)pCommandQueuesCopy_[nNode];
        if(nNode == 0)
        {
            pGPU->NodeState[nNode].pCommandQueue->GetTimestampFrequency((uint64_t*)&(pGPU->nFrequency));
            MP_ASSERT(pGPU->nFrequency);
        }
        else
        {
            // Don't support GPUs with different timer frequencies for now
            int64_t nFrequency;
            pGPU->NodeState[nNode].pCommandQueue->GetTimestampFrequency((uint64_t*)&nFrequency);
            MP_ASSERT(nFrequency == pGPU->nFrequency);
        }
        D3D12_QUERY_HEAP_DESC QHDesc;
        QHDesc.Count = MICROPROFILE_D3D12_MAX_QUERIES;
        QHDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
        QHDesc.NodeMask = MP_NODE_MASK_ONE(nNode);
        HRESULT hr = pDevice->CreateQueryHeap(&QHDesc, IID_PPV_ARGS(&pGPU->NodeState[nNode].pHeap));
        MP_ASSERT(hr == S_OK);
        QHDesc.Type = D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP;
        hr = pDevice->CreateQueryHeap(&QHDesc, IID_PPV_ARGS(&pGPU->NodeState[nNode].pCopyQueueHeap));
        MP_ASSERT(hr == S_OK);
        // Fence creation is checked like every other creation call: a null fence would
        // otherwise only surface later, when it is signalled or released.
        hr = pDevice->CreateFence(pGPU->nPendingFrame, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pGPU->NodeState[nNode].pFence));
        MP_ASSERT(hr == S_OK);
        hr = pDevice->CreateFence(pGPU->nPendingFrame, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pGPU->NodeState[nNode].pFenceCopy));
        MP_ASSERT(hr == S_OK);
    }
    HRESULT hr;
    D3D12_HEAP_PROPERTIES HeapProperties;
    HeapProperties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    HeapProperties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    HeapProperties.CreationNodeMask = 0;
    HeapProperties.VisibleNodeMask = MP_NODE_MASK_ALL(pGPU->nNodeCount);
    HeapProperties.Type = D3D12_HEAP_TYPE_READBACK;
    // One 8-byte slot per query.
    const size_t nResourceSize = MICROPROFILE_D3D12_MAX_QUERIES * 8;
    D3D12_RESOURCE_DESC ResourceDesc;
    ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    ResourceDesc.Alignment = 0;
    ResourceDesc.Width = nResourceSize;
    ResourceDesc.Height = 1;
    ResourceDesc.DepthOrArraySize = 1;
    ResourceDesc.MipLevels = 1;
    ResourceDesc.Format = DXGI_FORMAT_UNKNOWN;
    ResourceDesc.SampleDesc.Count = 1;
    ResourceDesc.SampleDesc.Quality = 0;
    ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
    ResourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
    hr = pDevice->CreateCommittedResource(&HeapProperties, D3D12_HEAP_FLAG_NONE, &ResourceDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&pGPU->pBuffer));
    MP_ASSERT(hr == S_OK);
    hr = pDevice->CreateCommittedResource(&HeapProperties, D3D12_HEAP_FLAG_NONE, &ResourceDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&pGPU->pBufferCopy));
    MP_ASSERT(hr == S_OK);
    pGPU->nFrame = 0;
    pGPU->nPendingFrame = 0;
    for(MicroProfileFrameD3D12& Frame : pGPU->Frames)
    {
        hr = pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&Frame.pCommandAllocator));
        MP_ASSERT(hr == S_OK);
        hr = pDevice->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COPY, IID_PPV_ARGS(&Frame.pCommandAllocatorCopy));
        MP_ASSERT(hr == S_OK);
        for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
        {
            // Command lists are created in the recording state; close them right away.
            hr = pDevice->CreateCommandList(MP_NODE_MASK_ONE(nNode), D3D12_COMMAND_LIST_TYPE_DIRECT, Frame.pCommandAllocator, nullptr, IID_PPV_ARGS(&Frame.pCommandList[nNode]));
            MP_ASSERT(hr == S_OK);
            hr = Frame.pCommandList[nNode]->Close();
            MP_ASSERT(hr == S_OK);
            hr = pDevice->CreateCommandList(MP_NODE_MASK_ONE(nNode), D3D12_COMMAND_LIST_TYPE_COPY, Frame.pCommandAllocatorCopy, nullptr, IID_PPV_ARGS(&Frame.pCommandListCopy[nNode]));
            MP_ASSERT(hr == S_OK);
            hr = Frame.pCommandListCopy[nNode]->Close();
            MP_ASSERT(hr == S_OK);
        }
    }
}
void MicroProfileGpuShutdownD3D12()
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
return;
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
{
MicroProfileGpuWaitFenceD3D12(nNode, pGPU->nFrame - 1);
}
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
{
pGPU->NodeState[nNode].pHeap->Release();
pGPU->NodeState[nNode].pCopyQueueHeap->Release();
pGPU->NodeState[nNode].pFence->Release();
pGPU->NodeState[nNode].pFenceCopy->Release();
}
pGPU->pBuffer->Release();
pGPU->pBufferCopy->Release();
for(MicroProfileFrameD3D12& Frame : pGPU->Frames)
{
Frame.pCommandAllocator->Release();
Frame.pCommandAllocatorCopy->Release();
for(uint32_t nNode = 0; nNode < pGPU->nNodeCount; ++nNode)
{
Frame.pCommandList[nNode]->Release();
Frame.pCommandListCopy[nNode]->Release();
}
}
}
// Selects the GPU node that subsequent D3D12 timestamp/flip calls operate on.
// Does nothing when the D3D12 backend is not the active GPU timer backend, so the call
// is safe before initialisation (this matches MicroProfileSetCurrentNodeVulkan).
void MicroProfileSetCurrentNodeD3D12(uint32_t nNode)
{
    MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
    if(!pGPU)
        return;
    pGPU->nCurrentNode = nNode;
}
int MicroProfileGetGpuTickReferenceD3D12(int64_t* pOutCPU, int64_t* pOutGpu)
{
MicroProfileGpuTimerStateD3D12* pGPU = MicroProfileGetGpuTimerStateD3D12();
if(!pGPU)
{
*pOutCPU = 1;
*pOutGpu = 1;
return 1;
}
HRESULT hr = pGPU->NodeState[0].pCommandQueue->GetClockCalibration((uint64_t*)pOutGpu, (uint64_t*)pOutCPU);
MP_ASSERT(hr == S_OK);
return 1;
}
// Returns the active GPU timer state downcast to the D3D12 type,
// or nullptr when no GPU backend is set or it is of a different type.
MicroProfileGpuTimerStateD3D12* MicroProfileGetGpuTimerStateD3D12()
{
    const bool bIsD3D12 = S.pGPU && S.pGPU->Type == MicroProfileGpuTimerStateType_D3D12;
    if(!bIsD3D12)
    {
        return nullptr;
    }
    return (MicroProfileGpuTimerStateD3D12*)S.pGPU;
}
#endif
#if MICROPROFILE_GPU_TIMERS_VULKAN
//:'######:::'########::'##::::'##::::'##::::'##:'##::::'##:'##:::::::'##:::'##::::'###::::'##::: ##:
//'##... ##:: ##.... ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##::'##::::'## ##::: ###:: ##:
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: ##:'##::::'##:. ##:: ####: ##:
// ##::'####: ########:: ##:::: ##:::: ##:::: ##: ##:::: ##: ##::::::: #####::::'##:::. ##: ## ## ##:
// ##::: ##:: ##.....::: ##:::: ##::::. ##:: ##:: ##:::: ##: ##::::::: ##. ##::: #########: ##. ####:
// ##::: ##:: ##:::::::: ##:::: ##:::::. ## ##::: ##:::: ##: ##::::::: ##:. ##:: ##.... ##: ##:. ###:
//. ######::: ##::::::::. #######:::::::. ###::::. #######:: ########: ##::. ##: ##:::: ##: ##::. ##:
//:......::::..::::::::::.......:::::::::...::::::.......:::........::..::::..::..:::::..::..::::..::
#ifndef MICROPROFILE_VULKAN_MAX_QUERIES
#define MICROPROFILE_VULKAN_MAX_QUERIES (32 << 10)
#endif
#define MICROPROFILE_VULKAN_MAX_NODE_COUNT 4
#define MICROPROFILE_VULKAN_INTERNAL_DELAY 8
#include <vulkan/vulkan.h>
// Book-keeping for one in-flight frame slot of the Vulkan timer backend.
struct MicroProfileGpuFrameVulkan
{
// First query index used by the frame (written by MicroProfileGpuFlipVulkan).
uint32_t nBegin;
// Number of timestamps the frame recorded; nBegin + nCount may run past the end of the query range and wrap.
uint32_t nCount;
// Node that was current when the frame was flipped.
uint32_t nNode;
// One command buffer per node; the flip records query resets into it and submits it.
VkCommandBuffer CommandBuffer[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
// One fence per node, passed to vkQueueSubmit together with the command buffer above.
VkFence Fences[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
};
// State of the Vulkan GPU timer backend. Per-node arrays are indexed by node and filled
// by MicroProfileGpuInitVulkan.
struct MicroProfileGpuTimerStateVulkan : public MicroProfileGpuTimerState
{
VkDevice Devices[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
VkPhysicalDevice PhysicalDevices[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
VkQueue Queues[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
// Timestamp pool per node; created with MICROPROFILE_VULKAN_MAX_QUERIES + 1 entries, the
// extra last entry being used by MicroProfileGetGpuTickReferenceVulkan.
VkQueryPool QueryPool[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
VkCommandPool CommandPool[MICROPROFILE_VULKAN_MAX_NODE_COUNT];
uint32_t nNodeCount;
// Node used by timestamp insertion and flip; set through MicroProfileSetCurrentNodeVulkan.
uint32_t nCurrentNode;
// Incremented once per flip.
uint64_t nFrame;
// Next frame whose query results have not been fetched yet.
uint64_t nPendingFrame;
// Query index at which the current frame's timestamps start.
uint32_t nFrameStart;
// Number of timestamps inserted so far in the current frame.
std::atomic<uint32_t> nFrameCount;
// GPU ticks per second, derived from the device's timestampPeriod limit.
int64_t nFrequency;
// Per query slot: frame number (truncated to 16 bits) that last fetched the slot.
uint16_t nQueryFrames[MICROPROFILE_VULKAN_MAX_QUERIES];
// Per query slot: 64-bit result as returned by vkGetQueryPoolResults.
int64_t nResults[MICROPROFILE_VULKAN_MAX_QUERIES];
// Ring of frame slots, indexed by frame number modulo MICROPROFILE_VULKAN_INTERNAL_DELAY.
MicroProfileGpuFrameVulkan Frames[MICROPROFILE_VULKAN_INTERNAL_DELAY];
};
// Returns the active GPU timer state downcast to the Vulkan type,
// or nullptr when no GPU backend is set or it is of a different type.
MicroProfileGpuTimerStateVulkan* MicroProfileGetGpuTimerStateVulkan()
{
    const bool bIsVulkan = S.pGPU && S.pGPU->Type == MicroProfileGpuTimerStateType_Vulkan;
    if(!bIsVulkan)
    {
        return nullptr;
    }
    return (MicroProfileGpuTimerStateVulkan*)S.pGPU;
}
uint32_t MicroProfileGpuInsertTimeStampVulkan(void* pContext)
{
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return 0;
VkCommandBuffer CB = (VkCommandBuffer)pContext;
uint32_t nNode = pGPU->nCurrentNode;
uint32_t nFrame = pGPU->nFrame;
uint32_t nQueryIndex = (pGPU->nFrameCount.fetch_add(1) + pGPU->nFrameStart) % MICROPROFILE_VULKAN_MAX_QUERIES;
vkCmdWriteTimestamp(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, pGPU->QueryPool[nNode], nQueryIndex);
MP_ASSERT(nQueryIndex <= 0xffff);
// uprintf("insert timestamp %d :: %d ... ctx %p\n", nQueryIndex, nFrame, pContext);
return ((nFrame << 16) & 0xffff0000) | (nQueryIndex);
}
// Reads back nCount query results starting at nBegin for the given node, records a reset of
// the same range into CommandBuffer, and tags each slot with nFrame (stored truncated to
// 16 bits). Does nothing for nCount <= 0 or when the Vulkan backend is not active.
// nTimestampOffset is currently unused.
void MicroProfileGpuFetchRangeVulkan(VkCommandBuffer CommandBuffer, uint32_t nNode, uint32_t nBegin, int32_t nCount, uint64_t nFrame, int64_t nTimestampOffset)
{
    MicroProfileGpuTimerStateVulkan* pState = nCount > 0 ? MicroProfileGetGpuTimerStateVulkan() : nullptr;
    if(nullptr == pState)
    {
        return;
    }
    VkQueryPool Pool = pState->QueryPool[nNode];
    vkGetQueryPoolResults(pState->Devices[nNode], Pool, nBegin, nCount, 8 * nCount, &pState->nResults[nBegin], 8, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
    vkCmdResetQueryPool(CommandBuffer, Pool, nBegin, nCount);
    int nTagged = 0;
    while(nTagged < nCount)
    {
        pState->nQueryFrames[nTagged + nBegin] = nFrame;
        ++nTagged;
    }
}
void MicroProfileGpuWaitFenceVulkan(uint32_t nNode, uint64_t nFrame)
{
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return;
int r;
int c = 0;
do
{
MICROPROFILE_SCOPEI("Microprofile", "gpu-wait", MP_GREEN4);
r = vkWaitForFences(pGPU->Devices[nNode], 1, &pGPU->Frames[nFrame].Fences[nNode], 1, 1000 * 30);
#if 0
if(c++ > 1000 && (c%100) == 0)
{
uprintf("waiting really long time for fence\n");
OutputDebugString("waiting really long time for fence\n");
}
#endif
} while(r != VK_SUCCESS);
}
// Fetches query results for every pending frame up to and including nFrame.
// For each such frame: wait for its fence, read back its query range (split in two when the
// range wraps past MICROPROFILE_VULKAN_MAX_QUERIES) and advance nPendingFrame.
// Query resets for the fetched ranges are recorded into Buffer.
void MicroProfileGpuFetchResultsVulkan(VkCommandBuffer Buffer, uint64_t nFrame)
{
    MicroProfileGpuTimerStateVulkan* pState = MicroProfileGetGpuTimerStateVulkan();
    if(nullptr == pState)
    {
        return;
    }
    uint64_t nCursor = pState->nPendingFrame;
    // Signed difference so that nFrame "behind" nCursor (including wrapped values) ends the loop.
    while((int64_t)(nFrame - nCursor) >= 0)
    {
        const uint32_t nSlot = nCursor % MICROPROFILE_VULKAN_INTERNAL_DELAY;
        MicroProfileGpuFrameVulkan& Frame = pState->Frames[nSlot];
        const uint32_t nNode = Frame.nNode;
        MicroProfileGpuWaitFenceVulkan(nNode, nSlot);
        const int64_t nTimestampOffset = 0;
        if(0 != nNode)
        {
            // note: timestamp adjustment between nodes is not implemented.
            MP_ASSERT(0 && "NOT IMPLEMENTED");
        }
        const uint32_t nBegin = Frame.nBegin;
        const uint32_t nCount = Frame.nCount;
        const uint32_t nEnd = nBegin + nCount;
        // Part of the range before the wrap point (or the whole range when it does not wrap).
        const uint32_t nHeadCount = nEnd > MICROPROFILE_VULKAN_MAX_QUERIES ? MICROPROFILE_VULKAN_MAX_QUERIES - nBegin : nCount;
        // Part after the wrap point; becomes non-positive as int32_t when there is no wrap,
        // which makes the fetch below a no-op.
        const uint32_t nTailCount = nEnd - MICROPROFILE_VULKAN_MAX_QUERIES;
        MicroProfileGpuFetchRangeVulkan(Buffer, nNode, nBegin, nHeadCount, nCursor, nTimestampOffset);
        MicroProfileGpuFetchRangeVulkan(Buffer, nNode, 0, nTailCount, nCursor, nTimestampOffset);
        nCursor = ++pState->nPendingFrame;
        MP_ASSERT(pState->nFrame > nCursor);
    }
}
uint64_t MicroProfileGpuGetTimeStampVulkan(uint32_t nIndex)
{
if(nIndex == (uint32_t)-1)
{
return 0;
}
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return 0;
uint32_t nFrame = nIndex >> 16;
uint32_t nQueryIndex = nIndex & 0xffff;
uint32_t lala = pGPU->nQueryFrames[nQueryIndex];
MP_ASSERT((0xffff & lala) == nFrame);
// uprintf("read TS [%d <- %lld]\n", nQueryIndex, pGPU->nResults[nQueryIndex]);
return pGPU->nResults[nQueryIndex];
}
uint64_t MicroProfileTicksPerSecondGpuVulkan()
{
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return 1;
return pGPU->nFrequency;
}
uint32_t MicroProfileGpuFlipVulkan(void* pContext)
{
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return 0;
uint32_t nNode = pGPU->nCurrentNode;
uint32_t nFrameIndex = pGPU->nFrame % MICROPROFILE_VULKAN_INTERNAL_DELAY;
uint32_t nCount = 0, nStart = 0;
VkCommandBuffer CommandBuffer = pGPU->Frames[nFrameIndex].CommandBuffer[nNode];
auto& F = pGPU->Frames[nFrameIndex];
VkFence Fence = F.Fences[nNode];
VkDevice Device = pGPU->Devices[nNode];
VkQueue Queue = pGPU->Queues[nNode];
vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1);
uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStamp(pContext);
vkResetCommandBuffer(F.CommandBuffer[nNode], VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
VkCommandBufferBeginInfo CBI;
CBI.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
CBI.pNext = 0;
CBI.pInheritanceInfo = 0;
CBI.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(F.CommandBuffer[nNode], &CBI);
vkResetFences(Device, 1, &Fence);
nCount = pGPU->nFrameCount.exchange(0);
nStart = pGPU->nFrameStart;
pGPU->nFrameStart = (pGPU->nFrameStart + nCount) % MICROPROFILE_VULKAN_MAX_QUERIES;
uint32_t nEnd = MicroProfileMin(nStart + nCount, (uint32_t)MICROPROFILE_VULKAN_MAX_QUERIES);
MP_ASSERT(nStart != nEnd);
uint32_t nSize = nEnd - nStart;
pGPU->Frames[nFrameIndex].nBegin = nStart;
pGPU->Frames[nFrameIndex].nCount = nCount;
pGPU->Frames[nFrameIndex].nNode = nNode;
pGPU->nFrame++;
////fetch from earlier frames
MicroProfileGpuFetchResultsVulkan(CommandBuffer, pGPU->nFrame - MICROPROFILE_GPU_FRAME_DELAY);
vkEndCommandBuffer(F.CommandBuffer[nNode]);
VkSubmitInfo SubmitInfo = {};
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
SubmitInfo.pNext = nullptr;
SubmitInfo.waitSemaphoreCount = 0;
SubmitInfo.pWaitSemaphores = nullptr;
SubmitInfo.commandBufferCount = 1;
SubmitInfo.pCommandBuffers = &CommandBuffer;
SubmitInfo.signalSemaphoreCount = 0;
SubmitInfo.pSignalSemaphores = nullptr;
vkQueueSubmit(Queue, 1, &SubmitInfo, Fence);
return nFrameTimeStamp;
}
// Creates the Vulkan GPU timer backend and registers it with the profiler.
//
// pDevices / pPhysicalDevices / pQueues / QueueFamily  arrays with one entry per node.
// nNodeCount  number of nodes; must not exceed MICROPROFILE_VULKAN_MAX_NODE_COUNT because
//             all per-node state lives in fixed-size arrays.
//
// Per node this creates a timestamp query pool (with one extra slot used for the CPU/GPU
// tick reference) and a command pool, and per frame slot a command buffer and a fence.
// Slot 0's command buffer is used once to reset the entire query pool; its fence is therefore
// created unsignalled and signalled by that submission, while all other fences start signalled.
void MicroProfileGpuInitVulkan(VkDevice* pDevices, VkPhysicalDevice* pPhysicalDevices, VkQueue* pQueues, uint32_t* QueueFamily, uint32_t nNodeCount)
{
    MicroProfileGpuTimerStateVulkan* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateVulkan);
    memset(pGPU, 0, sizeof(MicroProfileGpuTimerStateVulkan));
    MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_Vulkan,
                                pGPU,
                                MicroProfileGpuInsertTimeStampVulkan,
                                MicroProfileGpuGetTimeStampVulkan,
                                MicroProfileTicksPerSecondGpuVulkan,
                                MicroProfileGetGpuTickReferenceVulkan,
                                MicroProfileGpuFlipVulkan,
                                MicroProfileGpuShutdownVulkan);
    pGPU->nNodeCount = nNodeCount;
    // Guard the fixed-size per-node arrays, as the D3D12 backend does.
    MP_ASSERT(pGPU->nNodeCount <= MICROPROFILE_VULKAN_MAX_NODE_COUNT);
    // Value-initialise the create-info structs so no member (e.g. pipelineStatistics)
    // is handed to the driver uninitialised.
    VkQueryPoolCreateInfo Q = {};
    Q.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    Q.pNext = 0;
    Q.flags = 0;
    Q.queryType = VK_QUERY_TYPE_TIMESTAMP;
    Q.queryCount = MICROPROFILE_VULKAN_MAX_QUERIES + 1;
    VkCommandPoolCreateInfo CreateInfo = {};
    CreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    CreateInfo.pNext = 0;
    CreateInfo.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
    VkResult r;
    for(uint32_t i = 0; i < nNodeCount; ++i)
    {
        pGPU->Devices[i] = pDevices[i];
        pGPU->PhysicalDevices[i] = pPhysicalDevices[i];
        pGPU->Queues[i] = pQueues[i];
        r = vkCreateQueryPool(pGPU->Devices[i], &Q, 0, &pGPU->QueryPool[i]);
        MP_ASSERT(r == VK_SUCCESS);
        CreateInfo.queueFamilyIndex = QueueFamily[i];
        r = vkCreateCommandPool(pGPU->Devices[i], &CreateInfo, 0, &pGPU->CommandPool[i]);
        MP_ASSERT(r == VK_SUCCESS);
        for(uint32_t j = 0; j < MICROPROFILE_VULKAN_INTERNAL_DELAY; ++j)
        {
            auto& F = pGPU->Frames[j];
            VkCommandBufferAllocateInfo AllocInfo = {};
            AllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
            AllocInfo.pNext = 0;
            AllocInfo.commandBufferCount = 1;
            AllocInfo.commandPool = pGPU->CommandPool[i];
            AllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
            r = vkAllocateCommandBuffers(pGPU->Devices[i], &AllocInfo, &F.CommandBuffer[i]);
            MP_ASSERT(r == VK_SUCCESS);
            VkFenceCreateInfo FCI = {};
            FCI.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
            FCI.pNext = 0;
            FCI.flags = j == 0 ? 0 : VK_FENCE_CREATE_SIGNALED_BIT;
            r = vkCreateFence(pGPU->Devices[i], &FCI, 0, &F.Fences[i]);
            MP_ASSERT(r == VK_SUCCESS);
            if(j == 0)
            {
                // One-off submission that resets every query, including the extra reference slot.
                VkCommandBufferBeginInfo CBI = {};
                CBI.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
                CBI.pNext = 0;
                CBI.pInheritanceInfo = 0;
                CBI.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
                vkBeginCommandBuffer(F.CommandBuffer[i], &CBI);
                vkCmdResetQueryPool(F.CommandBuffer[i], pGPU->QueryPool[i], 0, MICROPROFILE_VULKAN_MAX_QUERIES + 1);
                vkEndCommandBuffer(F.CommandBuffer[i]);
                VkSubmitInfo SubmitInfo = {};
                SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
                SubmitInfo.pNext = nullptr;
                SubmitInfo.waitSemaphoreCount = 0;
                SubmitInfo.pWaitSemaphores = nullptr;
                SubmitInfo.commandBufferCount = 1;
                SubmitInfo.pCommandBuffers = &F.CommandBuffer[i];
                SubmitInfo.signalSemaphoreCount = 0;
                SubmitInfo.pSignalSemaphores = nullptr;
                vkQueueSubmit(pQueues[i], 1, &SubmitInfo, F.Fences[i]);
                vkWaitForFences(pGPU->Devices[i], 1, &F.Fences[i], 1, (uint64_t)-1);
                vkResetCommandBuffer(F.CommandBuffer[i], VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
            }
        }
    }
    // timestampPeriod is nanoseconds per tick; the frequency is taken from node 0 only.
    VkPhysicalDeviceProperties Properties;
    vkGetPhysicalDeviceProperties(pPhysicalDevices[0], &Properties);
    pGPU->nFrequency = 1000000000ll / Properties.limits.timestampPeriod;
}
// Shutdown hook registered for the Vulkan backend by MicroProfileGpuInitVulkan.
// Intentionally empty at the moment: none of the query pools, command pools, command
// buffers or fences created during init are destroyed here.
void MicroProfileGpuShutdownVulkan()
{
// this is clearly leaking ..
}
void MicroProfileSetCurrentNodeVulkan(uint32_t nNode)
{
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return;
pGPU->nCurrentNode = nNode;
}
// Samples a matching CPU/GPU tick pair on the current node.
// Submits a tiny command buffer that writes one timestamp into the extra query slot
// (index MICROPROFILE_VULKAN_MAX_QUERIES), blocks until it has executed, reads the result
// into *pOutGpu and then samples the CPU clock into *pOutCPU.
// Returns 1 on completion, 0 when the Vulkan backend is not active.
int MicroProfileGetGpuTickReferenceVulkan(int64_t* pOutCPU, int64_t* pOutGpu)
{
MicroProfileGpuTimerStateVulkan* pGPU = MicroProfileGetGpuTimerStateVulkan();
if(!pGPU)
return 0;
// Borrow the command buffer and fence of the frame slot the next flip will use.
auto& F = pGPU->Frames[pGPU->nFrame % MICROPROFILE_VULKAN_INTERNAL_DELAY];
uint32_t nGpu = pGPU->nCurrentNode;
VkCommandBufferBeginInfo CBI;
CBI.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
CBI.pNext = 0;
CBI.pInheritanceInfo = 0;
CBI.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
VkCommandBuffer CB = F.CommandBuffer[nGpu];
VkDevice Device = pGPU->Devices[nGpu];
VkFence Fence = F.Fences[nGpu];
// Wait for the slot's previous submission before resetting its fence and command buffer.
vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1);
vkResetFences(Device, 1, &Fence);
vkResetCommandBuffer(CB, VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT);
vkBeginCommandBuffer(CB, &CBI);
// The reference slot is reset and rewritten on every call.
vkCmdResetQueryPool(CB, pGPU->QueryPool[nGpu], MICROPROFILE_VULKAN_MAX_QUERIES, 1);
vkCmdWriteTimestamp(CB, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, pGPU->QueryPool[nGpu], MICROPROFILE_VULKAN_MAX_QUERIES);
vkEndCommandBuffer(CB);
VkSubmitInfo SubmitInfo = {};
SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
SubmitInfo.pNext = nullptr;
SubmitInfo.waitSemaphoreCount = 0;
SubmitInfo.pWaitSemaphores = nullptr;
SubmitInfo.commandBufferCount = 1;
SubmitInfo.pCommandBuffers = &CB;
SubmitInfo.signalSemaphoreCount = 0;
SubmitInfo.pSignalSemaphores = nullptr;
vkQueueSubmit(pGPU->Queues[nGpu], 1, &SubmitInfo, Fence);
// Block until the timestamp is written; this leaves the fence signalled for the next wait on this slot.
vkWaitForFences(Device, 1, &Fence, 1, (uint64_t)-1);
// Pre-zero the output so it holds 0 if the query read below writes nothing.
*pOutGpu = 0;
vkGetQueryPoolResults(Device, pGPU->QueryPool[nGpu], MICROPROFILE_VULKAN_MAX_QUERIES, 1, 8, pOutGpu, 8, VK_QUERY_RESULT_64_BIT);
*pOutCPU = MP_TICK();
return 1;
}
#endif
#if MICROPROFILE_GPU_TIMERS_GL
//:'######:::'########::'##::::'##:::::'######:::'##:::::::
//'##... ##:: ##.... ##: ##:::: ##::::'##... ##:: ##:::::::
// ##:::..::: ##:::: ##: ##:::: ##:::: ##:::..::: ##:::::::
// ##::'####: ########:: ##:::: ##:::: ##::'####: ##:::::::
// ##::: ##:: ##.....::: ##:::: ##:::: ##::: ##:: ##:::::::
// ##::: ##:: ##:::::::: ##:::: ##:::: ##::: ##:: ##:::::::
//. ######::: ##::::::::. #######:::::. ######::: ########:
//:......::::..::::::::::.......:::::::......::::........::
void MicroProfileGpuInitGL()
{
MicroProfileGpuTimerStateGL* pGPU = MP_ALLOC_OBJECT(MicroProfileGpuTimerStateGL);
memset(pGPU, 0, sizeof(MicroProfileGpuTimerStateGL));
MicroProfileGpuInitPlatform(MicroProfileGpuTimerStateType_GL,
pGPU,
MicroProfileGpuInsertTimeStampGL,
MicroProfileGpuGetTimeStampGL,
MicroProfileTicksPerSecondGpuGL,
MicroProfileGetGpuTickReferenceGL,
MicroProfileGpuFlipGL,
MicroProfileGpuShutdownGL);
pGPU->GLTimerPos = 0;
glGenQueries(MICROPROFILE_GL_MAX_QUERIES, &pGPU->GLTimers[0]);
}
uint32_t MicroProfileGpuInsertTimeStampGL(void* pContext)
{
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
if(!pGPU)
return 0;
uint32_t nIndex = (pGPU->GLTimerPos + 1) % MICROPROFILE_GL_MAX_QUERIES;
glQueryCounter(pGPU->GLTimers[nIndex], GL_TIMESTAMP);
pGPU->GLTimerPos = nIndex;
return nIndex;
}
uint64_t MicroProfileGpuGetTimeStampGL(uint32_t nKey)
{
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
if(!pGPU)
return 0;
uint64_t result;
glGetQueryObjectui64v(pGPU->GLTimers[nKey], GL_QUERY_RESULT, &result);
return result;
}
// Tick rate reported for the GL backend: one tick per nanosecond.
uint64_t MicroProfileTicksPerSecondGpuGL()
{
    const uint64_t nTicksPerSecond = 1000000000ll;
    return nTicksPerSecond;
}
// Samples a matching CPU/GPU tick pair for the GL backend.
// Queries GL_TIMESTAMP and, if it is non-zero, stores the current CPU tick in *pOutCpu and
// the GPU timestamp in *pOutGpu and returns 1. Returns 0 (outputs untouched) when the GL
// backend is not active or no timestamp was obtained.
int MicroProfileGetGpuTickReferenceGL(int64_t* pOutCpu, int64_t* pOutGpu)
{
    MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
    if(!pGPU)
        return 0;
    // Initialised so a failed query read is seen as "no timestamp" rather than an
    // indeterminate value.
    int64_t nGpuTimeStamp = 0;
    glGetInteger64v(GL_TIMESTAMP, &nGpuTimeStamp);
    if(nGpuTimeStamp)
    {
        *pOutCpu = MP_TICK();
        *pOutGpu = nGpuTimeStamp;
#if 0 // debug test if timestamp diverges
        static int64_t nTicksPerSecondCpu = MicroProfileTicksPerSecondCpu();
        static int64_t nTicksPerSecondGpu = MicroProfileTicksPerSecondGpu();
        static int64_t nGpuStart = 0;
        static int64_t nCpuStart = 0;
        if(!nCpuStart)
        {
            nCpuStart = *pOutCpu;
            nGpuStart = *pOutGpu;
        }
        static int nCountDown = 100;
        if(0 == nCountDown--)
        {
            int64_t nCurCpu = *pOutCpu;
            int64_t nCurGpu = *pOutGpu;
            double fDistanceCpu = (nCurCpu - nCpuStart) / (double)nTicksPerSecondCpu;
            double fDistanceGpu = (nCurGpu - nGpuStart) / (double)nTicksPerSecondGpu;
            char buf[254];
            snprintf(buf, sizeof(buf)-1,"Distance %f %f diff %f\n", fDistanceCpu, fDistanceGpu, fDistanceCpu-fDistanceGpu);
            OutputDebugString(buf);
            nCountDown = 100;
        }
#endif
        return 1;
    }
    return 0;
}
// Frame flip for the GL backend: simply records one more timestamp and returns its key.
uint32_t MicroProfileGpuFlipGL(void* pContext)
{
    const uint32_t nFrameTimeStamp = MicroProfileGpuInsertTimeStampGL(pContext);
    return nFrameTimeStamp;
}
void MicroProfileGpuShutdownGL()
{
MicroProfileGpuTimerStateGL* pGPU = MicroProfileGetGpuTimerStateGL();
if(!pGPU)
return;
glDeleteQueries(MICROPROFILE_GL_MAX_QUERIES, &pGPU->GLTimers[0]);
}
// Returns the active GPU timer state downcast to the GL type,
// or nullptr when no GPU backend is set or it is of a different type.
MicroProfileGpuTimerStateGL* MicroProfileGetGpuTimerStateGL()
{
    const bool bIsGL = S.pGPU && S.pGPU->Type == MicroProfileGpuTimerStateType_GL;
    if(!bIsGL)
    {
        return nullptr;
    }
    return (MicroProfileGpuTimerStateGL*)S.pGPU;
}
#endif
// 32-bit string hash: starts from 0xfeedba3e and folds each character in as h = c + 31 * h
// (written as a shift and subtract), wrapping modulo 2^32.
// note: a matching implementation exists in javascript: microprofilelive.html: function StringHash(s)
uint32_t MicroProfileStringHash(const char* pString)
{
    uint32_t nHash = 0xfeedba3e;
    for(const char* pCursor = pString; 0 != *pCursor; ++pCursor)
    {
        const char ch = *pCursor;
        nHash = ch + ((nHash << 5) - nHash);
    }
    return nHash;
}
// Returns a copy of the NUL-terminated string pStr in memory obtained from MP_ALLOC
// (terminator included).
const char* MicroProfileStrDup(const char* pStr)
{
    const size_t nBytes = strlen(pStr) + 1; // +1 for the terminator
    void* pCopy = MP_ALLOC(nBytes, 8);
    return (const char*)memcpy(pCopy, pStr, nBytes);
}
// Derives a stable 0xRRGGBB colour from a string: hue = hash % 360 with fixed saturation 0.5
// and lightness 0.7, converted from HSL to RGB.
// note: matching code/constants in javascript: microprofilelive.html: function StringToColor(s)
//   var h = StringHash(s);
//   var cidx = h % 360;
//   return "hsl(" + cidx + ",50%, 70%)";
uint32_t MicroProfileColorFromString(const char* pString)
{
    const uint32_t nHue = MicroProfileStringHash(pString) % 360;
    const float fHue = nHue;
    const float fSaturation = 0.5f;
    const float fLightness = 0.7f;
    // from https://www.rapidtables.com/convert/color/hsl-to-rgb.html
    const float fChroma = (1 - fabsf(2 * fLightness - 1)) * fSaturation;
    const float fSecondary = fChroma * (1 - fabsf(fmodf(fHue / 60, 2.f) - 1));
    const float fOffset = fLightness - fChroma / 2.f;

    // For each 60 degree sector: which channel (0 = r, 1 = g, 2 = b) gets the chroma and
    // which gets the secondary component; the remaining channel stays at 0.
    static const int kChromaChannel[6] = { 0, 1, 1, 2, 2, 0 };
    static const int kSecondaryChannel[6] = { 1, 0, 2, 1, 0, 2 };
    const uint32_t nSector = nHue / 60;
    float fChannel[3] = { 0.f, 0.f, 0.f };
    fChannel[kChromaChannel[nSector]] = fChroma;
    fChannel[kSecondaryChannel[nSector]] = fSecondary;

    uint32_t nPacked = 0;
    for(int i = 0; i < 3; ++i)
    {
        float fValue = fChannel[i];
        fValue += fOffset;
        fValue *= 255.f;
        const uint32_t nByte = MicroProfileMin(0xffu, (uint32_t)fValue);
        nPacked = (nPacked << 8) | nByte;
    }
    return nPacked;
}
#if MICROPROFILE_DYNAMIC_INSTRUMENT
// '##::::'##::'#######:::'#######::'##:::'##:::::'######::'##::::'##::::'###::::'########::'########:'########::
// ##:::: ##:'##.... ##:'##.... ##: ##::'##:::::'##... ##: ##:::: ##:::'## ##::: ##.... ##: ##.....:: ##.... ##:
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##:::..:: ##:::: ##::'##:. ##:: ##:::: ##: ##::::::: ##:::: ##:
// #########: ##:::: ##: ##:::: ##: #####:::::::. ######:: #########:'##:::. ##: ########:: ######::: ##:::: ##:
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::::..... ##: ##.... ##: #########: ##.. ##::: ##...:::: ##:::: ##:
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##:::::'##::: ##: ##:::: ##: ##.... ##: ##::. ##:: ##::::::: ##:::: ##:
// ##:::: ##:. #######::. #######:: ##::. ##::::. ######:: ##:::: ##: ##:::: ##: ##:::. ##: ########: ########::
// ..:::::..:::.......::::.......:::..::::..::::::......:::..:::::..::..:::::..::..:::::..::........::........:::
#include <distorm.h>
#include <mnemonics.h>
#if MICROPROFILE_BREAK_ON_PATCH_FAIL
#define BREAK_ON_PATCH_FAIL() MP_BREAK()
#else
#define BREAK_ON_PATCH_FAIL() \
do \
{ \
} while(0)
#endif
void* MicroProfileX64FollowJump(void* pSrc);
bool MicroProfileCopyInstructionBytes(char* pDest,
void* pSrc,
const int nLimit,
const int nMaxSize,
char* pTrunk,
intptr_t nTrunkSize,
uint32_t nUsableJumpRegs,
int* nBytesDest,
int* nBytesSrc,
uint32_t* pRegsWritten,
uint32_t* nRetSafe);
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError);
template <typename Callback>
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules);
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size);
bool MicroProfilePatchBeginSuspend();
void MicroProfilePatchEndSuspend();
bool MicroProfilePatchHasSuspendedThread(intptr_t Begin, intptr_t End);
#if 1
#define STRING_MATCH_SIZE 64
typedef uint64_t uint_string_match;
#else
#define STRING_MATCH_SIZE 32
typedef uint32_t uint_string_match;
#endif
// Bit-mask data used for symbol string matching. Each mask is a uint_string_match
// (STRING_MATCH_SIZE bits wide).
// NOTE(review): the exact meaning of the bits is defined by code outside this section - confirm there.
struct MicroProfileStringMatchMask
{
uint_string_match nMask;
uint_string_match M[64];
};
// Description of one symbol considered for dynamic instrumentation.
// NOTE(review): field meanings below are inferred from the names only - confirm against the code that fills them.
struct MicroProfileSymbolDesc
{
// presumably the full symbol name
const char* pName;
// presumably a shortened form of the name
const char* pShortName;
// presumably the symbol's address range [nAddress, nAddressEnd)
intptr_t nAddress;
intptr_t nAddressEnd;
uint_string_match nMask;
int nIgnoreSymbol;
// presumably the index of the module the symbol belongs to
uint32_t nModule;
};
// One node of a singly linked list of symbol blocks. Each block carries ESIZE (4 KiB) of
// payload that is viewed either as an array of MicroProfileSymbolDesc or as raw characters;
// both views alias the same storage through the union.
struct MicroProfileSymbolBlock
{
// Next block in the list.
MicroProfileSymbolBlock* pNext;
// NOTE(review): presumably the number of entries used in Symbols / bytes used in Chars - confirm against the code that fills the block.
uint32_t nNumSymbols;
uint32_t nNumChars;
uint_string_match nMask;
MicroProfileStringMatchMask MatchMask;
enum
{
// Payload size in bytes.
ESIZE = 4 << 10,
};
union
{
MicroProfileSymbolDesc Symbols[ESIZE / sizeof(MicroProfileSymbolDesc)];
char Chars[ESIZE];
};
};
typedef void (*MicroProfileOnSymbolCallback)(const char* pSymbolName, intptr_t nAddress);
MP_THREAD_LOCAL uintptr_t g_MicroProfile_TLS[17] = { 16 };
// Pushes t onto the per-thread stack kept in g_MicroProfile_TLS.
// Element 0 is a header: low 32 bits = capacity, high 32 bits = current depth; the values
// live in elements 1..capacity. Returns 1 on success, 0 when the stack is full.
extern "C" MP_NOINLINE uintptr_t MicroProfile_Patch_TLS_PUSH(uintptr_t t)
{
    uintptr_t* pStack = &g_MicroProfile_TLS[0];
    const uintptr_t nHeader = pStack[0];
    const uintptr_t nCapacity = (uint32_t)nHeader;
    const uintptr_t nDepth = (uint32_t)(nHeader >> 32);
    if(nDepth == nCapacity)
    {
        return 0;
    }
    pStack[0] = nCapacity | ((nDepth + 1) << 32);
    pStack[nDepth + 1] = t;
    return 1;
}
// Pops and returns the most recently pushed value from the per-thread stack kept in
// g_MicroProfile_TLS (header layout: low 32 bits = capacity, high 32 bits = depth).
// Popping an empty stack breaks into the debugger and returns 0.
extern "C" MP_NOINLINE uintptr_t MicroProfile_Patch_TLS_POP()
{
    uintptr_t* pStack = &g_MicroProfile_TLS[0];
    const uintptr_t nHeader = pStack[0];
    const uintptr_t nCapacity = (uint32_t)nHeader;
    const uintptr_t nDepth = (uint32_t)(nHeader >> 32);
    if(0 == nDepth)
    {
        MP_BREAK(); // this should never happen
        return 0;
    }
    pStack[0] = nCapacity | ((nDepth - 1) << 32);
    return pStack[nDepth];
}
// Emits an absolute jump through a register at pCode:
//     movabsq $pDest, %reg
//     jmpq    *%reg
// reg must be one of R_RAX..R_R15. Returns the address just past the emitted bytes
// (12 bytes for rax..rdi, 13 for r8..r15).
//
// Encoding reference:
//     48 b8+n <imm64>    movabsq $imm64, %rax/%rcx/%rdx/%rbx/%rsp/%rbp/%rsi/%rdi  (n = 0..7)
//     49 b8+n <imm64>    movabsq $imm64, %r8..%r15                                (n = 0..7)
//     ff e0+n            jmpq *%rax..%rdi
//     41 ff e0+n         jmpq *%r8..%r15
char* MicroProfileInsertRegisterJump(char* pCode, intptr_t pDest, int reg)
{
    MP_ASSERT(reg >= R_RAX && reg <= R_R15);
    const bool bExtended = reg >= R_R8; // r8..r15 need the extension prefix bit
    const int nRegIndex = bExtended ? (reg - R_R8) : (reg - R_RAX);
    unsigned char* pOut = (unsigned char*)pCode;

    // movabsq $pDest, %reg
    *pOut++ = bExtended ? 0x49 : 0x48;
    *pOut++ = 0xb8 + nRegIndex;
    memcpy(pOut, &pDest, 8);
    pOut += 8;

    // jmpq *%reg
    if(bExtended)
    {
        *pOut++ = 0x41;
    }
    *pOut++ = 0xff;
    *pOut++ = 0xe0 + nRegIndex;
    return (char*)pOut;
}
// Emits a 5-byte relative jump (e9 <rel32>) at pCode that lands on pDest.
// The displacement is measured from the end of the instruction and must fit in 32 bits.
// Returns the address just past the emitted bytes.
char* MicroProfileInsertRelativeJump(char* pCode, intptr_t pDest)
{
    const intptr_t nAfterJump = intptr_t(pCode) + 5;
    const intptr_t nDelta = pDest - nAfterJump;
    MP_ASSERT(nDelta > intptr_t(0xffffffff80000000) && nDelta <= 0x7fffffff);
    const int32_t nRel32 = (int32_t)nDelta;
    unsigned char* pOut = (unsigned char*)pCode;
    pOut[0] = 0xe9;
    memcpy(pOut + 1, &nRel32, 4);
    return (char*)(pOut + 5);
}
// Emits a 14-byte absolute jump that needs no scratch register:
//     68 <lo32>             push the low 32 bits of pDest
//     c7 44 24 04 <hi32>    store the high 32 bits into the upper half of the pushed slot
//     c3                    ret
// Returns the address just past the emitted bytes.
char* MicroProfileInsertRetJump(char* pCode, intptr_t pDest)
{
    const uint32_t nHalves[2] = { (uint32_t)pDest, (uint32_t)(pDest >> 32) };
    static const unsigned char kStoreUpperHalf[4] = { 0xc7, 0x44, 0x24, 0x04 };
    unsigned char* pOut = (unsigned char*)pCode;
    pOut[0] = 0x68;
    memcpy(pOut + 1, &nHalves[0], 4);
    memcpy(pOut + 5, kStoreUpperHalf, 4);
    memcpy(pOut + 9, &nHalves[1], 4);
    pOut[13] = 0xc3;
    return (char*)(pOut + 14);
}
// Emits "movabsq $value, %r" at p: a 0x48/0x49 prefix byte, opcode 0xb8 + register index,
// then the immediate (sizeof(intptr_t) bytes).
//
// p      write position.
// pend   end of the destination buffer; the instruction must end strictly before it.
// r      register id relative to R_RAX (R_R8 and above select the 0x49 prefix).
// value  immediate to load.
// Returns the address just past the emitted bytes.
uint8_t* MicroProfileInsertMov(uint8_t* p, uint8_t* pend, int r, intptr_t value)
{
    // Check the space before touching the buffer, so an undersized buffer trips the assert
    // instead of being overrun first (same strictness as before: the end must stay below pend).
    MP_ASSERT(pend - p > (ptrdiff_t)(2 + sizeof(value)));
    int Large = r >= R_R8 ? 1 : 0;
    int RegIndex = Large ? (r - R_R8) : (r - R_RAX);
    *p++ = Large ? 0x49 : 0x48;
    *p++ = 0xb8 + RegIndex;
    // The immediate lands at an arbitrary alignment; copy it bytewise instead of storing
    // through a misaligned intptr_t*.
    memcpy(p, &value, sizeof(value));
    p += sizeof(value);
    return p;
}
// Emits "call *%r" at p: ff d0+index, preceded by a 0x41 prefix for R_R8 and above.
//
// p     write position.
// pend  end of the destination buffer; the instruction must end strictly before it.
// r     register id relative to R_RAX.
// Returns the address just past the emitted bytes (2 or 3 bytes written).
uint8_t* MicroProfileInsertCall(uint8_t* p, uint8_t* pend, int r)
{
    int Large = r >= R_R8 ? 1 : 0;
    int RegIndex = Large ? (r - R_R8) : (r - R_RAX);
    // Check the space before touching the buffer, so an undersized buffer trips the assert
    // instead of being overrun first (same strictness as before: the end must stay below pend).
    MP_ASSERT(pend - p > (Large ? 3 : 2));
    if(Large)
    {
        *p++ = 0x41;
    }
    *p++ = 0xff;
    *p++ = 0xd0 + RegIndex;
    return p;
}
// Returns true when the patterns pPatterns[nStartOffset .. nNumPatterns) all occur in
// pSymbol, in order and without overlapping: each search resumes right after the previous
// match. Matching is done with MP_STRCASESTR. nPatternLength[i] is the length of pattern i.
bool MicroProfileStringMatch(const char* pSymbol, uint32_t nStartOffset, const char** pPatterns, uint32_t* nPatternLength, uint32_t nNumPatterns)
{
    MP_ASSERT(nStartOffset <= nNumPatterns);
    const char* pCursor = pSymbol;
    for(uint32_t nPattern = nStartOffset; nPattern < nNumPatterns; ++nPattern)
    {
        const char* pHit = MP_STRCASESTR(pCursor, pPatterns[nPattern]);
        if(!pHit)
        {
            return false;
        }
        pCursor = pHit + nPatternLength[nPattern];
    }
    return true;
}
// Returns how many leading patterns of pPatterns occur in pSymbol, in order and without
// overlapping (each search resumes right after the previous match). Matching is done with
// MP_STRCASESTR; counting stops at the first pattern that is not found.
int MicroProfileStringMatchOffset(const char* pSymbol, const char** pPatterns, uint32_t* nPatternLength, uint32_t nNumPatterns)
{
    uint32_t nMatched = 0;
    const char* pCursor = pSymbol;
    while(nMatched < nNumPatterns)
    {
        const char* pHit = MP_STRCASESTR(pCursor, pPatterns[nMatched]);
        if(!pHit)
        {
            break;
        }
        pCursor = pHit + nPatternLength[nMatched];
        ++nMatched;
    }
    return (int)nMatched;
}
void* MicroProfileX64FollowJump(void* pSrc)
{
for(uint32_t i = 0; i < S.DynamicTokenIndex; ++i)
if(S.FunctionsInstrumented[i] == pSrc)
return pSrc; // if already instrumented, do not follow the jump inserted by itself.
// uprintf("deref possible trampoline for %p\n", pSrc);
_DecodeType dt = Decode64Bits;
_DInst Instructions[1];
unsigned int nCount = 0;
_CodeInfo ci;
ci.code = (uint8_t*)pSrc;
ci.codeLen = 15;
ci.codeOffset = 0;
ci.dt = dt;
ci.features = DF_NONE;
int r = distorm_decompose(&ci, Instructions, 1, &nCount);
if(!r || nCount != 1)
{
return pSrc; // fail, just return
}
auto& I = Instructions[0];
if(I.opcode == I_JMP)
{
if(I.ops[0].type == O_PC)
{
if(I.ops[0].size == 0x20)
{
intptr_t p = (intptr_t)pSrc;
p += I.size;
p += I.imm.sdword;
return (void*)p;
}
}
else if(I.ops[0].type == O_SMEM)
{
if(I.ops[0].index == R_RIP)
{
intptr_t p = (intptr_t)pSrc;
p += I.size;
p += I.disp;
void* pHest = *(void**)p;
return pHest;
}
}
uprintf("failed to interpret I_JMP %p %d %d\n", pSrc, I.ops[0].size, I.ops[0].type);
return pSrc;
MP_BREAK();
}
return pSrc;
}
// Copies the leading instructions of the function at pSrc into pDest, rewriting the ones that
// cannot simply be moved, so that at least nLimit bytes of the original can be overwritten.
//
//  pDest / nMaxSize      destination buffer for the relocated instructions and its size.
//  pSrc                  start of the code to relocate.
//  nLimit                minimum number of source bytes that must be covered by whole instructions.
//  pTrunk / nTrunkSize   scratch area that receives copies of data referenced rip-relative; it is
//                        aligned up to 16 bytes before use.
//  nUsableJumpRegs       bit mask (see RegToBit) of registers the caller could use for a jump back.
//  pBytesDest            out: number of bytes written to pDest.
//  pBytesSrc             out: number of source bytes covered (whole instructions, >= nLimit when enough
//                        instructions were decoded). Written before validation, so also set on failure.
//  pRegsWritten          out: bit mask of registers written by the relocated instructions.
//  pRetSafe              out: 1 unless a memory operand is addressed through a register derived from rsp.
//
// Returns false (after BREAK_ON_PATCH_FAIL) when the code cannot be relocated: decode failure,
// branch/ret/syscall inside the range, unsupported call or rip-relative form, trunk overflow,
// too few bytes, or no way left to jump back.
bool MicroProfileCopyInstructionBytes(char* pDest,
	void* pSrc,
	const int nLimit,
	const int nMaxSize,
	char* pTrunk,
	intptr_t nTrunkSize,
	const uint32_t nUsableJumpRegs,
	int* pBytesDest,
	int* pBytesSrc,
	uint32_t* pRegsWritten,
	uint32_t* pRetSafe)
{
	_DecodeType dt = Decode64Bits;
	_DInst Instructions[128];
	int rip[128] = { 0 };                    // rip[i] != 0: instruction i has a rip-relative operand
	uint32_t nRegsWrittenInstr[128] = { 0 }; // registers written up to and including instruction i
	int offsets[129] = { 0 };                // offsets[i]: byte offset of instruction i from pSrc
	unsigned int nCount = 0;
	_CodeInfo ci;
	ci.code = (uint8_t*)pSrc;
	ci.codeLen = nLimit + 15; // decode a little past nLimit so the last instruction is complete
	ci.codeOffset = 0;
	ci.dt = dt;
	ci.features = DF_NONE;
	int r = distorm_decompose(&ci, Instructions, 128, &nCount);
	if(r != DECRES_SUCCESS)
	{
		BREAK_ON_PATCH_FAIL();
		return false;
	}
	int offset = 0;
	unsigned int i = 0;
	unsigned nInstructions = 0;
	int64_t nTrunkUsage = 0;
	offsets[0] = 0;
	uint32_t nRegsWritten = 0;
	auto Align16 = [](intptr_t p) { return (p + 15) & (~15); };
	{
		// align the trunk start to 16 bytes and shrink its size accordingly
		intptr_t iTrunk = (intptr_t)pTrunk;
		intptr_t iTrunkEnd = iTrunk + nTrunkSize;
		intptr_t iTrunkAligned = (iTrunk + 15) & ~15;
		nTrunkSize = iTrunkEnd - iTrunkAligned;
		pTrunk = (char*)iTrunkAligned;
	}
	const uint8_t* pTrunkEnd = (uint8_t*)(pTrunk + nTrunkSize);
	// Maps a distorm register id of any width (64/32/16/8 bit) to one bit per architectural register.
	auto RegToBit = [](int r) -> uint32_t
	{
		if(r >= R_RAX && r <= R_R15)
		{
			return (1u << (r - R_RAX));
		}
		else if(r >= R_EAX && r <= R_R15D)
		{
			return (1u << (r - R_EAX));
		}
		else if(r >= R_AX && r <= R_R15W)
		{
			return (1u << (r - R_AX));
		}
		else if(r >= R_AL && r <= R_R15B)
		{
			return (1u << (r - R_AL));
		}
		return 0; // might hit on registers like RIP
	};
	// scratch registers that may hold the absolute target of a rewritten call
	const uint32_t nUsableRegisters = RegToBit(R_RAX) | RegToBit(R_R10) | RegToBit(R_R11);
	int nBytesToMove = 0;
	for(i = 0; i < nCount; ++i)
	{
		nBytesToMove += Instructions[i].size;
		if(nBytesToMove >= nLimit)
			break;
	}
	*pBytesSrc = nBytesToMove;
	uint32_t nRspMask = RegToBit(R_RSP); // registers whose value was derived from rsp
	*pRetSafe = 1;

	// Pass 1: validate every instruction that has to move and gather register / trunk usage.
	for(i = 0; i < nCount; ++i)
	{
		rip[i] = 0;
		auto& I = Instructions[i];
		if(I.opcode == I_CALL)
		{
			auto& O = I.ops[0];
			if(O.type != O_PC || O.size != 0x20)
			{
				uprintf("unknown call encountered. cannot move\n");
				BREAK_ON_PATCH_FAIL();
				return false;
			}
			if((nRegsWritten & nUsableRegisters) == nUsableRegisters)
			{
				uprintf("call encountered, but register all regs was written to. TODO: push regs?\n");
				BREAK_ON_PATCH_FAIL();
				return false;
			}
			// return value might be used past return so preserve registers.
#ifdef _WIN32
			nRegsWritten |= RegToBit(R_RAX);
#else
			nRegsWritten |= RegToBit(R_RAX) | RegToBit(R_RDX);
#endif
		}
		switch(I.ops[0].type)
		{
		case O_REG:
		{
			uint32_t reg = I.ops[0].index;
			nRegsWritten |= RegToBit(reg);
			auto& O2 = I.ops[1];
			switch(O2.type)
			{
			case O_REG:
			case O_MEM:
			case O_SMEM:
			{
				// if register is RSP 'contaminated', it prevents us from using that to do retjmps
				uint32_t nMask = RegToBit(O2.index);
				if(nRspMask & nMask)
				{
					nRspMask |= RegToBit(reg);
				}
			}
			// intentional fall through
			default:
				break;
			}
			break;
		}
		case O_MEM:
		case O_SMEM:
		{
			uint32_t reg = I.ops[0].index;
			if(nRspMask & RegToBit(reg))
			{
				uprintf("found contaminated reg at +%lld\n", (long long)I.addr);
				*pRetSafe = 0;
			}
			break;
		}
		}
		nRegsWrittenInstr[i] = nRegsWritten;
		for(int j = 0; j < 4; ++j)
		{
			auto& O = I.ops[j];
			switch(O.type)
			{
			case O_REG:
			case O_SMEM:
			case O_MEM:
			{
				if(O.index == R_RIP)
				{
					// only "op reg, [rip + disp]" is supported, i.e. rip as the second operand
					if(j != 1)
					{
						uprintf("found non base reference of rip. fail\n");
						BREAK_ON_PATCH_FAIL();
						return false;
					}
					if(I.dispSize != 0x20 && I.dispSize != 0x10)
					{
						uprintf("found offset size != 32 && != 16 bit. not implemented\n");
						BREAK_ON_PATCH_FAIL();
						return false;
					}
					rip[i] = 1;
					nTrunkUsage += Align16(O.size / 8);
					if(nTrunkUsage > nTrunkSize)
					{
						uprintf("overuse of trunk %lld\n", (long long)nTrunkUsage);
						BREAK_ON_PATCH_FAIL();
						return false;
					}
				}
				break;
			}
			}
		}
		if(rip[i])
		{
			if(I.ops[0].type != O_REG)
			{
				uprintf("arg 0 should be O_REG, fail\n");
				BREAK_ON_PATCH_FAIL();
				return false;
			}
			if(I.ops[1].type != O_SMEM)
			{
				// report the operand type that was actually found
				uprintf("arg 1 should be O_SMEM, fail was %d\n", I.ops[1].type);
				BREAK_ON_PATCH_FAIL();
				return false;
			}
		}
		int fc = META_GET_FC(Instructions[i].meta);
		switch(fc)
		{
		case FC_CALL:
		{
			break;
		}
		case FC_RET:
		case FC_SYS:
		case FC_UNC_BRANCH:
		case FC_CND_BRANCH:
			uprintf("found branch inst %d :: %d\n", fc, offset);
			BREAK_ON_PATCH_FAIL();
			return false;
		}
		offset += Instructions[i].size;
		offsets[i + 1] = offset;
		if(offset >= nLimit)
		{
			nInstructions = i + 1;
			break;
		}
	}
	if(nTrunkUsage > nTrunkSize)
	{
		uprintf("function using too much trunk space\n");
		BREAK_ON_PATCH_FAIL();
		return false;
	}
	if(offset < nLimit)
	{
		uprintf("function only had %d bytes of %d\n", offset, nLimit);
		BREAK_ON_PATCH_FAIL();
		return false;
	}
	if(0 == *pRetSafe && 0 == (nUsableJumpRegs & ~nRegsWritten))
	{
		// if ret jump is unsafe all of the usable jump regs are taken, fail.
		uprintf("cannot patch function without breaking code]\n");
		BREAK_ON_PATCH_FAIL();
		MP_BREAK();
		return false;
	}
	*pRegsWritten = nRegsWritten;

	// Pass 2: emit the relocated instructions.
	uint8_t* d = (uint8_t*)pDest;
	uint8_t* dend = d + nMaxSize;
	const uint8_t* s = (const uint8_t*)pSrc;
	nTrunkUsage = 0;
	for(i = 0; i < nInstructions; ++i)
	{
		auto& I = Instructions[i];
		unsigned size = Instructions[i].size;
		if(I.opcode == I_CALL)
		{
			// Replace "call rel32" by "mov reg, absolute target; call reg" using the first
			// usable scratch register not written so far.
			uint32_t nRegsWrittenHere = nRegsWrittenInstr[i];
			uint32_t nUsable = nUsableRegisters & ~nRegsWrittenHere;
			MP_ASSERT(nUsable);
			int nReg = R_RAX;
			while(0 == (1 & nUsable))
			{
				nUsable >>= 1;
				nReg++;
			}
			intptr_t p = offsets[i + 1];
			p += (intptr_t)pSrc;
			p += I.imm.sdword;
			d = MicroProfileInsertMov(d, dend, nReg, p);
			d = MicroProfileInsertCall(d, dend, nReg);
			s += size;
		}
		else if(rip[i])
		{
			if(I.opcode == I_LEA)
			{
				// "lea reg, [rip + disp]" becomes "mov reg, imm64" holding the absolute address
				if(I.ops[0].type != O_REG)
				{
					MP_BREAK();
				}
				if(I.ops[1].index != R_RIP)
				{
					MP_BREAK();
				}
				int reg = I.ops[0].index - R_RAX;
				int large = I.ops[0].index >= R_R8 ? 1 : 0;
				*d++ = large ? 0x49 : 0x48;
				*d++ = 0xb8 + (reg - (large ? (R_R8 - R_RAX) : 0));
				// calculate the offset
				int64_t nRipOffset = offsets[i + 1] + I.disp;
				intptr_t base = (intptr_t)pSrc;
				intptr_t sum = base + nRipOffset;
				intptr_t* pAddress = (intptr_t*)d;
				pAddress[0] = sum;
				s += size;
				d = (uint8_t*)(pAddress + 1);
			}
			else
			{
				// Other rip-relative loads: copy the referenced data into the trunk and patch
				// the displacement of the moved instruction to point at that copy.
				if(15 & (intptr_t)pTrunk)
				{
					MP_BREAK();
				}
				intptr_t t = (intptr_t)pTrunk;
				t = (t + 15) & ~15;
				pTrunk = (char*)t;
				auto& O = I.ops[1];
				uint32_t Op1Size = O.size / 8;
				memcpy(d, s, size);
				int32_t DispOriginal = (int32_t)I.disp;
				const uint8_t* pOriginal = (s + size) + DispOriginal;
				intptr_t DispNew = ((uint8_t*)pTrunk - (d + size));
				if(!((intptr_t)pTrunk + Op1Size <= (intptr_t)pTrunkEnd))
				{
					MP_BREAK();
				}
				memcpy(pTrunk, pOriginal, Op1Size);
				pTrunk += Align16(Op1Size);
				if(I.dispSize == 32)
				{
					int32_t off = (int32_t)DispNew;
					if(DispNew > 0x7fffffff || DispNew < 0)
					{
						MP_BREAK();
					}
					memcpy(d + size - 4, &off, 4);
				}
				else if(I.dispSize == 16)
				{
					int16_t off = (int16_t)DispNew;
					if(DispNew > 0x7fff || DispNew < 0)
					{
						MP_BREAK();
					}
					memcpy(d + size - 2, &off, 2);
				}
				d += size;
				s += size;
			}
		}
		else
		{
			// position independent instruction, copy verbatim
			memcpy(d, s, size);
			d += size;
			s += size;
		}
	}
	*pBytesDest = (int)(d - (uint8_t*)pDest);
	return true;
}
extern "C" void MicroProfileInterceptEnter(int a)
{
MicroProfileToken T = S.DynamicTokens[a];
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
MP_ASSERT(pLog->nStackScope < MICROPROFILE_STACK_MAX); // if youre hitting this assert your instrumenting a deeply nested function
MicroProfileScopeStateC* pScopeState = &pLog->ScopeState[pLog->nStackScope++];
pScopeState->Token = T;
if(T)
{
pScopeState->nTick = MicroProfileEnterInternal(T);
}
else
{
pScopeState->nTick = MICROPROFILE_INVALID_TICK;
}
}
extern "C" void MicroProfileInterceptLeave(int a)
{
MicroProfileThreadLog* pLog = MicroProfileGetThreadLog2();
MP_ASSERT(pLog->nStackScope > 0); // if youre hitting this assert you probably have mismatched _ENTER/_LEAVE markers
MicroProfileScopeStateC* pScopeState = &pLog->ScopeState[--pLog->nStackScope];
MicroProfileLeaveInternal(pScopeState->Token, pScopeState->nTick);
}
bool MicroProfileInstrumentFromAddressOnly(void* pFunction)
{
MicroProfileSymbolDesc* pDesc = MicroProfileSymbolFindFuction(pFunction);
if(pDesc)
{
uprintf("Found function %p :: %s %s\n", (void*)pDesc->nAddress, pDesc->pName, pDesc->pShortName);
uint32_t nColor = MicroProfileColorFromString(pDesc->pName);
return MicroProfileInstrumentFunction(pFunction, MicroProfileSymbolModuleGetString(pDesc->nModule), pDesc->pName, nColor);
}
else
{
uprintf("No Function Found %p\n", pFunction);
return false;
}
}
template <typename CB>
void MicroProfileInstrumentScanForFunctionCalls(CB Callback, void* pFunction, size_t nFunctionSize)
{
pFunction = MicroProfileX64FollowJump(pFunction);
const intptr_t nCodeLen = nFunctionSize;
const uint32_t nMaxInstructions = 15;
intptr_t nOffset = 0;
_DecodeType dt = Decode64Bits;
_DInst Instructions[15];
_CodeInfo ci;
do
{
ci.code = nOffset + (uint8_t*)pFunction;
ci.codeLen = nCodeLen - nOffset;
ci.codeOffset = 0;
ci.dt = dt;
ci.features = DF_RETURN_FC_ONLY;
uint32_t nCount = 0;
uint32_t nOffsetNext = 0;
int r = distorm_decompose(&ci, Instructions, nMaxInstructions, &nCount);
// uprintf("decomposed %d\n", nCount);
if(r != DECRES_SUCCESS && r != DECRES_MEMORYERR)
{
BREAK_ON_PATCH_FAIL();
return;
}
if(nCount == 0)
{
// no instructions left
break;
}
// uprintf("instructions decoded %d %p ::\n", nCount, pFunction);
for(int i = 0; i < (int)nCount; ++i)
{
// rip[i] = 0;
auto& I = Instructions[i];
// bool bHasRipReference = false;
if(I.addr < nOffsetNext)
{
MP_BREAK();
}
nOffsetNext = I.addr + I.size;
if(I.opcode == I_CALL)
{
auto& O = I.ops[0];
if(O.type != O_PC || O.size != 0x20)
{
uprintf("non immediate call encountered. cannot follow\n");
BREAK_ON_PATCH_FAIL();
continue;
}
intptr_t pDst = nOffset + (intptr_t)pFunction;
pDst += I.addr;
pDst += I.size;
pDst += I.imm.sdword;
void* fFun1 = MicroProfileX64FollowJump((void*)pDst);
Callback(fFun1);
}
}
nOffset += nOffsetNext;
} while(nOffset < nCodeLen);
}
void MicroProfileInstrumentFunctionsCalled(void* pFunction, const char* pModuleName, const char* pFunctionName, int nMinBytes, int nMaxCalls)
{
pFunction = MicroProfileX64FollowJump(pFunction);
MicroProfileSymbolDesc* pDesc = MicroProfileSymbolFindFuction(pFunction);
if(pDesc)
{
uprintf("instrumenting child functions %p %p :: %s :: %s\n", (void*)pDesc->nAddress, (void*)pDesc->nAddressEnd, pDesc->pName, pDesc->pShortName);
int a = 0;
(void)a;
}
else
{
uprintf("could not find symbol info\n");
return;
}
const intptr_t nCodeLen = (intptr_t)pDesc->nAddressEnd - (intptr_t)pDesc->nAddress;
MicroProfilePatchBeginSuspend();
int NumFunctionsInstrumented = 0;
auto Callback = [&NumFunctionsInstrumented, nMinBytes, nMaxCalls](void* pFunc)
{
MicroProfileSymbolDesc* pDesc = MicroProfileSymbolFindFuction(pFunc);
if(!pDesc)
return;
const char* pName = pDesc ? pDesc->pName : "??";
intptr_t Size = pDesc->nAddressEnd - pDesc->nAddress;
if(nMinBytes == 0 || Size >= nMinBytes)
{
if(0 == nMaxCalls || NumFunctionsInstrumented < nMaxCalls)
{
uprintf("** func Instrumented, count %d, size %d %s\n", NumFunctionsInstrumented, Size, pName);
if(MicroProfileInstrumentFromAddressOnly(pFunc))
{
++NumFunctionsInstrumented;
}
}
else
{
uprintf("** func Skipped, count %d>=%d :: %s\n", NumFunctionsInstrumented, nMaxCalls, pName);
}
}
else
{
uprintf("** func Skipped, Size %d<%d :: %s\n", Size, nMinBytes, pName);
}
};
MicroProfileInstrumentScanForFunctionCalls(Callback, pFunction, nCodeLen);
MicroProfilePatchEndSuspend();
}
// Patches the function at pFunction so that it reports a timer named pFunctionName
// (group "PATCHED", color nColor).
//
// Returns true when the function was patched. Returns false when all dynamic tokens are in use,
// when the function is already instrumented, or when patching failed. On a patch failure the
// error and the function name are remembered in S.PatchErrors / S.PatchErrorFunctionNames
// (duplicates are not stored, and at most MICROPROFILE_MAX_PATCH_ERRORS of each).
//
// MicroProfilePatchBeginSuspend/EndSuspend bracket the whole function on every return path.
bool MicroProfileInstrumentFunction(void* pFunction, const char* pModuleName, const char* pFunctionName, uint32_t nColor)
{
	MicroProfilePatchBeginSuspend();
	struct ScopeExit
	{
		~ScopeExit()
		{
			MicroProfilePatchEndSuspend();
		}
	} dummy;
	MicroProfilePatchError Err;
	if(S.DynamicTokenIndex == MICROPROFILE_MAX_DYNAMIC_TOKENS)
	{
		uprintf("instrument failing, out of dynamic tokens %d\n", S.DynamicTokenIndex);
		return false;
	}
	for(uint32_t i = 0; i < S.DynamicTokenIndex; ++i)
	{
		if(S.FunctionsInstrumented[i] == pFunction)
		{
			uprintf("function %p already instrumented\n", pFunction);
			return false;
		}
	}
	if(MicroProfilePatchFunction(pFunction, S.DynamicTokenIndex, MicroProfileInterceptEnter, MicroProfileInterceptLeave, &Err))
	{
		// register the new dynamic token and remember which function it belongs to
		MicroProfileToken Tok = S.DynamicTokens[S.DynamicTokenIndex] = MicroProfileGetToken("PATCHED", pFunctionName, nColor, MicroProfileTokenTypeCpu, 0);
		S.FunctionsInstrumented[S.DynamicTokenIndex] = pFunction;
		S.FunctionsInstrumentedName[S.DynamicTokenIndex] = MicroProfileStringIntern(pFunctionName);
		S.FunctionsInstrumentedModuleNames[S.DynamicTokenIndex] = MicroProfileStringIntern(pModuleName);
		S.DynamicTokenIndex++;
		uint16_t nGroup = MicroProfileGetGroupIndex(Tok);
		if(!MicroProfileGroupActive(nGroup))
		{
			MicroProfileGroupSetEnabled(nGroup);
		}
#if MICROPROFILE_WEBSERVER
		MicroProfileWebSocketToggleTimer(MicroProfileGetTimerIndex(Tok));
#endif
		// success: callers such as MicroProfileInstrumentFunctionsCalled count on this result
		return true;
	}
	else
	{
		// remember the failing code sequence once
		bool bFound = false;
		for(int i = 0; i < S.nNumPatchErrors; ++i)
		{
			if(Err.nCodeSize == S.PatchErrors[i].nCodeSize && 0 == memcmp(Err.Code, S.PatchErrors[i].Code, Err.nCodeSize))
			{
				bFound = true;
				break;
			}
		}
		if(!bFound && S.nNumPatchErrors < MICROPROFILE_MAX_PATCH_ERRORS)
		{
			memcpy(&S.PatchErrors[S.nNumPatchErrors++], &Err, sizeof(Err));
		}
		// remember the name of the failing function once
		bFound = false;
		for(int i = 0; i < S.nNumPatchErrorFunctions; ++i)
		{
			if(0 == strcmp(pFunctionName, S.PatchErrorFunctionNames[i]))
			{
				bFound = true;
				break;
			}
		}
		if(!bFound && S.nNumPatchErrorFunctions < MICROPROFILE_MAX_PATCH_ERRORS)
		{
			// NOTE(review): the pointer is stored as-is (not interned); assumes the caller's string outlives the entry
			S.PatchErrorFunctionNames[S.nNumPatchErrorFunctions++] = pFunctionName;
		}
		uprintf("interception fail!!\n");
		return false;
	}
}
void MicroProfileInstrumentPreInit();
void MicroProfileSymbolInitializeInternal();
void MicroProfileSymbolFreeDataInternal();
void MicroProfileSymbolKickThread();
void MicroProfileQueryJoinThread();
// Requests symbol loading, or queries its state.
//
//  bStartLoad   false: only report whether any module load has finished so far.
//               true:  flag the selected modules for loading and kick the symbol thread.
//  pModuleName  base string of the module to load; null selects every known module.
//
// With bStartLoad set, returns true when the number of finished loads equals the number of
// requested loads at the time of the call.
bool MicroProfileSymbolInitialize(bool bStartLoad, const char* pModuleName)
{
	if(!bStartLoad)
	{
		return 0 != S.SymbolState.nModuleLoadsFinished.load();
	}
	{
		MicroProfileScopeLock L(MicroProfileMutex());
		for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
		{
			auto& Module = S.SymbolModules[nModule];
			const bool bSelected = 0 == pModuleName || 0 == strcmp(pModuleName, (const char*)Module.pBaseString);
			// exchange() returns the previous flag, so each module is counted once only
			if(bSelected && 0 == Module.nModuleLoadRequested.exchange(1))
			{
				S.SymbolState.nModuleLoadsRequested.fetch_add(1);
			}
		}
	}
	// todo: unload modules
	MicroProfileSymbolKickThread();
	return S.SymbolState.nModuleLoadsRequested.load() == S.SymbolState.nModuleLoadsFinished.load();
	// Previous state-machine based implementation, kept for reference:
	// if(S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DEFAULT)
	// {
	// 	if(!bStartLoad)
	// 		return false;
	// 	{
	// 		MicroProfileScopeLock L(MicroProfileMutex());
	// 		S.SymbolState.nState.store(MICROPROFILE_SYMBOLSTATE_LOADING);
	// 		S.SymbolState.nSymbolsLoaded.store(0);
	// 	}
	// 	MicroProfileSymbolKickThread();
	// 	return false;
	// }
	// if(nRequests)
	// {
	// }
	// if(S.SymbolState.nState.load() == MICROPROFILE_SYMBOLSTATE_DONE)
	// {
	// 	MicroProfileQueryJoinThread();
	// }
	// if(S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DONE && bStartLoad)
	// {
	// 	MicroProfileSymbolFreeDataInternal();
	// 	{
	// 		MicroProfileScopeLock L(MicroProfileMutex());
	// 		S.SymbolState.nState.store(MICROPROFILE_SYMBOLSTATE_LOADING);
	// 		S.SymbolState.nSymbolsLoaded.store(0);
	// 	}
	// 	MicroProfileSymbolKickThread();
	// 	return false;
	// }
	// else
	// {
	// 	return S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DONE;
	// }
}
// Releases all loaded symbol data: frees every symbol block of every module, clears the recorded
// patch error function names and resets the module table and module name buffer.
// Currently logs and hits MP_BREAK() first, i.e. it is not expected to be called yet.
void MicroProfileSymbolFreeDataInternal()
{
	uprintf("todod;....\n");
	MP_BREAK();
	// MP_ASSERT(S.SymbolState.nState == MICROPROFILE_SYMBOLSTATE_DONE);
	S.nNumPatchErrorFunctions = 0;
	memset(S.PatchErrorFunctionNames, 0, sizeof(S.PatchErrorFunctionNames));
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		auto& Module = S.SymbolModules[nModule];
		// pop and free the blocks of the module's singly linked list one by one
		while(MicroProfileSymbolBlock* pBlock = Module.pSymbolBlock)
		{
			Module.pSymbolBlock = pBlock->pNext;
			MP_FREE(pBlock);
			MICROPROFILE_COUNTER_SUB("/MicroProfile/Symbols/Allocs", 1);
			MICROPROFILE_COUNTER_SUB("/MicroProfile/Symbols/Memory", sizeof(MicroProfileSymbolBlock));
		}
	}
	memset(&S.SymbolModules[0], 0, sizeof(S.SymbolModules));
	memset(&S.SymbolModuleNameBuffer[0], 0, sizeof(S.SymbolModuleNameBuffer));
	S.SymbolModuleNameOffset = 0;
	S.SymbolNumModules = 0;
}
#if STRING_MATCH_SIZE == 64
// Maps a character to its bit index in the 64 bit string match mask:
//   letters (case folded)    0..25
//   digits                   26..35
//   : ; \ ' " / { } ( ) [ ] < > . ,   37..52 (in that order)
//   space                    -1 (contributes no bit)
//   everything else          63
int MicroProfileCharacterMaskCharIndex(char c)
{
	if(c >= 'A' && c <= 'Z')
		c = 'a' + (c - 'A');
	if(c >= 'a' && c <= 'z')
		return c - 'a';
	if(c >= '0' && c <= '9')
		return 26 + (c - '0');
	if(c == ' ')
		return -1;
	// punctuation that gets a bit of its own; position in the table + 37 is the index
	static const char kPunctuation[] = ":;\\\'\"/{}()[]<>.,";
	for(int i = 0; kPunctuation[i] != '\0'; ++i)
	{
		if(kPunctuation[i] == c)
			return 37 + i;
	}
	return 63;
}
// Returns the single-bit 64 bit mask for character c, i.e. 1 << MicroProfileCharacterMaskCharIndex(c),
// or 0 when the character has no bit (index -1).
uint64_t MicroProfileCharacterMaskChar(char c)
{
	const int nBit = MicroProfileCharacterMaskCharIndex(c);
	if(nBit == -1)
	{
		return 0;
	}
	return (uint64_t)1 << nBit;
}
#else
// Returns the single-bit 32 bit mask for character c:
//   letters (case folded)   bits 0..20, all letters from 'u' on share bit 20
//   digits                  bits 21..30
//   : ; \ ' " / { } ( ) [ ] bit 31
//   anything else, including space: 0
uint32_t MicroProfileCharacterMaskChar(char c)
{
	if(c >= 'A' && c <= 'Z')
		c = 'a' + (c - 'A');
	if(c >= 'a' && c <= 'z')
	{
		// squish the last letters together so the alphabet fits into 21 bits
		const int nBit = MicroProfileMin(20, c - 'a');
		return (uint32_t)1 << nBit;
	}
	if(c >= '0' && c <= '9')
	{
		const int nBit = 21 + (c - '0');
		if(nBit < 21 || nBit > 30)
			MP_BREAK();
		return (uint32_t)1 << nBit;
	}
	// special characters share the top bit
	static const char kSpecial[] = ":;\\\'\"/{}()[]";
	for(int i = 0; kSpecial[i] != '\0'; ++i)
	{
		if(kSpecial[i] == c)
			return 1u << 31;
	}
	return 0;
}
// Maps a character to its bit index in the 32 bit string match mask:
//   letters (case folded)   0..20, all letters from 'u' on share index 20
//   digits                  21..30
//   : ; \ ' " / { } ( ) [ ] 31
//   space                   -1
//   everything else         1
// NOTE(review): the fallback index 1 coincides with the index of 'b', while the matching
// MicroProfileCharacterMaskChar returns 0 for such characters - confirm this is intended.
int MicroProfileCharacterMaskCharIndex(char c)
{
	if(c >= 'A' && c <= 'Z')
		c = 'a' + (c - 'A');
	if(c >= 'a' && c <= 'z')
	{
		// squish the last letters together so the alphabet fits into 21 indices
		const int nIndex = MicroProfileMin(20, c - 'a');
		// one-time debug dump, emitted on the first letter that is looked up
		static int bDumped = 0;
		if(0 == bDumped)
		{
			for(int n = 20; n < 28; ++n)
			{
				uprintf("char %d is %c\n", n, (char)('a' + n));
			}
			bDumped = 1;
		}
		return nIndex;
	}
	if(c >= '0' && c <= '9')
	{
		const int nIndex = 21 + (c - '0');
		if(nIndex < 21 || nIndex > 30)
			MP_BREAK();
		return nIndex;
	}
	if(c == ' ')
		return -1;
	// special characters share the top index
	static const char kSpecial[] = ":;\\\'\"/{}()[]";
	for(int i = 0; kSpecial[i] != '\0'; ++i)
	{
		if(kSpecial[i] == c)
			return 31;
	}
	return 1;
}
#endif
// Returns the union of MicroProfileCharacterMaskChar over all characters of the
// null-terminated string pStr (0 for an empty string).
uint_string_match MicroProfileCharacterMaskString(const char* pStr)
{
	uint_string_match nMask = 0;
	for(const char* p = pStr; *p != '\0'; ++p)
	{
		nMask |= MicroProfileCharacterMaskChar(*p);
	}
	return nMask;
}
void MicroProfileCharacterMaskString2(const char* pStr, MicroProfileStringMatchMask& M)
{
uint_string_match nMask = 0;
char c = 0;
int nLast = -1;
while(0 != (c = *pStr++))
{
nMask |= MicroProfileCharacterMaskChar(c);
int nIndex = MicroProfileCharacterMaskCharIndex(c);
if(nIndex >= 0 && nLast >= 0)
{
MP_ASSERT(nIndex < STRING_MATCH_SIZE);
M.M[nLast] |= 1llu << nIndex;
}
nLast = nIndex;
}
M.nMask |= nMask;
}
// Returns true when every bit set in String (its nMask and each of its M[] rows) is also set in
// Block, i.e. Block may contain the string. A false result rules the block out.
bool MicroProfileCharacterMatch(const MicroProfileStringMatchMask& Block, const MicroProfileStringMatchMask& String)
{
	if((Block.nMask & String.nMask) != String.nMask)
	{
		return false;
	}
	uint32_t nRow = 0;
	while(nRow < STRING_MATCH_SIZE)
	{
		if((Block.M[nRow] & String.M[nRow]) != String.M[nRow])
		{
			return false;
		}
		++nRow;
	}
	return true;
}
// Returns the index of the module that has an executable region [nBegin, nEnd) containing
// nBaseAddr. pString is not used. Hits MP_BREAK() and returns 0 when no region matches.
uint32_t MicroProfileSymbolGetModule(const char* pString, intptr_t nBaseAddr)
{
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		auto& Module = S.SymbolModules[nModule];
		for(int nRegion = 0; nRegion < Module.nNumExecutableRegions; ++nRegion)
		{
			auto& Region = Module.Regions[nRegion];
			const bool bInside = Region.nBegin <= nBaseAddr && nBaseAddr < Region.nEnd;
			if(bInside)
			{
				return nModule;
			}
		}
	}
	MP_BREAK(); // should never happen.
	return 0;
}
// For every module: sorts its executable regions by start address and merges regions that are
// exactly adjacent (one ends where the next begins). Afterwards logs all modules and regions.
void MicroProfileSymbolMergeExecutableRegions()
{
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		auto& Module = S.SymbolModules[nModule];
		if(Module.nNumExecutableRegions <= 1)
		{
			continue; // nothing to merge
		}
		std::sort(&Module.Regions[0], &Module.Regions[Module.nNumExecutableRegions], [](const MicroProfileSymbolModuleRegion& l, const MicroProfileSymbolModuleRegion& r) { return l.nBegin < r.nBegin; });
		// compact in place: nKept is the last region kept so far, nNext the one being examined
		int nKept = 0;
		for(int nNext = 1; nNext < Module.nNumExecutableRegions; ++nNext)
		{
			if(Module.Regions[nKept].nEnd == Module.Regions[nNext].nBegin)
			{
				Module.Regions[nKept].nEnd = Module.Regions[nNext].nEnd;
			}
			else if(++nKept != nNext)
			{
				Module.Regions[nKept] = Module.Regions[nNext];
			}
		}
		Module.nNumExecutableRegions = nKept + 1;
	}
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		auto& Module = S.SymbolModules[nModule];
		uprintf("region %s %s\n", Module.pTrimmedString, Module.pBaseString);
		for(int nRegion = 0; nRegion < Module.nNumExecutableRegions; ++nRegion)
		{
			uprintf("\t[%p-%p]\n", (void*)Module.Regions[nRegion].nBegin, (void*)Module.Regions[nRegion].nEnd);
		}
	}
}
// Registers the executable region [nAddrBegin, nAddrEnd) of the module named pString_ and returns
// the module index.
//  1. If an existing region of any module already encloses the range, that module is returned.
//  2. If a module with the same (interned) name exists, the region is appended to it, unless a
//     region with the same start exists already. Returns (uint32_t)-1 when the module's region
//     table is full.
//  3. Otherwise a new module entry is created; its trimmed name (the part after the last path
//     separator) is appended to S.SymbolModuleNameBuffer. Returns 0 when that buffer is full.
uint32_t MicroProfileSymbolInitModule(const char* pString_, intptr_t nAddrBegin, intptr_t nAddrEnd)
{
	const char* pString = MicroProfileStringInternSlash(pString_);

	// 1. range already covered by a known region?
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		auto& Module = S.SymbolModules[nModule];
		for(int nRegion = 0; nRegion < Module.nNumExecutableRegions; ++nRegion)
		{
			auto& Region = Module.Regions[nRegion];
			if(Region.nBegin <= nAddrBegin && nAddrEnd < Region.nEnd)
			{
				MP_ASSERT(pString == Module.pBaseString);
				return nModule;
			}
		}
	}

	// 2. known module (names are interned, so pointer comparison is sufficient)?
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		auto& Module = S.SymbolModules[nModule];
		if(Module.pBaseString != pString)
		{
			continue;
		}
		MP_ASSERT((intptr_t)pString != -2);
		for(int nRegion = 0; nRegion < Module.nNumExecutableRegions; ++nRegion)
		{
			if(nAddrBegin == Module.Regions[nRegion].nBegin)
				return nModule;
		}
		if(Module.nNumExecutableRegions == MICROPROFILE_MAX_MODULE_EXEC_REGIONS)
		{
			return (uint32_t)-1;
		}
		auto& NewRegion = Module.Regions[Module.nNumExecutableRegions];
		NewRegion.nBegin = nAddrBegin;
		NewRegion.nEnd = nAddrEnd;
		// uprintf("added module region %d %p %p %s \n", M.nNumExecutableRegions, (void*)nAddrBegin, (void*)nAddrEnd, pString);
		Module.nNumExecutableRegions++;
		return nModule;
	}

	// 3. new module
	MP_ASSERT((intptr_t)pString != -2);
	// trim until the last path separator that is followed by at least one character
	const char* pTrimmedString = pString;
	for(const char* pWork = pString; *pWork != '\0'; ++pWork)
	{
		const bool bSeparator = *pWork == '\\' || *pWork == '/';
		if(bSeparator && pWork[1] != '\0')
		{
			pTrimmedString = pWork + 1;
		}
	}
	int nLen = (int)strlen(pTrimmedString) + 1;
	// uprintf("STRING '%s' :: trimmedstring %s . len %d\n", pString, pTrimmedString, nLen);
	const char* pTrimmedIntern = MicroProfileStringIntern(pTrimmedString);
	if(S.SymbolModuleNameOffset + nLen > MICROPROFILE_INSTRUMENT_MAX_MODULE_CHARS)
		return 0;
	memcpy(&S.SymbolModuleNameBuffer[0] + S.SymbolModuleNameOffset, pTrimmedString, nLen);
	MP_ASSERT(S.SymbolNumModules < MICROPROFILE_INSTRUMENT_MAX_MODULES);
	auto& NewModule = S.SymbolModules[S.SymbolNumModules];
	NewModule.nModuleBase = nAddrBegin;
	NewModule.nMatchOffset = 0;
	NewModule.nStringOffset = S.SymbolModuleNameOffset;
	NewModule.pBaseString = (const char*)pString;
	NewModule.pTrimmedString = pTrimmedIntern;
	NewModule.Regions[0].nBegin = nAddrBegin;
	NewModule.Regions[0].nEnd = nAddrEnd;
	NewModule.nNumExecutableRegions = 1;
	NewModule.bDownloading = false;
	NewModule.nProgress = 0;
	NewModule.nProgressTarget = 0;
	S.SymbolModuleNameOffset += nLen;
	return S.SymbolNumModules++;
}
// Returns the trimmed name of module nIndex, which is stored in S.SymbolModuleNameBuffer at the
// module's nStringOffset. nIndex must be below S.SymbolNumModules (asserted).
const char* MicroProfileSymbolModuleGetString(uint32_t nIndex)
{
	MP_ASSERT(S.SymbolNumModules > (int)nIndex);
	const char* pNames = &S.SymbolModuleNameBuffer[0];
	return pNames + S.SymbolModules[nIndex].nStringOffset;
}
// Decides whether the symbol named pName must not be offered for instrumentation.
//  - Symbols containing "MicroProfile": always ignored when MICROPROFILE_INSTRUMENT_MICROPROFILE
//    is 0; otherwise only those that additionally contain one of a few internal keywords.
//  - On Windows additionally: names starting with "__" and a fixed list of runtime helpers.
bool MicroProfileSymbolIgnoreSymbol(const char* pName)
{
	if(strstr(pName, "MicroProfile"))
	{
#if MICROPROFILE_INSTRUMENT_MICROPROFILE == 0
		return true;
#else
		// just for debugging: skip these so we can play around with the sample projects
		static const char* const kInternalKeywords[] = { "Log", "Scope", "Tick", "Enter", "Leave", "Thread", "Mutex" };
		for(const char* pKeyword : kInternalKeywords)
		{
			if(strstr(pName, pKeyword))
				return true;
		}
#endif
	}
#ifdef _WIN32
	if(pName[0] == '_' && pName[1] == '_')
		return true;
	static const char* const kRuntimeHelpers[] = { "__security_check_cookie", "_RTC_CheckStackVars", "__chkstk", "std::_Atomic", "_Init_thread_header", "_Init_thread_footer" };
	for(const char* pHelper : kRuntimeHelpers)
	{
		if(strstr(pName, pHelper))
			return true;
	}
#endif
	return false;
}
// Loads symbols for every module whose load was requested but has not finished yet.
// Repeats until no such module is left: collects the pending modules, lets
// MicroProfileIterateSymbols feed each symbol to SymbolCallback, then bumps the finished counter.
// Symbols are stored in per-module linked lists of MicroProfileSymbolBlock and indexed by address
// in the module's AddressToSymbol hash table.
void MicroProfileSymbolInitializeInternal()
{
uprintf("Starting load...\n");
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolInitialize", MP_CYAN);
// Allocates a zeroed symbol block and accounts for it in the symbol counters.
auto AllocBlock = []() -> MicroProfileSymbolBlock*
{
MicroProfileSymbolBlock* pBlock = MP_ALLOC_OBJECT(MicroProfileSymbolBlock);
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Allocs", 1);
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Memory", sizeof(MicroProfileSymbolBlock));
MICROPROFILE_COUNTER_CONFIG_ONCE("/MicroProfile/Symbols/Memory", MICROPROFILE_COUNTER_FORMAT_BYTES, 0, 0);
memset(pBlock, 0, sizeof(MicroProfileSymbolBlock));
return pBlock;
};
// Stores one symbol [nAddress, nAddressEnd) of module nModuleId. Symbols whose start address is
// already present in the module's hash table are dropped.
auto SymbolCallback = [&](const char* pName, const char* pShortName, intptr_t nAddress, intptr_t nAddressEnd, uint32_t nModuleId)
{
MICROPROFILE_SCOPEI("microprofile", "SymbolCallback", MP_AUTO);
uint32_t nModule = nModuleId;
if(MicroProfileHashTableGetPtr(&S.SymbolModules[nModule].AddressToSymbol, (void*)nAddress))
{
return;
}
// when demangling succeeds, the demangled text replaces both the name and the short name
char Demangled[1024];
if(MicroProfileDemangleName(pName, Demangled, sizeof(Demangled)))
{
pName = &Demangled[0];
pShortName = &Demangled[0];
}
intptr_t delta = nAddressEnd - nAddress;
S.SymbolModules[nModule].nProgress = MicroProfileMax(delta, S.SymbolModules[nModule].nProgress);
S.nSymbolsDirty++;
int nIgnoreSymbol = MicroProfileSymbolIgnoreSymbol(pName) ? 1 : 0;
// the head of the module's block list is the block currently being filled
MicroProfileSymbolBlock* pActiveBlock = S.SymbolModules[nModule].pSymbolBlock;
if(!pActiveBlock)
{
pActiveBlock = AllocBlock();
pActiveBlock->pNext = S.SymbolModules[nModule].pSymbolBlock;
S.SymbolModules[nModule].pSymbolBlock = pActiveBlock;
}
// identical pointers: no separate short name needs to be stored
if(pName == pShortName)
{
pShortName = 0;
}
// lengths include the terminator and are clamped to MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN
uint32_t nLen = (uint32_t)strlen(pName) + 1;
if(nLen > MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN)
nLen = MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN;
uint32_t nLenShort = (uint32_t)(pShortName ? 1 + strlen(pShortName) : 0);
if(nLenShort && nLenShort > MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN)
nLenShort = MICROPROFILE_INSTRUMENT_SYMBOLNAME_MAXLEN;
// S0: bytes used by descriptors, S1: bytes used by strings, S3: bytes this symbol needs
// (with 64 bytes of slack for the capacity test)
uint32_t S0 = sizeof(MicroProfileSymbolDesc) * pActiveBlock->nNumSymbols;
uint32_t S1 = pActiveBlock->nNumChars;
uint32_t S3 = nLenShort + nLen + sizeof(MicroProfileSymbolDesc) + 64;
if(S0 + S1 + S3 >= MicroProfileSymbolBlock::ESIZE)
{
// active block is full: start a new block and make it the head of the list
MicroProfileSymbolBlock* pNewBlock = AllocBlock();
MP_ASSERT(pActiveBlock == S.SymbolModules[nModule].pSymbolBlock);
pNewBlock->pNext = pActiveBlock;
S.SymbolModules[nModule].pSymbolBlock = pNewBlock;
pActiveBlock = pNewBlock;
}
S0 = sizeof(MicroProfileSymbolDesc) * pActiveBlock->nNumSymbols;
S1 = pActiveBlock->nNumChars;
S3 = nLenShort + nLen + sizeof(MicroProfileSymbolDesc);
MP_ASSERT(S0 + S1 + S3 < MicroProfileSymbolBlock::ESIZE);
// strings are placed from the end of the block towards the front (offset ESIZE - nNumChars - 1),
// descriptors are appended through Symbols[nNumSymbols++]
pActiveBlock->nNumChars += nLen;
char* pStr = &pActiveBlock->Chars[MicroProfileSymbolBlock::ESIZE - pActiveBlock->nNumChars - 1];
memcpy(pStr, pName, nLen);
pStr[nLen - 1] = '\0'; // terminates the name when it was clamped
MicroProfileSymbolDesc& E = pActiveBlock->Symbols[pActiveBlock->nNumSymbols++];
MicroProfileHashTableSetPtr(&S.SymbolModules[nModule].AddressToSymbol, (void*)nAddress, &E);
E.pName = pStr;
E.nAddress = nAddress;
E.nAddressEnd = nAddressEnd;
E.nIgnoreSymbol = nIgnoreSymbol;
E.nModule = nModule;
if(pShortName && strlen(pShortName))
{
pActiveBlock->nNumChars += nLenShort;
char* pStrShort = &pActiveBlock->Chars[MicroProfileSymbolBlock::ESIZE - pActiveBlock->nNumChars - 1];
memcpy(pStrShort, pShortName, nLenShort);
pStrShort[nLenShort - 1] = '\0';
E.pShortName = pStrShort;
}
else
{
// no distinct short name: share the full name
E.pShortName = E.pName;
}
// set SYMDBG to 1 for verbose per-symbol logging (also slows loading down via MicroProfileSleep)
#define SYMDBG 0
#if SYMDBG
uprintf("Got symbol %lld %lld %f .. %llx %llx %llx %s\n",
S.SymbolModules[nModule].nProgress,
S.SymbolModules[nModule].nProgressTarget,
S.SymbolModules[nModule].nProgressTarget ? float(S.SymbolModules[nModule].nProgress) / float(S.SymbolModules[nModule].nProgressTarget) : 0.f,
(int64_t)E.nAddress,
(int64_t)S.SymbolModules[nModule].nAddrBegin,
(int64_t)S.SymbolModules[nModule].nAddrEnd,
E.pName);
if(E.nAddress < (int64_t)S.SymbolModules[nModule].nAddrBegin || E.nAddress > (int64_t)S.SymbolModules[nModule].nAddrEnd)
{
MP_BREAK();
}
#endif
// per-symbol character mask plus the block-wide masks consulted by the symbol query
E.nMask = MicroProfileCharacterMaskString(E.pShortName);
MicroProfileCharacterMaskString2(E.pShortName, pActiveBlock->MatchMask);
pActiveBlock->nMask |= E.nMask;
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Count", 1);
if(nIgnoreSymbol)
{
MICROPROFILE_COUNTER_ADD("/MicroProfile/Symbols/Ignored", 1);
}
#if SYMDBG
MicroProfileSleep(10);
#endif
#undef SYMDBG
S.SymbolModules[nModule].nSymbolsLoaded.fetch_add(1);
S.nSymbolsDirty.exchange(1);
S.SymbolState.nSymbolsLoaded.fetch_add(1);
MP_ASSERT((intptr_t)E.pShortName >= (intptr_t)&E); // assert pointer arithmetic is correct.
};
do
{
// collect the modules that were requested but not yet loaded
uint32_t nModuleLoad[MICROPROFILE_INSTRUMENT_MAX_MODULES];
uint32_t nNumModulesRequested = 0;
for(int i = 0; i < S.SymbolNumModules; ++i)
{
if(S.SymbolModules[i].nModuleLoadRequested.load() != 0 && S.SymbolModules[i].nModuleLoadFinished.load() == 0)
{
nModuleLoad[nNumModulesRequested] = i;
S.SymbolModules[i].nProgress = 0;
MicroProfileHashTableInit(&S.SymbolModules[i].AddressToSymbol, 256, 64, MicroProfileHashTableComparePtr, MicroProfileHashTableHashPtr);
nNumModulesRequested++;
}
}
if(0 == nNumModulesRequested)
{
break;
}
MicroProfileIterateSymbols(SymbolCallback, nModuleLoad, nNumModulesRequested);
S.SymbolState.nModuleLoadsFinished.fetch_add(nNumModulesRequested);
// NOTE(review): nModuleLoadFinished of the individual modules is not written in this function;
// presumably MicroProfileIterateSymbols sets it - verify, otherwise this loop cannot terminate.
for(uint32_t i = 0; i < nNumModulesRequested; ++i)
{
if(S.SymbolModules[nModuleLoad[i]].nModuleLoadRequested.load() == S.SymbolModules[nModuleLoad[i]].nModuleLoadFinished.load())
{
S.SymbolModules[nModuleLoad[i]].nProgress = S.SymbolModules[nModuleLoad[i]].nProgressTarget;
S.nSymbolsDirty.exchange(1);
}
}
} while(1);
}
// Looks up the symbol that starts exactly at pAddress in the address tables of all modules.
// Returns its descriptor, or nullptr when the address is unknown or the first matching symbol is
// flagged as ignored.
MicroProfileSymbolDesc* MicroProfileSymbolFindFuction(void* pAddress)
{
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		MicroProfileSymbolDesc* pFound = nullptr;
		if(!MicroProfileHashTableGetPtr(&S.SymbolModules[nModule].AddressToSymbol, pAddress, &pFound))
		{
			continue;
		}
		// the first module that knows the address decides; ignored symbols are hidden
		return 0 == pFound->nIgnoreSymbol ? pFound : nullptr;
	}
	return nullptr;
}
#define MICROPROFILE_MAX_FILTER 32
#define MICROPROFILE_MAX_QUERY_RESULTS 32
#define MICROPROFILE_MAX_FILTER_STRING 1024
// A symbol search request together with its results. Instances are recycled through
// S.pQueryFreeList and are zeroed with memset on allocation, so the struct must stay trivially
// copyable.
struct MicroProfileFunctionQuery
{
MicroProfileFunctionQuery* pNext; // link used while the query sits on the free list
uint32_t nState;
const char* pFilterStrings[MICROPROFILE_MAX_FILTER]; // sub patterns, matched in order against a symbol's short name
uint32_t nPatternLength[MICROPROFILE_MAX_FILTER]; // characters to skip past a hit of the corresponding pattern
int nMaxFilter; // number of entries in pFilterStrings / nPatternLength passed to MicroProfileStringMatch
uint32_t nModuleFilterMatch[MICROPROFILE_INSTRUMENT_MAX_MODULES]; // prematch the modules, so it can be skipped during search
uint32_t nMask[MICROPROFILE_MAX_FILTER]; // character masks, indexed by a module's match offset and compared against each symbol's nMask
MicroProfileStringMatchMask MatchMask[MICROPROFILE_MAX_FILTER]; // adjacency masks, indexed by a module's match offset and tested against each block's MatchMask
// results
MicroProfileSymbolDesc* Results[MICROPROFILE_MAX_QUERY_RESULTS];
uint32_t nNumResults; // number of valid entries in Results
char FilterString[MICROPROFILE_MAX_FILTER_STRING];
uint32_t QueryId;
};
// Returns a zeroed query object, taken from S.pQueryFreeList when possible and freshly allocated
// otherwise. Runs under the MicroProfile mutex.
// NOTE(review): S.nNumQueryAllocated is incremented once per call and a second time when a new
// object is allocated - confirm that this double counting is intended.
MicroProfileFunctionQuery* MicroProfileAllocFunctionQuery()
{
	MicroProfileScopeLock L(MicroProfileMutex());
	S.nNumQueryAllocated++;
	MicroProfileFunctionQuery* pQuery = S.pQueryFreeList;
	if(pQuery != 0)
	{
		// reuse: unlink the head of the free list
		S.pQueryFreeList = pQuery->pNext;
		S.nNumQueryFree--;
	}
	else
	{
		pQuery = MP_ALLOC_OBJECT(MicroProfileFunctionQuery);
		MICROPROFILE_COUNTER_ADD("MicroProfile/Symbols/FunctionQuery", 1);
		MICROPROFILE_COUNTER_ADD("MicroProfile/Symbols/FunctionQueryMem", sizeof(MicroProfileFunctionQuery));
		S.nNumQueryAllocated++;
	}
	memset(pQuery, 0, sizeof(MicroProfileFunctionQuery));
	return pQuery;
}
// Returns a query object to S.pQueryFreeList so MicroProfileAllocFunctionQuery can reuse it.
// No lock is taken here.
// NOTE(review): presumably the caller holds the MicroProfile mutex - verify at the call sites.
void MicroProfileFreeFunctionQuery(MicroProfileFunctionQuery* pQ)
{
	pQ->pNext = S.pQueryFreeList;
	S.pQueryFreeList = pQ;
	// keep the counter in step with the decrement done when the allocator pops the free list
	S.nNumQueryFree++;
}
// Runs a symbol query: walks the symbol blocks of all modules and appends every non-ignored
// symbol whose short name matches the query's filter strings to Q.Results.
// The search stops when MICROPROFILE_MAX_QUERY_RESULTS results were found or as soon as
// S.pPendingQuery becomes non-null. Blocks and symbols are pre-filtered with the character masks
// before the string comparison is done. Logs timing and filter statistics at the end.
void MicroProfileProcessQuery(MicroProfileFunctionQuery* pQuery)
{
	MicroProfileFunctionQuery& Q = *pQuery;
	// statistics for the log line below
	int nBlocks = 0;
	int nBlocksTested = 0;
	int nSymbolsTested = 0;
	int nStringsTested = 0;
	int nStringsTested0 = 0;
	int64_t nTickStart = MP_TICK();
	int64_t nTickFull = 0; // tick at which the result list became full, 0 if it never did
	for(int nModule = 0; nModule < S.SymbolNumModules; ++nModule)
	{
		uint32_t nModuleMatchOffset = Q.nModuleFilterMatch[nModule];
		uint32_t nMaskQ = Q.nMask[nModuleMatchOffset];
		MicroProfileStringMatchMask& MatchMaskQ = Q.MatchMask[nModuleMatchOffset];
		for(MicroProfileSymbolBlock* pBlock = S.SymbolModules[nModule].pSymbolBlock; pBlock && 0 == S.pPendingQuery && Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS; pBlock = pBlock->pNext)
		{
			MICROPROFILE_SCOPEI("MicroProfile", "SymbolQueryLoop", MP_YELLOW);
			nBlocks++;
			// block level filter: skip blocks that cannot contain the pattern
			if(!MicroProfileCharacterMatch(pBlock->MatchMask, MatchMaskQ))
			{
				continue;
			}
			nBlocksTested++;
			for(uint32_t nSymbol = 0; nSymbol < pBlock->nNumSymbols && 0 == S.pPendingQuery && Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS; ++nSymbol)
			{
				MicroProfileSymbolDesc& E = pBlock->Symbols[nSymbol];
				if(0 != E.nIgnoreSymbol)
				{
					continue;
				}
				nSymbolsTested++;
				// symbol level filter: all characters of the pattern must occur in the symbol
				if(nMaskQ != (nMaskQ & E.nMask))
				{
					continue;
				}
				nStringsTested++;
				MP_ASSERT((int)E.nModule < S.SymbolNumModules);
				if(MicroProfileStringMatch(E.pShortName, nModuleMatchOffset, &Q.pFilterStrings[0], Q.nPatternLength, Q.nMaxFilter))
				{
					if(Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS)
					{
						Q.Results[Q.nNumResults++] = &E;
						if(Q.nNumResults == MICROPROFILE_MAX_QUERY_RESULTS)
							nTickFull = MP_TICK();
					}
				}
				if(Q.nNumResults < MICROPROFILE_MAX_QUERY_RESULTS)
					nStringsTested0++;
			}
		}
	}
	int64_t nTickEnd = MP_TICK();
	float ToMS = MicroProfileTickToMsMultiplierCpu();
	float TIME = (nTickEnd - nTickStart) * ToMS;
	float TIME0 = (nTickFull - nTickStart) * ToMS;
	uprintf(" %6.3fms [%6.3f]: %5d/%5d blocks tested. %5d symbols %5d/%5d string compares\n", TIME, TIME0, nBlocksTested, nBlocks, nSymbolsTested, nStringsTested, nStringsTested0);
}
// Entry point of the symbol worker thread (started by MicroProfileSymbolKickThread).
// Every 100ms it takes MicroProfileMutex() and
//  - claims S.pPendingQuery if one is set, processes it with the mutex released, then pushes it onto S.pFinishedQuery;
//  - runs MicroProfileSymbolInitializeInternal() (mutex released) while requested and finished module loads differ.
// The parameter p is not used.
// NOTE(review): the while(1) loop has no break, so the statements after it (SymbolThreadFinished = 1,
// MicroProfileOnThreadExit) are never reached as written.
void* MicroProfileQueryThread(void* p)
{
MicroProfileOnThreadCreate("MicroProfileSymbolThread");
{
while(1)
{
MicroProfileSleep(100); // todo:: use an event instead
MicroProfileScopeLock L(MicroProfileMutex());
if(S.pPendingQuery != nullptr)
{
MICROPROFILE_SCOPEI("MicroProfile", "SymbolQuery", MP_WHEAT);
MicroProfileFunctionQuery* pQuery = S.pPendingQuery;
MP_ASSERT(pQuery->QueryId > S.nQueryProcessed);
// claim the query while still holding the mutex, then search without it
S.pPendingQuery = 0;
L.Unlock();
// uprintf("processing query %d\n", pQuery->QueryId);
MicroProfileProcessQuery(pQuery);
L.Lock();
S.nQueryProcessed = MicroProfileMax(pQuery->QueryId, S.nQueryProcessed);
// publish the result: push onto the head of the finished list
pQuery->pNext = S.pFinishedQuery;
S.pFinishedQuery = pQuery;
}
if(S.SymbolState.nModuleLoadsRequested.load() != S.SymbolState.nModuleLoadsFinished.load())
{
// outstanding module load requests: service them with the mutex released
L.Unlock();
MicroProfileSymbolInitializeInternal();
L.Lock();
}
}
S.SymbolThreadFinished = 1;
}
MicroProfileOnThreadExit();
return 0;
}
// Joins the symbol worker thread, but only once it has flagged itself as finished;
// afterwards both the finished and the running flag are cleared.
void MicroProfileQueryJoinThread()
{
	if(!S.SymbolThreadFinished)
		return;
	MicroProfileThreadJoin(&S.SymbolThread);
	S.SymbolThreadFinished = 0;
	S.SymbolThreadRunning = 0;
}
// Starts the symbol worker thread unless it is already marked as running.
void MicroProfileSymbolKickThread()
{
	// MicroProfileQueryJoinThread();
	if(S.SymbolThreadRunning != 0)
		return;
	S.SymbolThreadRunning = 1;
	MicroProfileThreadStart(&S.SymbolThread, MicroProfileQueryThread);
}
#if MICROPROFILE_WEBSERVER
// Sends the names of instrumented functions that have not been sent yet
// (indices [S.WSFunctionsInstrumentedSent, S.DynamicTokenIndex)) as one MSG_FUNCTION_NAMES
// message, then advances the sent marker. Does nothing when there is nothing new.
void MicroProfileSymbolSendFunctionNames(MpSocket Connection)
{
	if(!(S.WSFunctionsInstrumentedSent < S.DynamicTokenIndex))
		return;
	MicroProfileWSPrintStart(Connection);
	MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":[", MSG_FUNCTION_NAMES);
	const uint32_t nFirstUnsent = S.WSFunctionsInstrumentedSent;
	for(uint32_t nFunction = nFirstUnsent; nFunction < S.DynamicTokenIndex; ++nFunction)
	{
		const char* pFunctionName = S.FunctionsInstrumentedName[nFunction];
		const char* pModuleName = S.FunctionsInstrumentedModuleNames[nFunction];
		// every element but the first is preceded by a comma
		const bool bIsFirst = (nFunction == nFirstUnsent);
		MicroProfileWSPrintf(bIsFirst ? "[\"%s\",\"%s\",\"%s\"]" : ",[\"%s\",\"%s\",\"%s\"]", pFunctionName, pModuleName, "unused");
	}
	MicroProfileWSPrintf("]}");
	MicroProfileWSFlush();
	MicroProfileWSPrintEnd();
	S.WSFunctionsInstrumentedSent = S.DynamicTokenIndex;
}
// Sends all recorded patch errors (code bytes as hex, message, already-instrumented flag)
// plus the list of affected function names as one MSG_INSTRUMENT_ERROR message, then
// resets both lists. Does nothing when no patch errors are recorded.
void MicroProfileSymbolSendErrors(MpSocket Connection)
{
	if(0 == S.nNumPatchErrors)
		return;
	MicroProfileWSPrintStart(Connection);
	MicroProfileWSPrintf("{\"k\":\"%d\",\"v\":{\"version\":\"%d.%d\",\"data\":[", MSG_INSTRUMENT_ERROR, MICROPROFILE_MAJOR_VERSION, MICROPROFILE_MINOR_VERSION);
	for(int nError = 0; nError < S.nNumPatchErrors; ++nError)
	{
		MicroProfilePatchError& Error = S.PatchErrors[nError];
		if(nError > 0)
			MicroProfileWSPrintf(",");
		MicroProfileWSPrintf("{\"code\":\"");
		// raw code bytes, two hex digits each
		for(int nByte = 0; nByte < Error.nCodeSize; ++nByte)
			MicroProfileWSPrintf("%02x", Error.Code[nByte] & 0xff);
		MicroProfileWSPrintf("\",\"message\":\"%s\",\"already\":%d}", &Error.Message[0], Error.AlreadyInstrumented);
	}
	MicroProfileWSPrintf("],\"functions\":[");
	for(int nFunction = 0; nFunction < S.nNumPatchErrorFunctions; ++nFunction)
	{
		if(nFunction > 0)
			MicroProfileWSPrintf(",");
		MicroProfileWSPrintf("\"%s\"", S.PatchErrorFunctionNames[nFunction]);
	}
	MicroProfileWSPrintf("]}}");
	MicroProfileWSFlush();
	MicroProfileWSPrintEnd();
	S.nNumPatchErrors = 0;
	S.nNumPatchErrorFunctions = 0;
}
// Drains S.pFinishedQuery, keeps only the finished query with the highest QueryId
// (the first one seen wins on ties) and frees the rest. The surviving query's results
// are sent as one MSG_FUNCTION_RESULTS message, after which it is freed as well.
void MicroProfileSymbolQuerySendResult(MpSocket Connection)
{
	MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolQuerySendResult", MP_PINK2);
	MicroProfileFunctionQuery* pNewest = nullptr;
	{
		MicroProfileScopeLock Lock(MicroProfileMutex());
		while(MicroProfileFunctionQuery* pCandidate = S.pFinishedQuery)
		{
			// unlink the head before it is possibly handed to the free list
			S.pFinishedQuery = pCandidate->pNext;
			if(!pNewest)
			{
				pNewest = pCandidate;
			}
			else if(pCandidate->QueryId > pNewest->QueryId)
			{
				MicroProfileFreeFunctionQuery(pNewest);
				pNewest = pCandidate;
			}
			else
			{
				MicroProfileFreeFunctionQuery(pCandidate);
			}
		}
	}
	if(!pNewest)
		return;
	uprintf("Sending result for query %d\n", pNewest->QueryId);
	MicroProfileWSPrintStart(Connection);
	MicroProfileWSPrintf("{\"k\":\"%d\",\"q\":%d,\"v\":[", MSG_FUNCTION_RESULTS, pNewest->QueryId);
	for(uint32_t nResult = 0; nResult < pNewest->nNumResults; ++nResult)
	{
		MicroProfileSymbolDesc& Symbol = *pNewest->Results[nResult];
		// elements after the first carry a leading comma
		const char* pFormat = (nResult == 0) ? "{\"a\":\"%p\",\"n\":\"%s\",\"sn\":\"%s\",\"m\":\"%s\"}" : ",{\"a\":\"%p\",\"n\":\"%s\",\"sn\":\"%s\",\"m\":\"%s\"}";
		MicroProfileWSPrintf(pFormat, Symbol.nAddress, Symbol.pName, Symbol.pShortName, MicroProfileSymbolModuleGetString(Symbol.nModule));
	}
	MicroProfileWSPrintf("]}");
	MicroProfileWSFlush();
	MicroProfileWSPrintEnd();
	MicroProfileScopeLock Lock(MicroProfileMutex());
	MicroProfileFreeFunctionQuery(pNewest);
}
#endif
// Registers a symbol query that arrived as a request string.
//
// pFilter has the form "<id>x<filter text>": the decimal query id, a literal 'x', then the
// filter. The filter is split on whitespace and '*' into at most MICROPROFILE_MAX_FILTER
// sub-patterns. The query becomes S.pPendingQuery when no query is pending, or replaces the
// pending one when its id is higher. Finally the symbol worker thread is started if needed.
//
// Connection is not used here. Requests without an 'x' separator are ignored.
void MicroProfileSymbolQueryFunctions(MpSocket Connection, const char* pFilter)
{
	MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolQueryFunctions", MP_WHEAT);
	(void)Connection;
	if(!MicroProfileSymbolInitialize(false))
	{
		return;
	}
	if(!pFilter)
	{
		return;
	}
	const int QueryId = atoi(pFilter);
	// The request comes from the client: do not step past a missing separator.
	const char* pSeparator = strchr(pFilter, 'x');
	if(!pSeparator)
	{
		return;
	}
	pFilter = pSeparator + 1;
	{
		MicroProfileScopeLock L(MicroProfileMutex());
		if(0 == S.pPendingQuery || S.pPendingQuery->QueryId < (uint32_t)QueryId)
		{
			// reuse the pending query object when a newer request supersedes it
			MicroProfileFunctionQuery* pQuery = S.pPendingQuery;
			if(!pQuery)
			{
				S.pPendingQuery = pQuery = MicroProfileAllocFunctionQuery();
			}
			MP_ASSERT(pQuery->pNext == 0);
			memset(pQuery, 0, sizeof(*pQuery));
			MicroProfileFunctionQuery& Q = *pQuery;
			Q.QueryId = QueryId;

			// private, truncated copy of the filter text
			uint32_t nLen = (uint32_t)strlen(pFilter);
			if(nLen > MICROPROFILE_MAX_FILTER_STRING - 1)
				nLen = MICROPROFILE_MAX_FILTER_STRING - 1;
			memcpy(Q.FilterString, pFilter, nLen);
			Q.FilterString[nLen] = '\0';

			// Split in place: separators become '\0', every sub-pattern start is recorded.
			// The masks are computed before the following separator is overwritten.
			char* pBuffer = Q.FilterString;
			bool bStartString = true;
			for(uint32_t i = 0; i < nLen; ++i)
			{
				const char c = pBuffer[i];
				if(c == '\0')
				{
					break;
				}
				// isspace requires a value representable as unsigned char
				if(isspace((unsigned char)c) || c == '*')
				{
					pBuffer[i] = '\0';
					bStartString = true;
				}
				else
				{
					if(bStartString)
					{
						if(Q.nMaxFilter < MICROPROFILE_MAX_FILTER)
						{
							const char* pstr = &pBuffer[i];
							Q.nMask[Q.nMaxFilter] = MicroProfileCharacterMaskString(pstr);
							MicroProfileCharacterMaskString2(pstr, Q.MatchMask[Q.nMaxFilter]);
							Q.pFilterStrings[Q.nMaxFilter++] = &pBuffer[i];
						}
					}
					bStartString = false;
				}
			}

			// The pattern lengths are handed to the match helpers below and by the query
			// thread, so fill them in now that every sub-pattern is '\0'-terminated.
			// (Previously they were only computed inside a disabled debug block.)
			for(int i = 0; i < Q.nMaxFilter; ++i)
			{
				Q.nPatternLength[i] = (uint32_t)strlen(Q.pFilterStrings[i]);
			}

			// prematch the module names; entries for unknown modules keep the 0xff fill
			memset(Q.nModuleFilterMatch, 0xff, sizeof(Q.nModuleFilterMatch));
			for(int i = 0; i < S.SymbolNumModules; ++i)
			{
				Q.nModuleFilterMatch[i] = MicroProfileStringMatchOffset(MicroProfileSymbolModuleGetString(i), Q.pFilterStrings, Q.nPatternLength, Q.nMaxFilter);
			}
#if 0
			uprintf("query %d::",QueryId);
			for(int i = 0; i < Q.nMaxFilter; ++i)
			{
				uprintf("'%s' ", Q.pFilterStrings[i]);
			}
			uprintf("\n");
#endif
		}
	}
	MicroProfileSymbolKickThread();
}
#if defined(_WIN32)
// '##::::'##::'#######:::'#######::'##:::'##::::'##:::::'##:'####:'##::: ##::'#######:::'#######::
// ##:::: ##:'##.... ##:'##.... ##: ##::'##::::: ##:'##: ##:. ##:: ###:: ##:'##.... ##:'##.... ##:
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##: ##: ##:: ##:: ####: ##:..::::: ##:..::::: ##:
// #########: ##:::: ##: ##:::: ##: #####::::::: ##: ##: ##:: ##:: ## ## ##::'#######:::'#######::
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::: ##: ##: ##:: ##:: ##. ####::...... ##:'##::::::::
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##::::: ##: ##: ##:: ##:: ##:. ###:'##:::: ##: ##::::::::
// ##:::: ##:. #######::. #######:: ##::. ##::::. ###. ###::'####: ##::. ##:. #######:: #########:
// ..:::::..:::.......::::.......:::..::::..::::::...::...:::....::..::::..:::.......:::.........::
#ifdef _WIN32
// Forward declarations of the memory helpers defined further down in this file.
static void* MicroProfileAllocExecutableMemory(void* pBase, size_t s);
static void* MicroProfileAllocExecutableMemoryFar(size_t s);
static void MicroProfileMakeMemoryExecutable(void* p, size_t s);
static void MicroProfileMakeWriteable(void* p_, size_t size, DWORD* oldFlags);
static void MicroProfileRestore(void* p_, size_t size, DWORD* oldFlags);
// Labels of the trampoline template, declared with C linkage. MicroProfilePatchFunction only takes their
// addresses: it copies the bytes from microprofile_tramp_enter up to microprofile_tramp_end and uses the
// other labels as offsets inside that copy.
extern "C" void microprofile_tramp_enter_patch();
extern "C" void microprofile_tramp_enter();
extern "C" void microprofile_tramp_code_begin();
extern "C" void microprofile_tramp_code_end();
extern "C" void microprofile_tramp_intercept0();
extern "C" void microprofile_tramp_end();
extern "C" void microprofile_tramp_exit();
extern "C" void microprofile_tramp_leave();
extern "C" void microprofile_tramp_trunk();
extern "C" void microprofile_tramp_call_patch_pop();
extern "C" void microprofile_tramp_call_patch_push();
// Redirects function f through a freshly allocated copy of the trampoline template.
//
//  f        function to patch (replaced by the result of MicroProfileX64FollowJump before patching)
//  Argument 32-bit value written into the trampoline copy in two places
//  enter    hook address written at the intercept0 label of the copy
//  leave    hook address written at the leave label of the copy
//  pError   optional; on a failed instruction move it receives the offending code bytes and a message
//
// Returns true when the jump to the trampoline was written into f, false when a suspended thread is
// inside the patch range or the leading instructions could not be moved.
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError)
{
// NOTE(review): pOriginal is not read again in this function.
char* pOriginal = (char*)f;
f = MicroProfileX64FollowJump(f);
// refuse to patch while a suspended thread has its instruction pointer in [f, f + 32]
if(MicroProfilePatchHasSuspendedThread((intptr_t)f, (intptr_t)f + 32))
{
uprintf("failed to patch, thread running in patch position");
return false;
}
// all label positions are expressed as offsets from microprofile_tramp_enter so they can be applied to the copy
intptr_t t_enter = (intptr_t)microprofile_tramp_enter;
intptr_t t_enter_patch_offset = (intptr_t)microprofile_tramp_enter_patch - t_enter;
intptr_t t_code_begin_offset = (intptr_t)microprofile_tramp_code_begin - t_enter;
intptr_t t_code_end_offset = (intptr_t)microprofile_tramp_code_end - t_enter;
intptr_t t_code_intercept0_offset = (intptr_t)microprofile_tramp_intercept0 - t_enter;
intptr_t t_code_exit_offset = (intptr_t)microprofile_tramp_exit - t_enter;
intptr_t t_code_leave_offset = (intptr_t)microprofile_tramp_leave - t_enter;
intptr_t t_code_call_patch_push_offset = (intptr_t)microprofile_tramp_call_patch_push - t_enter;
intptr_t t_code_call_patch_pop_offset = (intptr_t)microprofile_tramp_call_patch_pop - t_enter;
intptr_t codemaxsize = t_code_end_offset - t_code_begin_offset;
intptr_t t_end_offset = (intptr_t)microprofile_tramp_end - t_enter;
intptr_t t_trunk_offset = (intptr_t)microprofile_tramp_trunk - t_enter;
int t_trunk_size = (int)((intptr_t)microprofile_tramp_end - (intptr_t)microprofile_tramp_trunk);
// try to place the copy close to f first, fall back to an allocation anywhere
char* ptramp = (char*)MicroProfileAllocExecutableMemory(f, t_end_offset);
if(!ptramp)
ptramp = (char*)MicroProfileAllocExecutableMemoryFar(t_end_offset);
// NOTE(review): ptramp is not checked for null before it is used below.
intptr_t offset = ((intptr_t)f + 6 - (intptr_t)ptramp);
// 14 bytes are moved out of f by default, 5 when the trampoline is within relative-jump distance
uint32_t nBytesToCopy = 14;
if(offset < 0x80000000 && offset > -0x7fffffff)
{
/// offset is small enough to insert a relative jump
nBytesToCopy = 5;
}
memcpy(ptramp, (void*)t_enter, t_end_offset);
int nInstructionBytesDest = 0;
char* pInstructionMoveDest = ptramp + t_code_begin_offset;
char* pTrunk = ptramp + t_trunk_offset;
int nInstructionBytesSrc = 0;
uint32_t nRegsWritten = 0;
uint32_t nRetSafe = 1;
uint32_t nUsableJumpRegs = (1 << R_RAX) | (1 << R_R10) | (1 << R_R11);
static_assert(R_RAX == 0, "R_RAX must be 0");
// move the leading instructions of f into the code area of the trampoline copy
if(!MicroProfileCopyInstructionBytes(
pInstructionMoveDest, f, nBytesToCopy, (int)codemaxsize, pTrunk, t_trunk_size, nUsableJumpRegs, &nInstructionBytesDest, &nInstructionBytesSrc, &nRegsWritten, &nRetSafe))
{
// NOTE(review): the trampoline allocation is not released on this failure path.
if(pError)
{
const char* pCode = (const char*)f;
memset(pError->Code, 0, sizeof(pError->Code));
// NOTE(review): nInstructionBytesSrc is not checked against sizeof(pError->Code) here.
memcpy(pError->Code, pCode, nInstructionBytesSrc);
int off = stbsp_snprintf(pError->Message, sizeof(pError->Message), "Failed to move %d code bytes ", nInstructionBytesSrc);
pError->nCodeSize = nInstructionBytesSrc;
for(int i = 0; i < nInstructionBytesSrc; ++i)
{
off += stbsp_snprintf(off + pError->Message, sizeof(pError->Message) - off, "%02x ", 0xff & pCode[i]);
}
uprintf("%s\n", pError->Message);
}
return false;
}
// address in f right after the moved instructions; the trampoline jumps back here
intptr_t phome = nInstructionBytesSrc + (intptr_t)f;
// jump registers that the moved instructions did not write
uint32_t reg = nUsableJumpRegs & ~nRegsWritten;
if(0 == reg)
{
if(0 == nRetSafe)
MP_BREAK(); // should be caught earlier
MicroProfileInsertRetJump(pInstructionMoveDest + nInstructionBytesDest, phome);
}
else
{
// pick the lowest set bit of reg as the jump register
int r = R_RAX;
while((reg & 1) == 0)
{
reg >>= 1;
r++;
}
MicroProfileInsertRegisterJump(pInstructionMoveDest + nInstructionBytesDest, phome, r);
}
// PATCH 1 TRAMP EXIT
intptr_t microprofile_tramp_exit = (intptr_t)ptramp + t_code_exit_offset;
memcpy(ptramp + t_enter_patch_offset + 2, (void*)&microprofile_tramp_exit, 8);
char* pintercept = t_code_intercept0_offset + ptramp;
// PATCH 1.5 Argument
memcpy(pintercept - 4, (void*)&Argument, 4);
// PATCH 2 INTERCEPT0
intptr_t addr = (intptr_t)enter; //&intercept0;
memcpy(pintercept + 2, (void*)&addr, 8);
// PATCH 2.5 argument
memcpy(ptramp + t_code_exit_offset + 3, (void*)&Argument, 4);
intptr_t microprofile_tramp_leave = (intptr_t)ptramp + t_code_leave_offset;
// PATCH 3 INTERCEPT1
intptr_t addr1 = (intptr_t)leave; //&intercept1;
memcpy((char*)microprofile_tramp_leave + 2, (void*)&addr1, 8);
// addresses of the TLS push/pop helpers go into the two call-patch slots
intptr_t patch_push_addr = (intptr_t)(&MicroProfile_Patch_TLS_PUSH);
intptr_t patch_pop_addr = (intptr_t)(&MicroProfile_Patch_TLS_POP);
memcpy((char*)ptramp + t_code_call_patch_push_offset + 2, &patch_push_addr, 8);
memcpy((char*)ptramp + t_code_call_patch_pop_offset + 2, &patch_pop_addr, 8);
// the trampoline copy is complete: switch it to PAGE_EXECUTE_READ
MicroProfileMakeMemoryExecutable(ptramp, t_end_offset);
{
// PATCH 4 DEST FUNC
DWORD OldFlags[2] = { 0 };
MicroProfileMakeWriteable(f, nInstructionBytesSrc, OldFlags);
char* pp = (char*)f;
char* ppend = pp + nInstructionBytesSrc;
// fewer than 14 moved bytes: relative jump, otherwise a jump through RAX
if(nInstructionBytesSrc < 14)
{
pp = MicroProfileInsertRelativeJump((char*)pp, (intptr_t)ptramp);
}
else
{
pp = MicroProfileInsertRegisterJump((char*)pp, (intptr_t)ptramp, R_RAX);
}
// fill the rest of the moved range with 0x90 (x86 NOP)
while(pp != ppend)
{
char c = (unsigned char)0x90;
MP_ASSERT((unsigned char)c == (unsigned char)0x90);
*pp++ = (unsigned char)0x90;
}
MicroProfileRestore(f, nInstructionBytesSrc, OldFlags);
}
return true;
}
// Switches every 4KB page touched by [p_, p_ + s) to PAGE_EXECUTE_READWRITE and stores the
// previous protection of page i in oldFlags[i]. The range is asserted to span one or two
// pages, so oldFlags needs room for two entries.
static void MicroProfileMakeWriteable(void* p_, size_t s, DWORD* oldFlags)
{
	const uint64_t kPageSize = 4 << 10;
	const uint64_t kPageMask = ~(kPageSize - 1);
	const uint64_t nAddress = (uint64_t)(intptr_t)p_;
	const uint64_t nFirstPage = nAddress & kPageMask;
	const uint64_t nPastLastPage = (nAddress + s + kPageSize - 1) & kPageMask;
	const uint32_t nPageCount = (uint32_t)((nPastLastPage - nFirstPage) / kPageSize);
	MP_ASSERT(nPageCount >= 1 && nPageCount <= 2);
	for(uint32_t nPage = 0; nPage < nPageCount; ++nPage)
	{
		void* pPage = (void*)(intptr_t)(nFirstPage + kPageSize * nPage);
		if(!VirtualProtect(pPage, kPageSize, PAGE_EXECUTE_READWRITE, &oldFlags[nPage]))
		{
			MP_BREAK();
		}
	}
}
static void MicroProfileRestore(void* p_, size_t s, DWORD* oldFlags)
{
static uint64_t nPageSize = 4 << 10;
intptr_t aligned = (intptr_t)p_;
aligned = (aligned & (~(nPageSize - 1)));
intptr_t aligned_end = (intptr_t)p_;
aligned_end += s;
aligned_end = (aligned_end + nPageSize - 1) & (~(nPageSize - 1));
uint32_t nNumPages = (uint32_t)((aligned_end - aligned) / nPageSize);
DWORD Dummy;
for(uint32_t i = 0; i < nNumPages; ++i)
{
if(!VirtualProtect((void*)(aligned + nPageSize * i), nPageSize, oldFlags[i], &Dummy))
{
MP_BREAK();
}
}
}
void* MicroProfileAllocExecutableMemoryUp(intptr_t nBase, size_t s, uint32_t RegionIndex)
{
SYSTEM_INFO si;
GetSystemInfo(&si);
size_t Granularity = si.dwAllocationGranularity << 1;
nBase = (nBase / Granularity) * Granularity;
intptr_t nEnd = nBase + 0x80000000;
for(uint32_t i = RegionIndex; i < S.MemoryRegions.Size; i++)
{
// try and allocate 2x before
nBase = S.MemoryRegions[i].Start + S.MemoryRegions[i].Size + Granularity;
nBase = (nBase / Granularity) * Granularity;
if(nBase >= nEnd)
break;
void* pMemory = VirtualAlloc((void*)nBase, s, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if(pMemory)
{
return pMemory;
}
}
return nullptr;
}
static void MicroProfileUpdateMemoryRegions()
{
MicroProfileArrayClear(S.MemoryRegions);
SYSTEM_INFO si;
GetSystemInfo(&si);
BYTE* Addr = (BYTE*)si.lpMinimumApplicationAddress;
BYTE* MaxAddr = (BYTE*)si.lpMaximumApplicationAddress;
// uprintf("updating memory regions\n");
uint32_t idx = 0;
(void)idx;
while(Addr < MaxAddr)
{
MEMORY_BASIC_INFORMATION mbi;
SIZE_T Result = VirtualQuery(Addr, &mbi, sizeof(mbi));
if(Result == 0)
break;
MicroProfileInstrumentMemoryRegion region;
region.Start = (intptr_t)mbi.BaseAddress;
region.Size = (intptr_t)mbi.RegionSize;
MicroProfileArrayPushBack(S.MemoryRegions, region);
// uprintf("Memory Region %d: %p(%p) %p .. State=%08x Protect=%08x Type=%08x\n", idx++, mbi.BaseAddress, mbi.AllocationBase, (intptr_t)mbi.BaseAddress + mbi.RegionSize, mbi.State, mbi.Protect,
// mbi.Type);
Addr = (BYTE*)mbi.BaseAddress + mbi.RegionSize;
}
uprintf("Iterated %d regions\n", S.MemoryRegions.Size);
}
static void* MicroProfileAllocExecutableMemoryDown(intptr_t nBase, size_t s, uint32_t RegionIndex)
{
SYSTEM_INFO si;
GetSystemInfo(&si);
size_t Granularity = si.dwAllocationGranularity << 1;
intptr_t nEnd = nBase - 0x80000000;
for(int32_t i = RegionIndex; i >= 0; i--)
{
// try and allocate 2x before
nBase = S.MemoryRegions[i].Start - Granularity;
nBase = (nBase / Granularity) * Granularity;
if(nBase < nEnd)
break;
void* pMemory = VirtualAlloc((void*)nBase, s, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if(pMemory)
{
return pMemory;
}
}
return nullptr;
}
// Allocates s bytes (rounded up to 4KB) of PAGE_READWRITE memory near pBase, using the
// region table in S.MemoryRegions to find free address space.
//
// The search starts at the region containing pBase (region 0 when pBase is in none).
// It first looks around pBase - 1GB (downwards, then upwards), then around pBase + 1GB
// (upwards, then downwards). Returns nullptr when no attempt succeeds or when the region
// table is empty; the caller is expected to fall back to an allocation anywhere.
static void* MicroProfileAllocExecutableMemory(void* pBase, size_t s)
{
	// Without a region table there is nothing to search; the downward search would
	// otherwise index element 0 of an empty array.
	if(0 == S.MemoryRegions.Size)
		return nullptr;

	uint32_t RegionIndex = 0;
	for(uint32_t i = 0; i < S.MemoryRegions.Size; ++i)
	{
		auto& R = S.MemoryRegions[i];
		if(R.Start <= (intptr_t)pBase && (intptr_t)pBase < R.Start + R.Size)
		{
			RegionIndex = i;
			break;
		}
	}
	s = (s + 4095) & ~(4095);
	intptr_t nBase = (intptr_t)pBase;
	void* pResult = 0;
	if(0 == pResult && nBase > 0x40000000)
	{
		pResult = MicroProfileAllocExecutableMemoryDown(nBase - 0x40000000, s, RegionIndex);
		if(0 == pResult)
		{
			pResult = MicroProfileAllocExecutableMemoryUp(nBase - 0x40000000, s, RegionIndex);
		}
	}
	if(0 == pResult && nBase < 0xffffffff40000000)
	{
		pResult = MicroProfileAllocExecutableMemoryUp(nBase + 0x40000000, s, RegionIndex);
		if(0 == pResult)
		{
			// Mirror of the branch above: fall back to the opposite search direction.
			// (This used to repeat the identical upward call, which cannot do better
			// than the attempt that just failed.)
			pResult = MicroProfileAllocExecutableMemoryDown(nBase + 0x40000000, s, RegionIndex);
		}
	}
	return pResult;
}
// Allocates s bytes, rounded up to a multiple of 4KB, of PAGE_READWRITE memory at an
// address chosen by the system. The result is only asserted, not otherwise checked.
static void* MicroProfileAllocExecutableMemoryFar(size_t s)
{
	const uint64_t kPageSize = 4 << 10;
	const size_t nRoundedSize = (s + (kPageSize - 1)) & (~(kPageSize - 1));
	void* pMemory = VirtualAlloc(0, nRoundedSize, MEM_COMMIT, PAGE_READWRITE);
	MP_ASSERT(pMemory);
	return pMemory;
}
static void MicroProfileMakeMemoryExecutable(void* p, size_t s)
{
static uint64_t nPageSize = 4 << 10;
s = (s + (nPageSize - 1)) & (~(nPageSize - 1));
DWORD Unused;
if(!VirtualProtect(p, s, PAGE_EXECUTE_READ, &Unused))
{
MP_BREAK();
}
}
#endif
// Copies pStr into [pOutBegin, pOutEnd) while dropping everything enclosed in (...) or
// <...>, including the brackets themselves. The output is '\0'-terminated and truncated
// to fit. Returns the number of characters written, not counting the terminator.
//
// Nothing is written and 0 is returned when pStr is longer than 1024 characters or the
// output buffer holds exactly one char.
// A closing bracket without a matching opener makes the nesting depth negative, which
// suppresses output until the depth returns to zero.
int MicroProfileTrimFunctionName(const char* pStr, char* pOutBegin, char* pOutEnd)
{
	const int nLastIndex = (int)strlen(pStr) - 1;
	char* const pLimit = pOutEnd - 1; // last slot is reserved for the terminator
	if(nLastIndex >= 1024 || pOutBegin == pLimit)
		return 0;

	int nWritten = 0;
	int nDepth = 0;
	char* pWrite = pOutBegin;
	const char* const pStop = pStr + nLastIndex + 1;
	for(const char* pRead = pStr; pRead != pStop && pWrite != pLimit; ++pRead)
	{
		const char ch = *pRead;
		if(ch == ')' || ch == '>')
		{
			// closers are never copied
			nDepth--;
			continue;
		}
		if(ch == '(' || ch == '<')
			nDepth++;
		if(nDepth == 0)
		{
			*pWrite++ = ch;
			nWritten++;
		}
	}
	*pWrite = '\0';
	return nWritten;
}
int MicroProfileFindFunctionName(const char* pStr, const char** ppStart)
{
int l = (int)strlen(pStr) - 1;
if(l < 1024)
{
char b[1024] = { 0 };
char* put = &b[0];
const char* p = pStr;
const char* pEnd = pStr + l + 1;
int in = 0;
while(p != pEnd)
{
char c = *p++;
if(c == '(' || c == '<')
{
in++;
}
else if(c == ')' || c == '>')
{
in--;
continue;
}
if(in == 0)
{
*put++ = c;
}
}
*put++ = '\0';
uprintf("trimmed %s\n", b);
}
// int nFirstParen = l;
int nNumParen = 0;
int c = 0;
while(l >= 0 && pStr[l] != ')' && c++ < sizeof(" const") - 1)
{
l--;
}
if(pStr[l] == ')')
{
do
{
if(pStr[l] == ')')
{
nNumParen++;
}
else if(pStr[l] == '(')
{
nNumParen--;
}
l--;
} while(nNumParen > 0 && l >= 0);
}
else
{
*ppStart = pStr;
return 0;
}
while(l >= 0 && isspace(pStr[l]))
{
--l;
}
int nLast = l;
while(l >= 0 && !isspace(pStr[l]))
{
l--;
}
int nFirst = l;
if(nFirst == nLast)
return 0;
int nCount = nLast - nFirst + 1;
*ppStart = pStr + nFirst;
return nCount;
}
#include <dbghelp.h>
#include <psapi.h>
#include <tlhelp32.h>
#include <winternl.h>
// Filter state for a symbol query: sub-pattern pointers with their lengths, a scratch buffer and bookkeeping flags.
// NOTE(review): nothing in this part of the file references this struct - confirm it is still in use.
struct MicroProfileQueryContext
{
const char* pFilterStrings[MICROPROFILE_MAX_FILTER]; // sub-pattern pointers, same capacity as MicroProfileFunctionQuery
uint32_t nPatternLength[MICROPROFILE_MAX_FILTER]; // one length slot per sub-pattern
int nMaxFilter = 0; // presumably the number of sub-patterns in use - TODO confirm
char TempBuffer[128];
uint32_t size = 0;
bool bFirst = false;
};
// Module enumeration callback: registers one loaded module with the symbol system via
// MicroProfileSymbolInitModule, using the image size reported by GetModuleInformation.
//
// Always returns TRUE so that the enumeration continues. UserContext is not used.
// If GetModuleInformation fails the module is still registered, but with a zero image
// size instead of an uninitialized one.
BOOL CALLBACK MicroProfileEnumModules(_In_ PCTSTR ModuleName, _In_ DWORD64 BaseOfDll, _In_opt_ PVOID UserContext)
{
	(void)UserContext;
	char buffer[1024];

	// Zero-initialise so a failed query cannot leak stack garbage into the module range.
	MODULEINFO MI = {};
	if(!GetModuleInformation(GetCurrentProcess(), (HMODULE)BaseOfDll, &MI, sizeof(MI)))
	{
		const int nError = (int)GetLastError();
		stbsp_snprintf(buffer, sizeof(buffer) - 1, "GetModuleInformation failed, error %d\n", nError);
		OutputDebugString(buffer);
	}

	MEMORY_BASIC_INFORMATION B;
	const SIZE_T r = VirtualQuery((LPCVOID)BaseOfDll, (MEMORY_BASIC_INFORMATION*)&B, sizeof(B));
	const int r1 = (int)GetLastError();
	if(r == 0)
	{
		stbsp_snprintf(buffer, sizeof(buffer) - 1, "Error %d\n", r1);
		OutputDebugString(buffer);
		MP_BREAK();
	}
	MicroProfileSymbolInitModule(ModuleName, BaseOfDll, BaseOfDll + MI.SizeOfImage);
	return TRUE;
}
namespace
{
// Type-erased symbol callback. Enumeration APIs only carry an opaque user pointer, so the
// callable is wrapped in QueryCallbackImpl and invoked through this common base.
struct QueryCallbackBase
{
	virtual void CB(const char* pName, const char* pShortName, intptr_t addr, intptr_t addrend, uint32_t nModuleId) = 0;
};
// Stores a copy of an arbitrary callable and forwards CB() to it unchanged.
template <typename Fn>
struct QueryCallbackImpl : public QueryCallbackBase
{
	Fn t;
	QueryCallbackImpl(Fn Callable)
		: t(Callable)
	{
	}
	void CB(const char* pName, const char* pShortName, intptr_t addr, intptr_t addrend, uint32_t nModuleId) override
	{
		t(pName, pShortName, addr, addrend, nModuleId);
	}
};
} // namespace
// One-entry cache mapping the most recently seen module base to its module id, so the
// module lookup only runs when the enumeration moves on to another module.
static uint32_t nLastModuleIdWin32 = (uint32_t)-1;
static intptr_t nLastModuleBaseWin32 = (intptr_t)-1;
// Symbol enumeration callback. For symbols whose Tag is 5 or 10 it computes the trimmed
// short name and forwards name, short name (0 when trimming produced nothing), address
// range and module id to the QueryCallbackBase passed in UserContext.
// Always returns TRUE so that the enumeration continues. SymbolSize is not used.
// NOTE(review): tags 5 and 10 presumably are SymTagFunction and SymTagPublicSymbol - confirm against the DbgHelp SymTagEnum.
BOOL MicroProfileQueryContextEnumSymbols(_In_ PSYMBOL_INFO pSymInfo, _In_ ULONG SymbolSize, _In_opt_ PVOID UserContext)
{
	const intptr_t nModuleBase = (intptr_t)pSymInfo->ModBase;
	if(nLastModuleBaseWin32 != nModuleBase)
	{
		nLastModuleIdWin32 = MicroProfileSymbolGetModule((const char*)(intptr_t)-2, pSymInfo->ModBase);
		nLastModuleBaseWin32 = nModuleBase;
	}
	const uint32_t nModuleId = nLastModuleIdWin32;

	const bool bWantedTag = (pSymInfo->Tag == 5 || pSymInfo->Tag == 10);
	if(!bWantedTag)
		return TRUE;

	char ShortName[1024];
	const int nShortLength = MicroProfileTrimFunctionName(pSymInfo->Name, &ShortName[0], &ShortName[1024]);
	QueryCallbackBase* pCallback = (QueryCallbackBase*)UserContext;
	pCallback->CB(pSymInfo->Name, nShortLength ? &ShortName[0] : 0, (intptr_t)pSymInfo->Address, pSymInfo->Size + (intptr_t)pSymInfo->Address, nModuleId);
	return TRUE;
}
// Undecorates pName into OutName (capacity Size) using UNDNAME_NAME_ONLY.
// Returns true when UnDecorateSymbolName reports a non-zero result.
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size)
{
	MICROPROFILE_SCOPEI("microprofile", "SymbolDemangle", MP_AUTO);
	const bool bUndecorated = UnDecorateSymbolName(pName, OutName, Size, UNDNAME_NAME_ONLY) != 0;
	return bUndecorated;
}
// Reads the CodeView debug record of a loaded image and returns its PDB identity.
//
// hMod is used as the image base address. On success guid, age and pdbName (capacity
// MAX_PATH) are filled from the first CodeView entry carrying the "RSDS" signature and
// true is returned. Returns false when the DOS/NT signatures do not match, the image has
// no debug directory, or no such entry exists.
bool MicroProfileExtractPdbInfo(HMODULE hMod, GUID& guid, DWORD& age, char pdbName[MAX_PATH])
{
	struct CV_INFO_PDB70
	{
		DWORD CvSignature;   // "RSDS"
		GUID Signature;      // GUID
		DWORD Age;           // Age
		char PdbFileName[1]; // Null-terminated string
	};
	BYTE* const pImage = (BYTE*)hMod;

	const IMAGE_DOS_HEADER* pDosHeader = (IMAGE_DOS_HEADER*)pImage;
	if(pDosHeader->e_magic != IMAGE_DOS_SIGNATURE)
		return false;

	const IMAGE_NT_HEADERS* pNtHeaders = (IMAGE_NT_HEADERS*)(pImage + pDosHeader->e_lfanew);
	if(pNtHeaders->Signature != IMAGE_NT_SIGNATURE)
		return false;

	const auto& DebugDirectory = pNtHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG];
	if(0 == DebugDirectory.VirtualAddress || 0 == DebugDirectory.Size)
		return false;

	const IMAGE_DEBUG_DIRECTORY* pEntries = (IMAGE_DEBUG_DIRECTORY*)(pImage + DebugDirectory.VirtualAddress);
	const int nEntryCount = DebugDirectory.Size / sizeof(IMAGE_DEBUG_DIRECTORY);
	for(int nEntry = 0; nEntry < nEntryCount; nEntry++)
	{
		const IMAGE_DEBUG_DIRECTORY& Entry = pEntries[nEntry];
		if(Entry.Type != IMAGE_DEBUG_TYPE_CODEVIEW)
			continue;
		const CV_INFO_PDB70* pCodeView = (CV_INFO_PDB70*)(pImage + Entry.AddressOfRawData);
		if(pCodeView->CvSignature != 'SDSR')
			continue; // "RSDS"
		guid = pCodeView->Signature;
		age = pCodeView->Age;
		strcpy_s(pdbName, MAX_PATH, pCodeView->PdbFileName);
		return true;
	}
	return false;
}
// Resolves the path of the PDB that belongs to Module.
//
// The PDB name recorded in the image is tried first: if a file of that name can be opened
// it is copied to outPath. Otherwise the lookup is delegated to SymFindFileInPath using
// the image's GUID and age. Returns true when a path was written to outPath.
bool MicroProfileDownloadPDB(HMODULE Module, HANDLE Process, char outPath[MAX_PATH])
{
	GUID Guid;
	DWORD Age;
	char RecordedPdbName[MAX_PATH];
	const bool bHavePdbInfo = MicroProfileExtractPdbInfo(Module, Guid, Age, RecordedPdbName);
	if(!bHavePdbInfo)
	{
		uprintf("Failed to download pdb\n");
		MP_BREAK();
		return false;
	}
	uprintf("pdb name %s age %d\n", RecordedPdbName, Age);

	// the recorded path is usable as-is when the file can be opened
	if(FILE* pExisting = fopen(RecordedPdbName, "r"))
	{
		fclose(pExisting);
		strcpy_s(outPath, MAX_PATH, RecordedPdbName);
		return true;
	}

	const BOOL bFound = SymFindFileInPath(Process,
										  NULL,
										  RecordedPdbName,
										  (PVOID)&Guid,    // GUID
										  Age,             // Age
										  0,               // FileSize (not used for PDBs)
										  SSRVOPT_GUIDPTR, // we're passing GUID pointer
										  outPath,
										  NULL,
										  NULL);
	return bFound != 0;
}
#include "PDB.h"
#include "PDB_DBIStream.h"
#include "PDB_IPIStream.h"
#include "PDB_InfoStream.h"
#include "PDB_NamesStream.h"
#include "PDB_RawFile.h"
#include "PDB_TPIStream.h"
// Reads function symbols directly from a PDB file and reports each one through CB.
//
//  CB        callable invoked as CB(name, shortName, begin, end, nModuleId); shortName is the
//            bracket-trimmed name, or nullptr when trimming produced nothing
//  Filename  path of the PDB file, which is memory-mapped read-only
//  Base      load address of the module; symbol RVAs are offset by it
//  nModuleId passed through to CB unchanged
//
// Function records of every module stream are reported first (procedures and incremental
// linking thunks), followed by the public symbols flagged as functions (reported with size 0).
//
// Every failure (file cannot be opened or mapped, a stream fails validation) still breaks
// via MP_BREAK where it did before, but now also releases the file and returns instead
// of continuing with unusable handles or streams.
template <typename Callback>
void MicroProfileLoadRawPDB(Callback CB, const char* Filename, uint64_t Base, uint32_t nModuleId)
{
	auto OnSymbol = [CB, Base, nModuleId](const char* Sym, uint32_t Offset, uint32_t Size)
	{
		char FunctionName[1024];
		int l = MicroProfileTrimFunctionName(Sym, &FunctionName[0], &FunctionName[1024]);
		const char* fname = l ? &FunctionName[0] : nullptr;
		CB(Sym, fname, (intptr_t)Offset + Base, (intptr_t)Offset + Base + Size, nModuleId);
	};

	void* File = CreateFileA(Filename, GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, nullptr);
	if(File == INVALID_HANDLE_VALUE)
	{
		MP_BREAK();
		return;
	}
	void* FileMapping = CreateFileMappingA(File, nullptr, PAGE_READONLY, 0, 0, nullptr);
	if(FileMapping == nullptr)
	{
		CloseHandle(File);
		MP_BREAK();
		return;
	}
	void* BaseAddress = MapViewOfFile(FileMapping, FILE_MAP_READ, 0, 0, 0);
	if(BaseAddress == nullptr)
	{
		CloseHandle(FileMapping);
		CloseHandle(File);
		// both handles are closed and there is no view: nothing left to work with
		return;
	}

	// single place that undoes the three steps above
	auto ReleaseFile = [&]()
	{
		UnmapViewOfFile(BaseAddress);
		CloseHandle(FileMapping);
		CloseHandle(File);
	};

	BY_HANDLE_FILE_INFORMATION FileInformation;
	const bool GetInformationResult = GetFileInformationByHandle(File, &FileInformation);
	if(!GetInformationResult)
	{
		ReleaseFile();
		MP_BREAK();
		return;
	}

	const PDB::RawFile RawPdbFile = PDB::CreateRawFile(BaseAddress);
	if(PDB::HasValidDBIStream(RawPdbFile) != PDB::ErrorCode::Success)
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}
	const PDB::InfoStream InfoStream(RawPdbFile);
	if(InfoStream.UsesDebugFastLink())
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}
	// const PDB::Header* h = InfoStream.GetHeader();
	// uprintf("Version %u, signature %u, age %u, GUID %08x-%04x-%04x-%02x%02x%02x%02x%02x%02x%02x%02x\n",
	// static_cast<uint32_t>(h->version), h->signature, h->age,
	// h->guid.Data1, h->guid.Data2, h->guid.Data3,
	// h->guid.Data4[0], h->guid.Data4[1], h->guid.Data4[2], h->guid.Data4[3], h->guid.Data4[4], h->guid.Data4[5], h->guid.Data4[6], h->guid.Data4[7]);
	const PDB::DBIStream DbiStream = PDB::CreateDBIStream(RawPdbFile);
	if(PDB::ErrorCode::Success != DbiStream.HasValidSymbolRecordStream(RawPdbFile))
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}
	if(PDB::ErrorCode::Success != DbiStream.HasValidPublicSymbolStream(RawPdbFile))
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}
	if(PDB::ErrorCode::Success != DbiStream.HasValidGlobalSymbolStream(RawPdbFile))
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}
	if(PDB::ErrorCode::Success != DbiStream.HasValidSectionContributionStream(RawPdbFile))
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}
	if(PDB::ErrorCode::Success != DbiStream.HasValidImageSectionStream(RawPdbFile))
	{
		MP_BREAK();
		ReleaseFile();
		return;
	}

	const PDB::ImageSectionStream ImageSectionStream = DbiStream.CreateImageSectionStream(RawPdbFile);
	const PDB::ModuleInfoStream ModuleInfoStream = DbiStream.CreateModuleInfoStream(RawPdbFile);
	const PDB::CoalescedMSFStream SymbolRecordStream = DbiStream.CreateSymbolRecordStream(RawPdbFile);
	const PDB::ArrayView<PDB::ModuleInfoStream::Module> modules = ModuleInfoStream.GetModules();

	// pass 1: function records stored in the per-module symbol streams
	for(const PDB::ModuleInfoStream::Module& module : modules)
	{
		if(!module.HasSymbolStream())
		{
			continue;
		}
		const PDB::ModuleSymbolStream moduleSymbolStream = module.CreateSymbolStream(RawPdbFile);
		moduleSymbolStream.ForEachSymbol(
			[&ImageSectionStream, &OnSymbol](const PDB::CodeView::DBI::Record* record)
			{
				// only grab function symbols from the module streams
				const char* name = nullptr;
				uint32_t rva = 0u;
				uint32_t size = 0u;
				if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_FRAMEPROC)
				{
					// functionSymbols[functionSymbols.size() - 1].frameProc = record;
					return;
				}
				else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_THUNK32)
				{
					if(record->data.S_THUNK32.thunk == PDB::CodeView::DBI::ThunkOrdinal::TrampolineIncremental)
					{
						// we have never seen incremental linking thunks stored inside a S_THUNK32 symbol, but better safe than sorry
						name = "ILT";
						rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_THUNK32.section, record->data.S_THUNK32.offset);
						size = 5u;
					}
				}
				else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_TRAMPOLINE)
				{
					// incremental linking thunks are stored in the linker module
					name = "ILT";
					rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_TRAMPOLINE.thunkSection, record->data.S_TRAMPOLINE.thunkOffset);
					size = 5u;
				}
				else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32)
				{
					name = record->data.S_LPROC32.name;
					rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_LPROC32.section, record->data.S_LPROC32.offset);
					size = record->data.S_LPROC32.codeSize;
				}
				else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32)
				{
					name = record->data.S_GPROC32.name;
					rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_GPROC32.section, record->data.S_GPROC32.offset);
					size = record->data.S_GPROC32.codeSize;
				}
				else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_LPROC32_ID)
				{
					name = record->data.S_LPROC32_ID.name;
					rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_LPROC32_ID.section, record->data.S_LPROC32_ID.offset);
					size = record->data.S_LPROC32_ID.codeSize;
				}
				else if(record->header.kind == PDB::CodeView::DBI::SymbolRecordKind::S_GPROC32_ID)
				{
					name = record->data.S_GPROC32_ID.name;
					rva = ImageSectionStream.ConvertSectionOffsetToRVA(record->data.S_GPROC32_ID.section, record->data.S_GPROC32_ID.offset);
					size = record->data.S_GPROC32_ID.codeSize;
				}
				// rva stays 0 for every record kind not handled above
				if(rva == 0u)
				{
					return;
				}
				// uprintf("func %p / %d .. %s \n", rva, size, name);
				OnSymbol(name, rva, size);
			});
	}

	// pass 2: public symbols flagged as functions
	const PDB::PublicSymbolStream PublicSymbolStream = DbiStream.CreatePublicSymbolStream(RawPdbFile);
	{
		const PDB::ArrayView<PDB::HashRecord> HashRecords = PublicSymbolStream.GetRecords();
		for(const PDB::HashRecord& HashRecord : HashRecords)
		{
			const PDB::CodeView::DBI::Record* Record = PublicSymbolStream.GetRecord(SymbolRecordStream, HashRecord);
			if(Record->header.kind != PDB::CodeView::DBI::SymbolRecordKind::S_PUB32)
			{
				continue;
			}
			if((PDB_AS_UNDERLYING(Record->data.S_PUB32.flags) & PDB_AS_UNDERLYING(PDB::CodeView::DBI::PublicSymbolFlags::Function)) == 0u)
			{
				continue;
			}
			const uint32_t rva = ImageSectionStream.ConvertSectionOffsetToRVA(Record->data.S_PUB32.section, Record->data.S_PUB32.offset);
			if(rva == 0u)
			{
				continue;
			}
			OnSymbol(Record->data.S_PUB32.name, rva, 0);
		}
	}
	ReleaseFile();
}
// Returns true when the recorded instruction pointer of any suspended thread lies inside
// the inclusive range [Begin, End].
bool MicroProfilePatchHasSuspendedThread(intptr_t Begin, intptr_t End)
{
	MicroProfileSuspendState& State = S.SuspendState;
	uint32_t nThread = 0;
	while(nThread < State.NumSuspended)
	{
		const intptr_t nInstructionPointer = State.SuspendedIP[nThread++];
		const bool bOutsideRange = nInstructionPointer < Begin || End < nInstructionPointer;
		if(!bOutsideRange)
			return true;
	}
	return false;
}
// Suspends the other threads of the current process and records, per suspended thread, its
// handle and instruction pointer in S.SuspendState.
// Calls nest: only the outermost call (SuspendCounter going from 0 to 1) does the work, nested
// calls return true immediately.
// Returns true when at least one thread was suspended (or when nested), false when the thread
// snapshot could not be created/walked or when no other thread was suspended.
// NOTE(review): the failure paths return with the MicroProfile mutex held and SuspendCounter
// incremented - confirm that callers still call MicroProfilePatchEndSuspend after a false return.
bool MicroProfilePatchBeginSuspend()
{
	MicroProfileSuspendState& State = S.SuspendState;
	if(State.SuspendCounter++ > 0)
		return true;
	MicroProfileUpdateMemoryRegions();
	MicroProfileMutex().lock();
	MP_ASSERT(State.NumSuspended == 0);
	DWORD ProcessId = GetCurrentProcessId();
	DWORD ThreadId = GetCurrentThreadId();
	HANDLE hSnap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
	if(hSnap == INVALID_HANDLE_VALUE)
	{
		return false;
	}
	THREADENTRY32 te{};
	te.dwSize = sizeof(te);
	State.NumSuspended = 0;
	if(!Thread32First(hSnap, &te))
	{
		uprintf("Thread32First failed %08x\n", GetLastError());
		CloseHandle(hSnap);
		return false;
	}
	do
	{
		// The snapshot covers every process; only foreign threads of this process are of interest.
		if(te.th32OwnerProcessID != ProcessId)
			continue;
		if(te.th32ThreadID == ThreadId)
			continue;
		// Never suspend a thread that cannot be recorded: it could not be resumed by
		// MicroProfilePatchEndSuspend and would stay frozen forever.
		if(State.NumSuspended >= MICROPROFILE_SUSPEND_MAX)
		{
			uprintf("MicroProfilePatchBeginSuspend: more than %d threads, remaining threads are not suspended\n", (int)MICROPROFILE_SUSPEND_MAX);
			break;
		}
		HANDLE hThread = OpenThread(THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_QUERY_INFORMATION, FALSE, te.th32ThreadID);
		if(!hThread)
		{
			continue;
		}
		DWORD PrevCount = SuspendThread(hThread);
		if(PrevCount == (DWORD)-1)
		{
			CloseHandle(hThread);
			continue;
		}
		CONTEXT ctx{};
		ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; // Rip + registers
		// Record 0 instead of a stale value when the context cannot be read.
		intptr_t ip = 0;
		if(GetThreadContext(hThread, &ctx))
		{
			ip = (intptr_t)ctx.Rip;
		}
		// Capacity was checked above, so both writes are in bounds.
		State.SuspendedIP[State.NumSuspended] = ip;
		State.Suspended[State.NumSuspended++] = hThread;
	} while(Thread32Next(hSnap, &te));
	CloseHandle(hSnap);
	return State.NumSuspended > 0;
}
void MicroProfilePatchEndSuspend()
{
MicroProfileSuspendState& State = S.SuspendState;
if(0 == --State.SuspendCounter)
{
for(uint32_t i = 0; i < State.NumSuspended; ++i)
{
ResumeThread(State.Suspended[i]);
CloseHandle(State.Suspended[i]);
}
State.NumSuspended = 0;
MicroProfileMutex().unlock();
}
}
template <typename Callback>
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules)
{
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileIterateSymbols", MP_PINK3);
QueryCallbackImpl<Callback> Context(CB);
if(MicroProfileSymInit())
{
// uprintf("symbols loaded!\n");
// API_VERSION* pv = ImagehlpApiVersion();
// uprintf("VERSION %d.%d.%d\n", pv->MajorVersion, pv->MinorVersion, pv->Revision);
nLastModuleBaseWin32 = -1;
if(SymEnumerateModules64(GetCurrentProcess(), (PSYM_ENUMMODULES_CALLBACK64)MicroProfileEnumModules, NULL))
{
}
QueryCallbackBase* pBase = &Context;
if(nNumModules)
{
HANDLE hProcess = GetCurrentProcess();
char buffer[sizeof(SYMBOL_INFO) + MAX_SYM_NAME * sizeof(TCHAR)];
PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer;
uint64_t t0 = MP_TICK();
for(uint32_t i = 0; i < nNumModules; ++i)
{
uint32_t nModule = nModules[i];
int64_t nBytes = 0;
MEMORY_BASIC_INFORMATION B;
for(int j = 0; j < S.SymbolModules[nModule].nNumExecutableRegions; ++j)
{
intptr_t b = S.SymbolModules[nModule].Regions[j].nBegin;
intptr_t e = S.SymbolModules[nModule].Regions[j].nEnd;
while(b < e)
{
int r = VirtualQuery((LPCVOID)b, &B, sizeof(B));
if(!r)
break;
switch(B.Protect)
{
case PAGE_EXECUTE:
case PAGE_EXECUTE_READ:
case PAGE_EXECUTE_READWRITE:
case PAGE_EXECUTE_WRITECOPY:
nBytes += B.RegionSize;
// uprintf("RANGE %p, %p .. %5.2fkb %08x, %08x\n", B.BaseAddress, (void*)(intptr_t(B.BaseAddress) + B.RegionSize), B.RegionSize / 1024.f, B.State, B.Protect);
}
b = intptr_t(B.BaseAddress) + B.RegionSize;
}
}
S.SymbolModules[nModule].nProgressTarget = nBytes;
char pdbPath[MAX_PATH];
HMODULE Module = (HMODULE)S.SymbolModules[nModule].nModuleBase;
S.nSymbolsDirty++;
S.SymbolModules[nModule].bDownloading = true;
if(MicroProfileDownloadPDB(Module, hProcess, pdbPath))
{
S.SymbolModules[nModule].bDownloading = false;
S.nSymbolsDirty++;
MicroProfileLoadRawPDB<Callback>(CB, pdbPath, S.SymbolModules[nModule].nModuleBase, nModule);
}
S.SymbolModules[nModule].bDownloading = false;
S.nSymbolsDirty++;
S.SymbolModules[nModule].nProgress = S.SymbolModules[nModule].nProgressTarget;
S.SymbolModules[nModule].nModuleLoadFinished.exchange(1);
}
uint64_t t1 = MP_TICK();
float fTime = float(MicroProfileTickToMsMultiplierCpu()) * (t1 - t0);
uprintf("load symbol time %6.2fms\n", fTime);
}
MicroProfileSymCleanup();
}
}
static int MicroProfileWin32SymInitCount = 0;
static int MicroProfileWin32SymInitSuccess = 0;
bool MicroProfileSymInit()
{
if(0 == MicroProfileWin32SymInitCount++)
{
auto h = GetCurrentProcess();
SymCleanup(h);
SymSetOptions(SYMOPT_DEFERRED_LOADS);
if(SymInitialize(h, 0, FALSE))
{
MicroProfileWin32SymInitSuccess = 1;
char Path[MAX_PATH];
bool PathValid = SymGetSearchPath(h, Path, MAX_PATH) > 0;
if(PathValid)
{
PathValid = strlen(Path) > 3;
}
if(!PathValid)
{
SymSetSearchPath(h, "srv*C:\\symbols*https://msdl.microsoft.com/download/symbols");
}
}
else
{
MicroProfileWin32SymInitSuccess = 0;
}
}
return MicroProfileWin32SymInitSuccess != 0;
}
// Releases one reference taken by MicroProfileSymInit; the last release clears the success
// flag and calls SymCleanup for the current process.
void MicroProfileSymCleanup()
{
	const int nRemaining = --MicroProfileWin32SymInitCount;
	if(nRemaining != 0)
	{
		return;
	}
	MicroProfileWin32SymInitSuccess = 0;
	SymCleanup(GetCurrentProcess());
}
static void* g_pFunctionFoundHack = 0;
static const char* g_pFunctionpNameFound = 0;
static char g_Demangled[512];
// SymEnumSymbols callback: stops the enumeration at the first symbol whose Tag is 5 or 10,
// logs it to the debugger output and publishes its name/address through the globals above.
// NOTE(review): 5 and 10 presumably are DbgHelp's SymTagFunction / SymTagPublicSymbol - confirm.
// Returns FALSE to stop enumerating once a match is stored, TRUE to continue otherwise.
BOOL MicroProfileQueryContextEnumSymbols1(_In_ PSYMBOL_INFO pSymInfo, _In_ ULONG SymbolSize, _In_opt_ PVOID UserContext)
{
	const bool bWanted = pSymInfo->Tag == 5 || pSymInfo->Tag == 10;
	if(!bWanted)
	{
		return TRUE;
	}
	char Line[200];
	stbsp_snprintf(Line, sizeof(Line) - 1, "%s : %p\n", pSymInfo->Name, (void*)pSymInfo->Address);
	OutputDebugStringA(Line);
	g_pFunctionpNameFound = pSymInfo->Name;
	g_pFunctionFoundHack = (void*)pSymInfo->Address;
	return FALSE;
}
// Windows implementation: performs no demangling and returns the input pointer unchanged.
const char* MicroProfileDemangleSymbol(const char* pSymbol)
{
return pSymbol; // todo: for some reason all symbols I'm seeing right now are already undecorated?
}
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols)
{
char SymString[512];
const char* pStr = 0;
if(MicroProfileSymInit())
{
HANDLE h = GetCurrentProcess();
for(uint32_t i = 0; i < nNumSymbols; ++i)
{
int nCount = stbsp_snprintf(SymString, sizeof(SymString) - 1, "%s!%s", pModules[i], pSymbols[i]);
if(nCount <= sizeof(SymString) - 1)
{
g_pFunctionFoundHack = 0;
if(SymEnumSymbols(h, 0, SymString, MicroProfileQueryContextEnumSymbols1, 0))
{
if(g_pFunctionFoundHack)
{
uint32_t nColor = MicroProfileColorFromString(pSymbols[i]);
const char* pDemangled = pSymbols[i]; // MicroProfileDemangleSymbol(pSymbols[i]);
MicroProfileInstrumentFunction(g_pFunctionFoundHack, pModules[i], pDemangled, nColor);
}
}
}
}
MicroProfileSymCleanup();
}
}
void MicroProfileSymbolEnumModules()
{
HMODULE modules[1024];
DWORD needed;
HANDLE h = GetCurrentProcess();
if(EnumProcessModules(h, modules, sizeof(modules), &needed))
{
int count = needed / sizeof(HMODULE);
for(int i = 0; i < count; i++)
{
char moduleName[MAX_PATH];
if(GetModuleFileNameEx(h, modules[i], moduleName, MAX_PATH))
{
MODULEINFO mi = {};
if(GetModuleInformation(h, modules[i], &mi, sizeof(mi)))
{
MicroProfileEnumModules(moduleName, (DWORD64)mi.lpBaseOfDll, 0);
}
}
}
}
}
void MicroProfileSymbolUpdateModuleList()
{
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileSymbolUpdateModuleList", MP_PINK3);
// QueryCallbackImpl<Callback> Context(CB);
if(MicroProfileSymInit())
{
uprintf("symbols loaded!\n");
API_VERSION* pv = ImagehlpApiVersion();
uprintf("VERSION %d.%d.%d\n", pv->MajorVersion, pv->MinorVersion, pv->Revision);
nLastModuleBaseWin32 = -1;
MicroProfileSymbolEnumModules();
MicroProfileSymCleanup();
}
}
#endif
#if defined(__APPLE__) && defined(__MACH__)
// '##::::'##::'#######:::'#######::'##:::'##:::::'#######:::'######::'##::::'##:
// ##:::: ##:'##.... ##:'##.... ##: ##::'##:::::'##.... ##:'##... ##:. ##::'##::
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##:::: ##: ##:::..:::. ##'##:::
// #########: ##:::: ##: ##:::: ##: #####::::::: ##:::: ##:. ######::::. ###::::
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::: ##:::: ##::..... ##::: ## ##:::
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##::::: ##:::: ##:'##::: ##:: ##:. ##::
// ##:::: ##:. #######::. #######:: ##::. ##::::. #######::. ######:: ##:::. ##:
// ..:::::..:::.......::::.......:::..::::..::::::.......::::......:::..:::::..::
#include <cxxabi.h>
#include <distorm.h>
#include <dlfcn.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mnemonics.h>
#include <sys/mman.h>
#include <unistd.h>
static void* MicroProfileAllocExecutableMemory(void* f, size_t s);
static void MicroProfileMakeWriteable(void* p_);
extern "C" void microprofile_tramp_enter_patch();
extern "C" void microprofile_tramp_enter();
extern "C" void microprofile_tramp_code_begin();
extern "C" void microprofile_tramp_code_end();
extern "C" void microprofile_tramp_intercept0();
extern "C" void microprofile_tramp_end();
extern "C" void microprofile_tramp_exit();
extern "C" void microprofile_tramp_leave();
extern "C" void microprofile_tramp_trunk();
extern "C" void microprofile_tramp_call_patch_pop();
extern "C" void microprofile_tramp_call_patch_push();
// Patches function f so that it jumps through a private trampoline wired to the enter/leave hooks.
// A copy of the assembly trampoline template (the bytes from microprofile_tramp_enter up to
// microprofile_tramp_end) is placed in memory from MicroProfileAllocExecutableMemory, the leading
// instructions of f are relocated into that copy, several slots of the copy are overwritten with
// Argument, the hook addresses and the TLS push/pop helper addresses, and finally the start of f
// is overwritten with a jump into the copy.
//   f        - address of the function to patch; its first bytes are rewritten.
//   Argument - 32 bit value written into two slots of the trampoline copy.
//   enter    - address stored in the slot following microprofile_tramp_intercept0.
//   leave    - address stored in the slot following microprofile_tramp_leave.
//   pError   - optional; receives original code bytes and, when the leading instructions cannot
//              be moved, a message listing them.
// Returns false when MicroProfileCopyInstructionBytes fails, true otherwise.
// NOTE(review): the pointer returned by MicroProfileAllocExecutableMemory is used without a
// failure check.
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError) __attribute__((optnone))
{
if(pError)
{
// Keep the first 12 original code bytes of f for diagnostics.
memcpy(&pError->Code[0], f, 12);
}
// Offsets of the template's labels, measured from microprofile_tramp_enter.
intptr_t t_enter = (intptr_t)microprofile_tramp_enter;
intptr_t t_enter_patch_offset = (intptr_t)microprofile_tramp_enter_patch - t_enter;
intptr_t t_code_begin_offset = (intptr_t)microprofile_tramp_code_begin - t_enter;
intptr_t t_code_end_offset = (intptr_t)microprofile_tramp_code_end - t_enter;
intptr_t t_code_intercept0_offset = (intptr_t)microprofile_tramp_intercept0 - t_enter;
intptr_t t_code_exit_offset = (intptr_t)microprofile_tramp_exit - t_enter;
intptr_t t_code_leave_offset = (intptr_t)microprofile_tramp_leave - t_enter;
intptr_t t_code_call_patch_push_offset = (intptr_t)microprofile_tramp_call_patch_push - t_enter;
intptr_t t_code_call_patch_pop_offset = (intptr_t)microprofile_tramp_call_patch_pop - t_enter;
// Space available in the template for the relocated instructions.
intptr_t codemaxsize = t_code_end_offset - t_code_begin_offset;
intptr_t t_end_offset = (intptr_t)microprofile_tramp_end - t_enter;
intptr_t t_trunk_offset = (intptr_t)microprofile_tramp_trunk - t_enter;
intptr_t t_trunk_size = (intptr_t)microprofile_tramp_end - (intptr_t)microprofile_tramp_trunk;
// Memory for this function's private trampoline copy; f is handed to the allocator as address hint.
char* ptramp = (char*)MicroProfileAllocExecutableMemory(f, t_end_offset);
intptr_t offset = ((intptr_t)f + 6 - (intptr_t)ptramp);
// Move 14 bytes by default (size of the register jump); 5 are enough when a relative jump reaches the copy.
uint32_t nBytesToCopy = 14;
if(offset < 0x80000000 && offset > -0x7fffffff)
{
/// offset is small enough to insert a relative jump
nBytesToCopy = 5;
}
// Copy the whole template into the new memory before patching it.
memcpy(ptramp, (void*)t_enter, t_end_offset);
int nInstructionBytesDest = 0;
char* pInstructionMoveDest = ptramp + t_code_begin_offset;
char* pTrunk = ptramp + t_trunk_offset;
int nInstructionBytesSrc = 0;
uint32_t nRegsWritten = 0;
uint32_t nRetSafe = 0;
uint32_t nUsableJumpRegs = (1 << R_RAX) | (1 << R_R10) | (1 << R_R11); // scratch && !parameter register
// Relocate at least nBytesToCopy bytes worth of whole instructions from f into the copy.
if(!MicroProfileCopyInstructionBytes(
pInstructionMoveDest, f, nBytesToCopy, codemaxsize, pTrunk, t_trunk_size, nUsableJumpRegs, &nInstructionBytesDest, &nInstructionBytesSrc, &nRegsWritten, &nRetSafe))
{
if(pError)
{
// Report the bytes that could not be moved, both raw and as a hex dump in the message.
const char* pCode = (const char*)f;
memset(pError->Code, 0, sizeof(pError->Code));
memcpy(pError->Code, pCode, nInstructionBytesSrc);
int off = stbsp_snprintf(pError->Message, sizeof(pError->Message), "Failed to move %d code bytes ", nInstructionBytesSrc);
pError->nCodeSize = nInstructionBytesSrc;
for(int i = 0; i < nInstructionBytesSrc; ++i)
{
off += stbsp_snprintf(off + pError->Message, sizeof(pError->Message) - off, "%02x ", 0xff & pCode[i]);
}
uprintf("%s\n", pError->Message);
}
return false;
}
// Address in f right after the moved instructions; the copy jumps back here.
intptr_t phome = nInstructionBytesSrc + (intptr_t)f;
// Candidate jump registers that are not flagged in nRegsWritten.
uint32_t reg = nUsableJumpRegs & ~nRegsWritten;
static_assert(R_RAX == 0, "R_RAX must be 0");
if(0 == reg)
{
// No register is free: jump back with the ret based sequence instead.
if(nRetSafe == 0)
{
MP_BREAK(); // should fail earlier
}
MicroProfileInsertRetJump(pInstructionMoveDest + nInstructionBytesDest, phome);
}
else
{
// Pick the lowest numbered free register (index of the lowest set bit).
int r = R_RAX;
while((reg & 1) == 0)
{
reg >>= 1;
r++;
}
MicroProfileInsertRegisterJump(pInstructionMoveDest + nInstructionBytesDest, phome, r);
}
// PATCH 1 TRAMP EXIT
// NOTE: the locals below shadow the extern template symbols of the same name; they hold
// addresses inside the copy, not inside the template.
intptr_t microprofile_tramp_exit = (intptr_t)ptramp + t_code_exit_offset;
memcpy(ptramp + t_enter_patch_offset + 2, (void*)&microprofile_tramp_exit, 8);
char* pintercept = t_code_intercept0_offset + ptramp;
// PATCH 1.5 Argument
memcpy(pintercept - 4, (void*)&Argument, 4);
// PATCH 2 INTERCEPT0
intptr_t addr = (intptr_t)enter; //&intercept0;
memcpy(pintercept + 2, (void*)&addr, 8);
// PATCH 2.5 argument
memcpy(ptramp + t_code_exit_offset + 3, (void*)&Argument, 4);
intptr_t microprofile_tramp_leave = (intptr_t)ptramp + t_code_leave_offset;
// PATCH 3 INTERCEPT1
intptr_t addr1 = (intptr_t)leave; //&intercept1;
memcpy((char*)microprofile_tramp_leave + 2, (void*)&addr1, 8);
// Store the addresses of the TLS push/pop helpers in their slots of the copy.
intptr_t patch_push_addr = (intptr_t)(&MicroProfile_Patch_TLS_PUSH);
intptr_t patch_pop_addr = (intptr_t)(&MicroProfile_Patch_TLS_POP);
memcpy((char*)ptramp + t_code_call_patch_push_offset + 2, &patch_push_addr, 8);
memcpy((char*)ptramp + t_code_call_patch_pop_offset + 2, &patch_pop_addr, 8);
{
// PATCH 4 DEST FUNC
// Overwrite the start of f with a jump to the copy and pad the rest of the moved bytes with nops (0x90).
MicroProfileMakeWriteable(f);
char* pp = (char*)f;
char* ppend = pp + nInstructionBytesSrc;
if(nInstructionBytesSrc < 14)
{
uprintf("inserting 5b jump\n");
pp = MicroProfileInsertRelativeJump((char*)pp, (intptr_t)ptramp);
}
else
{
uprintf("inserting 14b jump\n");
pp = MicroProfileInsertRegisterJump(pp, (intptr_t)ptramp, R_RAX);
}
while(pp != ppend)
{
*pp++ = 0x90;
}
}
return true;
}
// Walks the task's VM regions with mach_vm_region_recurse and, for a region that contains p_
// and lacks VM_PROT_WRITE, calls mprotect to make the whole region read/write/execute.
// With _PATCH_TEST defined, regions that were found writable are remembered in a small static
// table so later calls for addresses inside them return immediately.
// mprotect failures are silently ignored.
static void MicroProfileMakeWriteable(void* p_)
{
#ifdef _PATCH_TEST
// for testing..
static const uint32_t WritableSize = 16;
static uint32_t WritableCount = 0;
static intptr_t WritableStart[WritableSize] = { 0 };
static intptr_t WritableEnd[WritableSize] = { 0 };
// Fast path: address lies in a region already known to be writable.
for(uint32_t i = 0; i < WritableCount; ++i)
{
intptr_t x = (intptr_t)p_;
if(x >= WritableStart[i] && x < WritableEnd[i])
{
return;
}
}
#endif
intptr_t p = (intptr_t)p_;
// uprintf("MicroProfilemakewriteable %lx\n", p);
mach_port_name_t task = mach_task_self();
vm_map_offset_t vmoffset = 0;
mach_vm_size_t vmsize = 0;
// NOTE(review): nd is passed to mach_vm_region_recurse without being initialised - confirm
// whether the call reads it as the requested nesting depth.
uint32_t nd;
kern_return_t kr;
vm_region_submap_info_64 vbr;
mach_msg_type_number_t vbrcount = sizeof(vbr) / 4;
while(KERN_SUCCESS == (kr = mach_vm_region_recurse(task, &vmoffset, &vmsize, &nd, (vm_region_recurse_info_t)&vbr, &vbrcount)))
{
// Note: the upper bound is inclusive, so the first byte past the region also matches.
if(p >= (intptr_t)vmoffset && p <= intptr_t(vmoffset + vmsize))
{
if(0 == (vbr.protection & VM_PROT_WRITE))
{
// uprintf("region match .. enabling write\n");
int x = mprotect((void*)vmoffset, vmsize, PROT_WRITE | PROT_READ | PROT_EXEC);
if(x)
{
// uprintf("mprotect failed ... err %d:: %d %s\n", errno, x, strerror(errno));
}
else
{
uprintf("region is [%llx,%llx] .. %08llx %d", vmoffset, vmoffset + vmsize, vmsize, vbr.is_submap);
uprintf("prot: %c%c%c %c%c%c\n",
vbr.protection & VM_PROT_READ ? 'r' : '-',
vbr.protection & VM_PROT_WRITE ? 'w' : '-',
vbr.protection & VM_PROT_EXECUTE ? 'x' : '-',
vbr.max_protection & VM_PROT_READ ? 'r' : '-',
vbr.max_protection & VM_PROT_WRITE ? 'w' : '-',
vbr.max_protection & VM_PROT_EXECUTE ? 'x' : '-');
// Skips the advance below, so the same offset is queried again on the next iteration.
continue;
}
}
else
{
#ifdef _PATCH_TEST
// Remember this writable region for the fast path above.
if(WritableCount < WritableSize)
{
WritableStart[WritableCount] = vmoffset;
WritableEnd[WritableCount] = vmoffset + vmsize;
WritableCount++;
}
#endif
}
}
// Advance to the next region and reset the in/out info count.
vmoffset += vmsize;
vbrcount = sizeof(vbr) / 4;
}
}
// Copies pStr into [pOutBegin, pOutEnd) while dropping every '(' ... ')' and '<' ... '>'
// group (brackets included), then NUL-terminates the result.
// Nothing is written and 0 is returned when pStr (plus terminator) does not fit in the
// output range or the range holds at most one byte.
// Returns the number of characters copied, not counting the terminator.
int MicroProfileTrimFunctionName(const char* pStr, char* pOutBegin, char* pOutEnd)
{
	int nLastIndex = strlen(pStr) - 1;
	char* const pLimit = pOutEnd - 1; // last slot is reserved for the terminator
	const bool bFits = nLastIndex < pLimit - pOutBegin && pOutBegin != pLimit;
	if(!bFits)
	{
		return 0;
	}
	const char* const pStop = pStr + nLastIndex + 1;
	char* pOut = pOutBegin;
	int nDepth = 0;
	int nWritten = 0;
	for(const char* pIn = pStr; pIn != pStop && pOut != pLimit; ++pIn)
	{
		const char c = *pIn;
		switch(c)
		{
		case '(':
		case '<':
			++nDepth;
			break;
		case ')':
		case '>':
			--nDepth;
			continue; // closing brackets are never copied
		default:
			break;
		}
		if(nDepth == 0)
		{
			*pOut++ = c;
			++nWritten;
		}
	}
	*pOut = '\0';
	return nWritten;
}
// Locates the function-name token that precedes the trailing parameter list of pStr.
// The closing ')' is searched within the last characters of the string (a short trailing
// qualifier such as " const" is skipped), the matching '(' is found by counting parentheses,
// and the whitespace-delimited token in front of it is reported.
//   ppStart - receives the start of the token. When the token is preceded by whitespace that
//             whitespace character is included (existing behaviour); when the token starts at
//             the beginning of pStr, pStr itself is returned.
// Returns the number of characters at *ppStart, or 0 (with *ppStart = pStr) when no
// parameter list / name is found.
int MicroProfileFindFunctionName(const char* pStr, const char** ppStart)
{
	int l = strlen(pStr) - 1;
	// Debug aid: print the string with all bracketed groups removed. Only done when the
	// string plus terminator fits in the local buffer.
	if(l < 1024 - 1)
	{
		char b[1024] = { 0 };
		char* put = &b[0];
		const char* p = pStr;
		const char* pEnd = pStr + l + 1;
		int in = 0;
		while(p != pEnd)
		{
			char c = *p++;
			if(c == '(' || c == '<')
			{
				in++;
			}
			else if(c == ')' || c == '>')
			{
				in--;
				continue;
			}
			if(in == 0)
			{
				*put++ = c;
			}
		}
		*put++ = '\0';
		uprintf("trimmed %s\n", b);
	}
	int nNumParen = 0;
	int c = 0;
	// Step back over at most strlen(" const") characters looking for the closing ')'.
	while(l >= 0 && pStr[l] != ')' && c++ < (int)(sizeof(" const") - 1))
	{
		l--;
	}
	// l < 0 means the start of the string was passed without finding ')'; never index pStr[-1].
	if(l < 0 || pStr[l] != ')')
	{
		*ppStart = pStr;
		return 0;
	}
	// Walk back to just before the '(' matching the closing ')'.
	do
	{
		if(pStr[l] == ')')
		{
			nNumParen++;
		}
		else if(pStr[l] == '(')
		{
			nNumParen--;
		}
		l--;
	} while(nNumParen > 0 && l >= 0);
	// isspace requires a value representable as unsigned char.
	while(l >= 0 && isspace((unsigned char)pStr[l]))
	{
		--l;
	}
	int nLast = l;
	while(l >= 0 && !isspace((unsigned char)pStr[l]))
	{
		l--;
	}
	int nFirst = l;
	if(nFirst == nLast)
	{
		// Nothing in front of the parameter list.
		*ppStart = pStr;
		return 0;
	}
	if(nFirst < 0)
	{
		// The token starts at the very beginning of pStr; do not point before the string.
		*ppStart = pStr;
		return nLast + 1;
	}
	int nCount = nLast - nFirst + 1;
	*ppStart = pStr + nFirst;
	return nCount;
}
// Demangles pSymbol with abi::__cxa_demangle into a function-local static scratch buffer.
// Returns the scratch buffer on success (its contents are replaced by the next call), or
// pSymbol itself when demangling fails.
const char* MicroProfileDemangleSymbol(const char* pSymbol)
{
	static unsigned long nCapacity = 128;
	static char* pScratch = (char*)malloc(nCapacity); // must come from malloc: the demangler may realloc it.
	unsigned long nLength = nCapacity;
	int nStatus = 0;
	char* const pResult = abi::__cxa_demangle(pSymbol, pScratch, &nLength, &nStatus);
	if(nStatus != 0)
	{
		return pSymbol;
	}
	if(pResult != pScratch)
	{
		// The demangler handed back a different buffer; adopt it and its reported size.
		pScratch = pResult;
		if(nLength < nCapacity)
			__builtin_trap();
		nCapacity = nLength;
	}
	return pScratch;
}
// Enumerates the symbols of executable VM regions and reports each one through CB.
// Regions are discovered with mach_vm_region_recurse. When nNumModules is non-zero only
// regions whose start equals Regions[0].nBegin of one of the listed modules are processed;
// when it is zero every executable region is processed.
// Inside a region, symbols are found by calling dladdr on the last byte and then repeatedly
// on the byte just before the previously found symbol.
//   CB is called as CB(name, shortName-or-0, begin, end, moduleId).
// NOTE(review): nd is passed to mach_vm_region_recurse without being initialised - confirm
// whether the call reads it as the requested nesting depth.
template <typename Callback>
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules)
{
	MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileIterateSymbols", MP_PINK3);
	char FunctionName[1024];
	(void)FunctionName;
	mach_port_name_t task = mach_task_self();
	vm_map_offset_t vmoffset = 0;
	mach_vm_size_t vmsize = 0;
	uint32_t nd;
	kern_return_t kr;
	vm_region_submap_info_64 vbr;
	mach_msg_type_number_t vbrcount = sizeof(vbr) / 4;
	intptr_t nCurrentModule = -1;
	uint32_t nCurrentModuleId = -1;
	// Demangles/trims one symbol, resolves its module id (cached per module base) and forwards it to CB.
	auto OnFunction = [&](void* addr, void* addrend, const char* pSymbol, const char* pModuleName, void* pModuleAddr) -> bool
	{
		const char* pStr = MicroProfileDemangleSymbol(pSymbol);
		int l = MicroProfileTrimFunctionName(pStr, &FunctionName[0], &FunctionName[1024]);
		if(nCurrentModule != (intptr_t)pModuleAddr)
		{
			nCurrentModule = (intptr_t)pModuleAddr;
			nCurrentModuleId = MicroProfileSymbolGetModule(pModuleName, nCurrentModule);
		}
		CB(l ? &FunctionName[0] : pStr, l ? &FunctionName[0] : 0, (intptr_t)addr, (intptr_t)addrend, nCurrentModuleId);
		return true;
	};
	vm_offset_t addr_prev = 0;
	while(KERN_SUCCESS == (kr = mach_vm_region_recurse(task, &vmoffset, &vmsize, &nd, (vm_region_recurse_info_t)&vbr, &vbrcount)))
	{
		addr_prev = vmoffset + vmsize;
		if(0 != (vbr.protection & VM_PROT_EXECUTE))
		{
			bool bProcessModule = true;
			int nModule = -1;
			if(nNumModules)
			{
				bProcessModule = false;
				for(uint32_t i = 0; i < nNumModules; ++i)
				{
					intptr_t nBase = S.SymbolModules[nModules[i]].Regions[0].nBegin;
					if((intptr_t)vmoffset == nBase)
					{
						bProcessModule = true;
						nModule = nModules[i];
						break;
					}
				}
			}
			if(bProcessModule)
			{
				// nModule stays -1 when no module filter was given; there is no module slot
				// to update in that case.
				if(nModule >= 0)
				{
					S.SymbolModules[nModule].nProgressTarget = S.SymbolModules[nModule].Regions[0].nEnd - S.SymbolModules[nModule].Regions[0].nBegin;
				}
				dl_info di;
				int r = 0;
				r = dladdr((void*)vmoffset, &di);
				// dladdr can succeed without a symbol name; a null name must not be demangled.
				if(r && di.dli_sname)
				{
					OnFunction(di.dli_saddr, (void*)addr_prev, di.dli_sname, di.dli_fname, di.dli_fbase);
				}
				// Walk the region backwards, one symbol at a time.
				intptr_t addr = vmoffset + vmsize - 1;
				while(1)
				{
					r = dladdr((void*)(addr), &di);
					if(r)
					{
						if(!di.dli_sname)
						{
							break;
						}
						OnFunction(di.dli_saddr, (void*)addr_prev, di.dli_sname, di.dli_fname, di.dli_fbase);
					}
					else
					{
						break;
					}
					addr_prev = (vm_offset_t)di.dli_saddr;
					addr = (intptr_t)di.dli_saddr - 1;
					if(di.dli_saddr < (void*)vmoffset)
					{
						break;
					}
				}
				// Flag every module that starts at this region as loaded.
				for(int i = 0; i < S.SymbolNumModules; ++i)
				{
					if(S.SymbolModules[i].Regions[0].nBegin == (intptr_t)vmoffset)
					{
						S.SymbolModules[i].nModuleLoadFinished.store(1);
					}
				}
			}
		}
		vmoffset += vmsize;
		vbrcount = sizeof(vbr) / 4;
	}
}
void MicroProfileSymbolUpdateModuleList()
{
char FunctionName[1024];
(void)FunctionName;
mach_port_name_t task = mach_task_self();
vm_map_offset_t vmoffset = 0;
mach_vm_size_t vmsize = 0;
uint32_t nd;
kern_return_t kr;
vm_region_submap_info_64 vbr;
mach_msg_type_number_t vbrcount = sizeof(vbr) / 4;
while(KERN_SUCCESS == (kr = mach_vm_region_recurse(task, &vmoffset, &vmsize, &nd, (vm_region_recurse_info_t)&vbr, &vbrcount)))
{
{
if(0 != (vbr.protection & VM_PROT_EXECUTE))
{
dl_info di;
int r = 0;
r = dladdr((void*)vmoffset, &di);
if(r)
{
uprintf("[0x%p-0x%p] (0x%p) %s %s\n", (void*)vmoffset, (void*)addr_prev, di.dli_fbase, di.dli_fname, di.dli_sname);
MicroProfileSymbolInitModule(di.dli_fname, (intptr_t)vmoffset, (intptr_t)vmoffset + vmsize);
}
}
}
vmoffset += vmsize;
vbrcount = sizeof(vbr) / 4;
}
}
// Maps an anonymous, private read/write/execute region of at least s bytes (rounded up to a
// whole number of pages), using f as the address hint passed to mmap.
// Returns the mmap result unchanged, i.e. MAP_FAILED when the mapping cannot be created.
static void* MicroProfileAllocExecutableMemory(void* f, size_t s)
{
	static uint64_t nPageSize = 0;
	if(!nPageSize)
	{
		nPageSize = getpagesize();
	}
	s = (s + (nPageSize - 1)) & (~(nPageSize - 1));
	// Anonymous mappings have no backing file; -1 is the descriptor value mmap documents for them.
	void* pMem = mmap((void*)f, s, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
	// uprintf("Allocating %zu %p\n", s, pMem);
	return pMem;
}
// Demangling is not implemented here: copies pName into OutName unchanged.
// The copy is limited to Size bytes including the terminator (longer names are truncated);
// nothing is written when Size is 0.
// NOTE(review): assumes Size is the capacity of OutName in bytes - confirm against callers.
// Always returns true.
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size)
{
	// demangle not implemented
	if(Size == 0)
	{
		return true;
	}
	size_t nLen = strlen(pName);
	if(nLen >= Size)
	{
		nLen = Size - 1;
	}
	memcpy(OutName, pName, nLen);
	OutName[nLen] = '\0';
	return true;
}
// macOS stub: no threads are suspended; always returns true.
bool MicroProfilePatchBeginSuspend()
{
// Not implemented
return true;
}
// macOS stub matching MicroProfilePatchBeginSuspend: does nothing.
void MicroProfilePatchEndSuspend()
{
// Not implemented
}
// Instruments the functions named in pSymbols (nNumSymbols entries, with the matching module
// names in pModules) by resolving each name with dlsym on the main program handle.
// Names that cannot be resolved are skipped. Does nothing when the handle cannot be opened.
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols)
{
	// A null path opens the main program; the binding mode is given explicitly.
	void* M = dlopen(0, RTLD_LAZY);
	if(!M)
	{
		uprintf("dlopen failed: %s\n", dlerror());
		return;
	}
	for(uint32_t i = 0; i < nNumSymbols; ++i)
	{
		// uprintf("trying to find symbol %s\n", pSym);
		void* s = dlsym(M, pSymbols[i]);
		uprintf("sym returned %p\n", s);
		if(s)
		{
			uint32_t nColor = MicroProfileColorFromString(pSymbols[i]);
			const char* pDemangled = MicroProfileDemangleSymbol(pSymbols[i]);
			MicroProfileInstrumentFunction(s, pModules[i], pDemangled, nColor);
		}
	}
	dlclose(M);
}
#endif
#if defined(__unix__) && defined(__x86_64__)
// '##::::'##::'#######:::'#######::'##:::'##::::'##:::::::'####:'##::: ##:'##::::'##:'##::::'##:
// ##:::: ##:'##.... ##:'##.... ##: ##::'##::::: ##:::::::. ##:: ###:: ##: ##:::: ##:. ##::'##::
// ##:::: ##: ##:::: ##: ##:::: ##: ##:'##:::::: ##:::::::: ##:: ####: ##: ##:::: ##::. ##'##:::
// #########: ##:::: ##: ##:::: ##: #####::::::: ##:::::::: ##:: ## ## ##: ##:::: ##:::. ###::::
// ##.... ##: ##:::: ##: ##:::: ##: ##. ##:::::: ##:::::::: ##:: ##. ####: ##:::: ##::: ## ##:::
// ##:::: ##: ##:::: ##: ##:::: ##: ##:. ##::::: ##:::::::: ##:: ##:. ###: ##:::: ##:: ##:. ##::
// ##:::: ##:. #######::. #######:: ##::. ##:::: ########:'####: ##::. ##:. #######:: ##:::. ##:
// ..:::::..:::.......::::.......:::..::::..:::::........::....::..::::..:::.......:::..:::::..::
#include <cxxabi.h>
#include <distorm.h>
#include <dlfcn.h>
#include <mnemonics.h>
#include <sys/mman.h>
#include <unistd.h>
static void* MicroProfileAllocExecutableMemory(void* f, size_t s);
static void MicroProfileMakeWriteable(void* p_);
extern "C" void microprofile_tramp_enter_patch() asm("_microprofile_tramp_enter_patch");
extern "C" void microprofile_tramp_enter() asm("_microprofile_tramp_enter");
extern "C" void microprofile_tramp_code_begin() asm("_microprofile_tramp_code_begin");
extern "C" void microprofile_tramp_code_end() asm("_microprofile_tramp_code_end");
extern "C" void microprofile_tramp_intercept0() asm("_microprofile_tramp_intercept0");
extern "C" void microprofile_tramp_end() asm("_microprofile_tramp_end");
extern "C" void microprofile_tramp_exit() asm("_microprofile_tramp_exit");
extern "C" void microprofile_tramp_leave() asm("_microprofile_tramp_leave");
extern "C" void microprofile_tramp_trunk() asm("_microprofile_tramp_trunk");
extern "C" void microprofile_tramp_call_patch_pop() asm("_microprofile_tramp_call_patch_pop");
extern "C" void microprofile_tramp_call_patch_push() asm("_microprofile_tramp_call_patch_push");
// Patches function f so that it jumps through a private trampoline wired to the enter/leave hooks.
// A copy of the assembly trampoline template (the bytes from microprofile_tramp_enter up to
// microprofile_tramp_end) is placed in memory from MicroProfileAllocExecutableMemory, the leading
// instructions of f are relocated into that copy, several slots of the copy are overwritten with
// Argument, the hook addresses and the TLS push/pop helper addresses, and finally the start of f
// is overwritten with a jump into the copy.
//   f        - address of the function to patch; its first bytes are rewritten.
//   Argument - 32 bit value written into two slots of the trampoline copy.
//   enter    - address stored in the slot following microprofile_tramp_intercept0.
//   leave    - address stored in the slot following microprofile_tramp_leave.
//   pError   - optional; receives original code bytes and, when the leading instructions cannot
//              be moved, a message listing them.
// Returns false when MicroProfileCopyInstructionBytes fails, true otherwise.
// NOTE(review): the pointer returned by MicroProfileAllocExecutableMemory is used without a
// failure check.
bool MicroProfilePatchFunction(void* f, int Argument, MicroProfileHookFunc enter, MicroProfileHookFunc leave, MicroProfilePatchError* pError)
{
if(pError)
{
// Keep the first 12 original code bytes of f for diagnostics.
memcpy(&pError->Code[0], f, 12);
}
// Offsets of the template's labels, measured from microprofile_tramp_enter.
intptr_t t_enter = (intptr_t)microprofile_tramp_enter;
intptr_t t_enter_patch_offset = (intptr_t)microprofile_tramp_enter_patch - t_enter;
intptr_t t_code_begin_offset = (intptr_t)microprofile_tramp_code_begin - t_enter;
intptr_t t_code_end_offset = (intptr_t)microprofile_tramp_code_end - t_enter;
intptr_t t_code_intercept0_offset = (intptr_t)microprofile_tramp_intercept0 - t_enter;
intptr_t t_code_exit_offset = (intptr_t)microprofile_tramp_exit - t_enter;
intptr_t t_code_leave_offset = (intptr_t)microprofile_tramp_leave - t_enter;
intptr_t t_code_call_patch_push_offset = (intptr_t)microprofile_tramp_call_patch_push - t_enter;
intptr_t t_code_call_patch_pop_offset = (intptr_t)microprofile_tramp_call_patch_pop - t_enter;
// Space available in the template for the relocated instructions.
intptr_t codemaxsize = t_code_end_offset - t_code_begin_offset;
intptr_t t_end_offset = (intptr_t)microprofile_tramp_end - t_enter;
intptr_t t_trunk_offset = (intptr_t)microprofile_tramp_trunk - t_enter;
intptr_t t_trunk_size = (intptr_t)microprofile_tramp_end - (intptr_t)microprofile_tramp_trunk;
// Memory for this function's private trampoline copy; f is handed to the allocator as address hint.
char* ptramp = (char*)MicroProfileAllocExecutableMemory(f, t_end_offset);
intptr_t offset = ((intptr_t)f + 6 - (intptr_t)ptramp);
// Move 14 bytes by default (size of the register jump); 5 are enough when a relative jump reaches the copy.
uint32_t nBytesToCopy = 14;
if(offset < 0x80000000 && offset > -0x7fffffff)
{
/// offset is small enough to insert a relative jump
nBytesToCopy = 5;
}
// Copy the whole template into the new memory before patching it.
memcpy(ptramp, (void*)t_enter, t_end_offset);
int nInstructionBytesDest = 0;
char* pInstructionMoveDest = ptramp + t_code_begin_offset;
char* pTrunk = ptramp + t_trunk_offset;
int nInstructionBytesSrc = 0;
uint32_t nRegsWritten = 0;
uint32_t nRetSafe = 0;
uint32_t nUsableJumpRegs = (1 << R_RAX) | (1 << R_R10) | (1 << R_R11); // scratch && !parameter register
// Relocate at least nBytesToCopy bytes worth of whole instructions from f into the copy.
if(!MicroProfileCopyInstructionBytes(
pInstructionMoveDest, f, nBytesToCopy, codemaxsize, pTrunk, t_trunk_size, nUsableJumpRegs, &nInstructionBytesDest, &nInstructionBytesSrc, &nRegsWritten, &nRetSafe))
{
if(pError)
{
// Report the bytes that could not be moved, both raw and as a hex dump in the message.
const char* pCode = (const char*)f;
memset(pError->Code, 0, sizeof(pError->Code));
memcpy(pError->Code, pCode, nInstructionBytesSrc);
int off = stbsp_snprintf(pError->Message, sizeof(pError->Message), "Failed to move %d code bytes ", nInstructionBytesSrc);
pError->nCodeSize = nInstructionBytesSrc;
for(int i = 0; i < nInstructionBytesSrc; ++i)
{
off += stbsp_snprintf(off + pError->Message, sizeof(pError->Message) - off, "%02x ", 0xff & pCode[i]);
}
uprintf("%s\n", pError->Message);
}
return false;
}
// Address in f right after the moved instructions; the copy jumps back here.
intptr_t phome = nInstructionBytesSrc + (intptr_t)f;
// Candidate jump registers that are not flagged in nRegsWritten.
uint32_t reg = nUsableJumpRegs & ~nRegsWritten;
static_assert(R_RAX == 0, "R_RAX must be 0");
if(0 == reg)
{
// No register is free: jump back with the ret based sequence instead.
if(nRetSafe == 0)
{
MP_BREAK(); // should fail earlier
}
MicroProfileInsertRetJump(pInstructionMoveDest + nInstructionBytesDest, phome);
}
else
{
// Pick the lowest numbered free register (index of the lowest set bit).
int r = R_RAX;
while((reg & 1) == 0)
{
reg >>= 1;
r++;
}
MicroProfileInsertRegisterJump(pInstructionMoveDest + nInstructionBytesDest, phome, r);
}
// PATCH 1 TRAMP EXIT
// NOTE: the locals below shadow the extern template symbols of the same name; they hold
// addresses inside the copy, not inside the template.
intptr_t microprofile_tramp_exit = (intptr_t)ptramp + t_code_exit_offset;
memcpy(ptramp + t_enter_patch_offset + 2, (void*)&microprofile_tramp_exit, 8);
char* pintercept = t_code_intercept0_offset + ptramp;
// PATCH 1.5 Argument
memcpy(pintercept - 4, (void*)&Argument, 4);
// PATCH 2 INTERCEPT0
intptr_t addr = (intptr_t)enter; //&intercept0;
memcpy(pintercept + 2, (void*)&addr, 8);
// PATCH 2.5 argument
memcpy(ptramp + t_code_exit_offset + 3, (void*)&Argument, 4);
intptr_t microprofile_tramp_leave = (intptr_t)ptramp + t_code_leave_offset;
// PATCH 3 INTERCEPT1
intptr_t addr1 = (intptr_t)leave; //&intercept1;
memcpy((char*)microprofile_tramp_leave + 2, (void*)&addr1, 8);
// Store the addresses of the TLS push/pop helpers in their slots of the copy.
intptr_t patch_push_addr = (intptr_t)(&MicroProfile_Patch_TLS_PUSH);
intptr_t patch_pop_addr = (intptr_t)(&MicroProfile_Patch_TLS_POP);
memcpy((char*)ptramp + t_code_call_patch_push_offset + 2, &patch_push_addr, 8);
memcpy((char*)ptramp + t_code_call_patch_pop_offset + 2, &patch_pop_addr, 8);
{
// PATCH 4 DEST FUNC
// Overwrite the start of f with a jump to the copy and pad the rest of the moved bytes with nops (0x90).
MicroProfileMakeWriteable(f);
char* pp = (char*)f;
char* ppend = pp + nInstructionBytesSrc;
if(nInstructionBytesSrc < 14)
{
uprintf("inserting 5b jump\n");
pp = MicroProfileInsertRelativeJump((char*)pp, (intptr_t)ptramp);
}
else
{
uprintf("inserting 14b jump\n");
pp = MicroProfileInsertRegisterJump(pp, (intptr_t)ptramp, R_RAX);
}
while(pp != ppend)
{
*pp++ = 0x90;
}
}
return true;
}
// Makes the page containing p_ - and the page containing p_ + 14, when that is a different
// one - readable, writable and executable. The mprotect result is not checked.
// NOTE(review): 14 presumably matches the largest jump sequence written by the patcher.
static void MicroProfileMakeWriteable(void* p_)
{
	const intptr_t nPage = (intptr_t)getpagesize();
	const intptr_t nPageMask = ~(nPage - 1);
	const intptr_t nAddress = (intptr_t)p_;
	const intptr_t nFirstPage = nAddress & nPageMask;
	const intptr_t nLastPage = (14 + nAddress) & nPageMask;
	const size_t nBytes = (nPage + nLastPage) - nFirstPage;
	mprotect((void*)nFirstPage, nBytes, PROT_READ | PROT_WRITE | PROT_EXEC);
}
// Copies pStr into [pOutBegin, pOutEnd) while dropping every '(' ... ')' and '<' ... '>'
// group (brackets included), then NUL-terminates the result.
// Nothing is written and 0 is returned when pStr (plus terminator) does not fit in the
// output range or the range holds at most one byte.
// Returns the number of characters copied, not counting the terminator.
int MicroProfileTrimFunctionName(const char* pStr, char* pOutBegin, char* pOutEnd)
{
	int nLastIndex = strlen(pStr) - 1;
	char* const pLimit = pOutEnd - 1; // last slot is reserved for the terminator
	const bool bFits = nLastIndex < pLimit - pOutBegin && pOutBegin != pLimit;
	if(!bFits)
	{
		return 0;
	}
	const char* const pStop = pStr + nLastIndex + 1;
	char* pOut = pOutBegin;
	int nDepth = 0;
	int nWritten = 0;
	for(const char* pIn = pStr; pIn != pStop && pOut != pLimit; ++pIn)
	{
		const char c = *pIn;
		switch(c)
		{
		case '(':
		case '<':
			++nDepth;
			break;
		case ')':
		case '>':
			--nDepth;
			continue; // closing brackets are never copied
		default:
			break;
		}
		if(nDepth == 0)
		{
			*pOut++ = c;
			++nWritten;
		}
	}
	*pOut = '\0';
	return nWritten;
}
// Locates the function-name token that precedes the trailing parameter list of pStr.
// The closing ')' is searched within the last characters of the string (a short trailing
// qualifier such as " const" is skipped), the matching '(' is found by counting parentheses,
// and the whitespace-delimited token in front of it is reported.
//   ppStart - receives the start of the token. When the token is preceded by whitespace that
//             whitespace character is included (existing behaviour); when the token starts at
//             the beginning of pStr, pStr itself is returned.
// Returns the number of characters at *ppStart, or 0 (with *ppStart = pStr) when no
// parameter list / name is found.
int MicroProfileFindFunctionName(const char* pStr, const char** ppStart)
{
	int l = strlen(pStr) - 1;
	// Debug aid: print the string with all bracketed groups removed. Only done when the
	// string plus terminator fits in the local buffer.
	if(l < 1024 - 1)
	{
		char b[1024] = { 0 };
		char* put = &b[0];
		const char* p = pStr;
		const char* pEnd = pStr + l + 1;
		int in = 0;
		while(p != pEnd)
		{
			char c = *p++;
			if(c == '(' || c == '<')
			{
				in++;
			}
			else if(c == ')' || c == '>')
			{
				in--;
				continue;
			}
			if(in == 0)
			{
				*put++ = c;
			}
		}
		*put++ = '\0';
		uprintf("trimmed %s\n", b);
	}
	int nNumParen = 0;
	int c = 0;
	// Step back over at most strlen(" const") characters looking for the closing ')'.
	while(l >= 0 && pStr[l] != ')' && c++ < (int)(sizeof(" const") - 1))
	{
		l--;
	}
	// l < 0 means the start of the string was passed without finding ')'; never index pStr[-1].
	if(l < 0 || pStr[l] != ')')
	{
		*ppStart = pStr;
		return 0;
	}
	// Walk back to just before the '(' matching the closing ')'.
	do
	{
		if(pStr[l] == ')')
		{
			nNumParen++;
		}
		else if(pStr[l] == '(')
		{
			nNumParen--;
		}
		l--;
	} while(nNumParen > 0 && l >= 0);
	// isspace requires a value representable as unsigned char.
	while(l >= 0 && isspace((unsigned char)pStr[l]))
	{
		--l;
	}
	int nLast = l;
	while(l >= 0 && !isspace((unsigned char)pStr[l]))
	{
		l--;
	}
	int nFirst = l;
	if(nFirst == nLast)
	{
		// Nothing in front of the parameter list.
		*ppStart = pStr;
		return 0;
	}
	if(nFirst < 0)
	{
		// The token starts at the very beginning of pStr; do not point before the string.
		*ppStart = pStr;
		return nLast + 1;
	}
	int nCount = nLast - nFirst + 1;
	*ppStart = pStr + nFirst;
	return nCount;
}
// Demangles pSymbol with abi::__cxa_demangle into a function-local static scratch buffer.
// Returns the scratch buffer on success (its contents are replaced by the next call), or
// pSymbol itself when demangling fails.
const char* MicroProfileDemangleSymbol(const char* pSymbol)
{
	static unsigned long nCapacity = 128;
	static char* pScratch = (char*)malloc(nCapacity); // must come from malloc: the demangler may realloc it.
	unsigned long nLength = nCapacity;
	int nStatus = 0;
	char* const pResult = abi::__cxa_demangle(pSymbol, pScratch, &nLength, &nStatus);
	if(nStatus != 0)
	{
		return pSymbol;
	}
	if(pResult != pScratch)
	{
		// The demangler handed back a different buffer; adopt it and its reported size.
		pScratch = pResult;
		if(nLength < nCapacity)
			__builtin_trap();
		nCapacity = nLength;
	}
	return pScratch;
}
template <typename Callback>
void MicroProfileIterateSymbols(Callback CB, uint32_t* nModules, uint32_t nNumModules)
{
MICROPROFILE_SCOPEI("MicroProfile", "MicroProfileIterateSymbols", MP_PINK3);
char FunctionName[1024];
intptr_t nCurrentModule = -1;
uint32_t nCurrentModuleId = -1;
auto OnFunction = [&](void* addr, void* addrend, const char* pSymbol, const char* pModuleName, void* pModuleAddr) -> bool
{
const char* pStr = MicroProfileDemangleSymbol(pSymbol);
;
int l = MicroProfileTrimFunctionName(pStr, &FunctionName[0], &FunctionName[1024]);
MP_ASSERT(nCurrentModule == (intptr_t)pModuleAddr);
CB(l ? &FunctionName[0] : pStr, l ? &FunctionName[0] : 0, (intptr_t)addr, (intptr_t)addrend, nCurrentModuleId);
return true;
};
for(int i = 0; i < S.SymbolNumModules; ++i)
{
auto& M = S.SymbolModules[i];
if(0 != nNumModules)
{
bool bProcess = false;
for(uint32_t j = 0; j < nNumModules; ++j)
{
if(nModules[j] == (uint32_t)i)
{
bProcess = true;
break;
}
}
if(!bProcess)
continue;
}
nCurrentModuleId = i;
Dl_info di;
int r = 0;
r = dladdr((void*)(M.Regions[0].nBegin), &di);
if(r)
{
nCurrentModule = (intptr_t)di.dli_fbase;
M.nProgressTarget = 0;
for(int j = 0; j < M.nNumExecutableRegions; ++j)
{
M.nProgressTarget += M.Regions[j].nEnd - M.Regions[j].nBegin;
}
for(int j = 0; j < M.nNumExecutableRegions; ++j)
{
const intptr_t nBegin = M.Regions[j].nBegin;
const intptr_t nEnd = M.Regions[j].nEnd;
int r = 0;
intptr_t nAddr = (nEnd - 8) & ~7;
intptr_t nAddrPrev = nEnd;
while(1)
{
r = dladdr((void*)(nAddr), &di);
if(r && di.dli_sname)
{
OnFunction(di.dli_saddr, (void*)nAddrPrev, di.dli_sname, di.dli_fname, di.dli_fbase);
nAddrPrev = (intptr_t)di.dli_saddr;
nAddr = (intptr_t)di.dli_saddr - 1;
}
else
{
nAddr = (nAddr - 7) & ~7; // pretty ineffecient, but it seems linux just returns 0 when there is no symbols, making this the only option I can come up with?
}
if(nAddr < nBegin)
{
break;
}
}
}
M.nProgress = M.nProgressTarget;
M.nModuleLoadFinished.store(1);
}
}
}
// Registers every executable mapping of the current process as a symbol module region.
// Parses /proc/self/maps line by line; for each mapping whose execute permission is 'x',
// dladdr is used to find the owning object, and mappings whose object name does not start
// with '[' are passed to MicroProfileSymbolInitModule. Finishes by merging the regions.
// If /proc/self/maps cannot be opened the function returns without touching the module list.
void MicroProfileSymbolUpdateModuleList()
{
    // So, this was the only way I could find to do this..
    // Is this seriously how they want this to be done?
    FILE* F = fopen("/proc/self/maps", "r");
    if(!F)
    {
        // previously a null FILE* was handed straight to getline
        return;
    }
    char* line = 0;
    size_t len = 0;
    ssize_t read;
    Dl_info di;
    while((read = getline(&line, &len, F)) != -1)
    {
        void* pBase = 0;
        void* pEnd = 0;
        char c, r, w, x, p;
        // line layout: <begin>-<end> <r><w><x><p> ...
        if(8 == sscanf(line, "%p%c%p%c%c%c%c%c", &pBase, &c, &pEnd, &c, &r, &w, &x, &p))
        {
            if('x' == x)
            {
                const int nFound = dladdr(pBase, &di);
                if(nFound && di.dli_fname)
                {
                    if('[' != di.dli_fname[0])
                    {
                        MicroProfileSymbolInitModule(di.dli_fname, (intptr_t)pBase, (intptr_t)pEnd);
                    }
                }
            }
        }
    }
    // getline allocates (and grows) the line buffer; the caller owns it
    free(line);
    fclose(F);
    MicroProfileSymbolMergeExecutableRegions();
}
// Maps a private anonymous region that is readable, writable and executable.
//  f  address hint forwarded to mmap (may be null).
//  s  requested size in bytes; rounded up to a whole number of pages.
// Returns the result of mmap unchanged, i.e. MAP_FAILED (not null) when the mapping fails.
static void* MicroProfileAllocExecutableMemory(void* f, size_t s)
{
    static uint64_t nPageSize = 0;
    if(!nPageSize)
    {
        nPageSize = getpagesize();
    }
    // round the size up to the next page boundary
    s = (s + (nPageSize - 1)) & (~(nPageSize - 1));
    // anonymous mappings take no file; portable code passes -1 as the descriptor
    void* pMem = mmap(f, s, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
    return pMem;
}
// Copies pName into OutName without demangling it (demangling is not implemented here).
//  pName    zero terminated source name.
//  OutName  destination buffer.
//  Size     capacity of OutName in bytes, including the terminator.
// The copy is truncated to fit and is always zero terminated.
// Returns true when a name was written; false when there is no destination space or no source.
bool MicroProfileDemangleName(const char* pName, char* OutName, uint32_t Size)
{
    if(!OutName || 0 == Size)
    {
        return false;
    }
    if(!pName)
    {
        OutName[0] = '\0';
        return false;
    }
    // demangle not implemented
    size_t nLen = strlen(pName);
    if(nLen >= Size)
    {
        // keep room for the terminator instead of writing past the buffer
        nLen = Size - 1;
    }
    memcpy(OutName, pName, nLen);
    OutName[nLen] = '\0';
    return true;
}
// Start of the suspend bracket used around patching. This implementation does nothing
// and always reports success.
bool MicroProfilePatchBeginSuspend()
{
    // Not implemented
    const bool bResult = true;
    return bResult;
}
// End of the suspend bracket used around patching. This implementation is a no-op.
void MicroProfilePatchEndSuspend()
{
    // Not implemented
    return;
}
// not yet tested.
// Instruments functions located by name through the dynamic loader instead of the symbol tables.
//  pModules     module name for each symbol, forwarded to MicroProfileInstrumentFunction.
//  pSymbols     symbol names to look up with dlsym.
//  nNumSymbols  number of entries in both arrays.
// Symbols that dlsym cannot resolve are skipped.
void MicroProfileInstrumentWithoutSymbols(const char** pModules, const char** pSymbols, uint32_t nNumSymbols)
{
    // dlopen requires RTLD_LAZY or RTLD_NOW in its flags; a flags value of 0 is rejected
    void* M = dlopen(0, RTLD_LAZY);
    if(!M)
    {
        const char* pError = dlerror();
        uprintf("dlopen failed %s\n", pError ? pError : "");
        return;
    }
    for(uint32_t i = 0; i < nNumSymbols; ++i)
    {
        // uprintf("trying to find symbol %s\n", pSym);
        void* s = dlsym(M, pSymbols[i]);
        uprintf("sym returned %p\n", s);
        if(s)
        {
            uint32_t nColor = MicroProfileColorFromString(pSymbols[i]);
            const char* pDemangled = MicroProfileDemangleSymbol(pSymbols[i]);
            MicroProfileInstrumentFunction(s, pModules[i], pDemangled, nColor);
        }
    }
    dlclose(M);
}
#endif
#endif
// Initializes an open-addressing hash table.
//  pTable        table to initialize; any previous contents are overwritten, not freed.
//  nInitialSize  number of slots to allocate; 0 is treated as 1.
//  nSearchLimit  upper bound for the probe limit (nLim).
//  CompareFunc   key equality callback, or 0 to compare keys with ==.
//  HashFunc      hash callback, or 0 to use the key itself as hash.
// The probe limit is a fifth of the slot count, clamped to nSearchLimit.
void MicroProfileHashTableInit(MicroProfileHashTable* pTable, uint32_t nInitialSize, uint32_t nSearchLimit, MicroProfileHashCompareFunction CompareFunc, MicroProfileHashFunction HashFunc)
{
    // a table with zero slots would divide by zero on every lookup and could never grow (0 * 2)
    if(0 == nInitialSize)
    {
        nInitialSize = 1;
    }
    pTable->nAllocated = nInitialSize;
    pTable->nUsed = 0;
    // compute the byte count in size_t so large slot counts do not wrap at 32 bits
    const size_t nSize = (size_t)nInitialSize * sizeof(MicroProfileHashTableEntry);
    pTable->pEntries = (MicroProfileHashTableEntry*)MICROPROFILE_ALLOC(nSize, 8);
    pTable->CompareFunc = CompareFunc;
    pTable->HashFunc = HashFunc;
    pTable->nSearchLimit = nSearchLimit;
    pTable->nLim = pTable->nAllocated / 5;
    if(pTable->nLim > pTable->nSearchLimit)
        pTable->nLim = pTable->nSearchLimit;
    // a Hash of 0 marks a slot as empty
    memset(pTable->pEntries, 0, nSize);
}
// Releases the entry array of a hash table.
// The pointer is cleared afterwards so that destroying the same table twice does not free
// the array a second time. The remaining fields are left untouched.
void MicroProfileHashTableDestroy(MicroProfileHashTable* pTable)
{
    if(pTable->pEntries)
    {
        MICROPROFILE_FREE(pTable->pEntries);
        pTable->pEntries = nullptr;
    }
}
// Computes the hash stored for key K: the table's HashFunc when one is set, otherwise K itself.
// The value 0 is never returned (it is remapped to 1) because 0 marks an empty slot.
uint64_t MicroProfileHashTableHash(MicroProfileHashTable* pTable, uint64_t K)
{
    uint64_t nHash = K;
    if(pTable->HashFunc)
    {
        nHash = (*pTable->HashFunc)(K);
    }
    if(0 == nHash)
    {
        nHash = 1;
    }
    return nHash;
}
// Doubles the slot count of a table: builds a new table of twice the size with the same
// callbacks and search limit, re-inserts every occupied slot using its stored hash, then
// frees the old entry array and replaces *pTable with the new table.
void MicroProfileHashTableGrow(MicroProfileHashTable* pTable)
{
    const uint32_t nOldCount = pTable->nAllocated;
    const uint32_t nNewCount = nOldCount * 2;
    uprintf("GROW %d -> %d\n", nOldCount, nNewCount);
    MicroProfileHashTable Bigger;
    MicroProfileHashTableInit(&Bigger, nNewCount, pTable->nSearchLimit, pTable->CompareFunc, pTable->HashFunc);
    const MicroProfileHashTableEntry* pOld = pTable->pEntries;
    for(const MicroProfileHashTableEntry* pIt = pOld; pIt != pOld + nOldCount; ++pIt)
    {
        // Hash == 0 means the slot is unused
        if(0 == pIt->Hash)
        {
            continue;
        }
        // growing is disabled while re-inserting into the new table
        MicroProfileHashTableSet(&Bigger, pIt->Key, pIt->Value, pIt->Hash, false);
    }
    MicroProfileHashTableDestroy(pTable);
    *pTable = Bigger;
}
// Inserts or overwrites Key -> Value, hashing the key with the table's hash function and
// allowing the table to grow when the probe limit is exceeded.
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value)
{
    const bool bAllowGrow = true;
    const uint64_t nHash = MicroProfileHashTableHash(pTable, Key);
    return MicroProfileHashTableSet(pTable, Key, Value, nHash, bAllowGrow);
}
// Returns an iterator constructed at slot index 0 of HashTable.
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorBegin(MicroProfileHashTable* HashTable)
{
    const uint32_t nFirstIndex = 0;
    return MicroProfileHashTableIterator(nFirstIndex, HashTable);
}
// Returns an iterator constructed one past the last slot (index nAllocated) of HashTable.
MicroProfileHashTableIterator MicroProfileGetHashTableIteratorEnd(MicroProfileHashTable* HashTable)
{
    const uint32_t nEndIndex = HashTable->nAllocated;
    return MicroProfileHashTableIterator(nEndIndex, HashTable);
}
// Inserts or overwrites Key -> Value using a caller supplied hash.
//  H           hash of Key; must be non-zero because 0 marks an empty slot.
//  bAllowGrow  when true the table is grown and the insert retried if no slot is found
//              within the probe limit; when false that situation is reported and the
//              insert fails.
// Probing is linear, starting at H % nAllocated.
// Returns true when the value was stored, false when it could not be stored without growing.
bool MicroProfileHashTableSet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t Value, uint64_t H, bool bAllowGrow)
{
    if(H == 0)
        MP_BREAK(); // not supported.
    MicroProfileHashCompareFunction Cmp = pTable->CompareFunc;
    while(1)
    {
        const uint32_t nLim = pTable->nLim;
        uint32_t B = H % pTable->nAllocated;
        MicroProfileHashTableEntry* pEntries = pTable->pEntries;
        for(uint32_t i = 0; i < pTable->nAllocated; ++i)
        {
            uint32_t Idx = (B + i) % pTable->nAllocated;
            if(pEntries[Idx].Hash == 0)
            {
                // free slot: claim it
                pEntries[Idx].Hash = H;
                pEntries[Idx].Key = Key;
                pEntries[Idx].Value = Value;
                return true;
            }
            else if(pEntries[Idx].Hash == H && (Cmp ? (Cmp)(Key, pEntries[Idx].Key) : Key == pEntries[Idx].Key))
            {
                // key already present: overwrite the value
                pEntries[Idx].Value = Value;
                return true;
            }
            else if(i > nLim)
            {
                // probe limit exceeded
                break;
            }
        }
        if(!bAllowGrow)
        {
            // without this return the loop would retry the same failing probe forever
            // whenever MP_BREAK returns
            MP_BREAK();
            return false;
        }
        MicroProfileHashTableGrow(pTable);
    }
    MP_BREAK();
    return false;
}
// Looks up Key. On success writes the stored value to *pValue and returns true.
// Returns false when an empty slot is reached or every slot has been probed without a match.
bool MicroProfileHashTableGet(MicroProfileHashTable* pTable, uint64_t Key, uintptr_t* pValue)
{
    const uint64_t nHash = MicroProfileHashTableHash(pTable, Key);
    const uint32_t nStart = nHash % pTable->nAllocated;
    MicroProfileHashTableEntry* pSlots = pTable->pEntries;
    MicroProfileHashCompareFunction Cmp = pTable->CompareFunc;
    for(uint32_t nProbe = 0; nProbe < pTable->nAllocated; ++nProbe)
    {
        const uint32_t nSlot = (nStart + nProbe) % pTable->nAllocated;
        MicroProfileHashTableEntry& Entry = pSlots[nSlot];
        if(0 == Entry.Hash)
        {
            // an empty slot terminates the probe sequence
            return false;
        }
        if(Entry.Hash != nHash)
        {
            continue;
        }
        const bool bSameKey = Cmp ? (Cmp)(Key, Entry.Key) : (Key == Entry.Key);
        if(bSameKey)
        {
            *pValue = Entry.Value;
            return true;
        }
    }
    return false;
}
// Removes Key from the table.
// After clearing the matching slot, every entry that follows it up to the next empty slot is
// taken out and re-inserted (without growing) so that linear probing still finds it.
// Returns true when the key was found and removed, false when it is not in the table.
bool MicroProfileHashTableRemove(MicroProfileHashTable* pTable, uint64_t Key)
{
    uint64_t H = MicroProfileHashTableHash(pTable, Key);
    uint32_t B = H % pTable->nAllocated;
    MicroProfileHashTableEntry* pEntries = pTable->pEntries;
    MicroProfileHashCompareFunction Cmp = pTable->CompareFunc;
    uint32_t nBase = (uint32_t)-1;
    uint32_t nAllocated = pTable->nAllocated;
    for(uint32_t i = 0; i < nAllocated; ++i)
    {
        uint32_t Idx = (B + i) % nAllocated;
        if(pEntries[Idx].Hash == 0)
        {
            return false;
        }
        else if(pEntries[Idx].Hash == H && (Cmp ? (Cmp)(Key, pEntries[Idx].Key) : Key == pEntries[Idx].Key))
        {
            nBase = Idx;
            break;
        }
    }
    if(nBase == (uint32_t)-1)
    {
        // every slot was probed without a match or an empty slot (table completely full):
        // the key is absent, and indexing with nBase would write far out of bounds
        return false;
    }
    pEntries[nBase].Hash = 0;
    pEntries[nBase].Key = 0;
    pEntries[nBase].Value = 0;
    nBase++;
    for(uint32_t i = 0; i < nAllocated; ++i)
    {
        uint32_t Idx = (nBase + i) % nAllocated;
        if(pEntries[Idx].Hash == 0)
        {
            break;
        }
        else
        {
            // pull the entry out and put it back so it lands on the earliest free probe slot
            MicroProfileHashTableEntry E = pEntries[Idx];
            pEntries[Idx] = {};
            MicroProfileHashTableSet(pTable, E.Key, E.Value, E.Hash, false);
        }
    }
    return true;
}
// Hash callback for string keys: the key holds a const char* and is hashed with
// MicroProfileStringHash.
uint64_t MicroProfileHashTableHashString(uint64_t pString)
{
    const char* pText = (const char*)pString;
    return MicroProfileStringHash(pText);
}
// Compare callback for string keys: both keys hold a const char*; returns true when the
// two strings have identical contents.
bool MicroProfileHashTableCompareString(uint64_t L, uint64_t R)
{
    const char* pLeft = (const char*)L;
    const char* pRight = (const char*)R;
    return strcmp(pLeft, pRight) == 0;
}
// Hash callback for pointer/integer keys: mixes the bits of x with two rounds of
// xor-shift-by-33 followed by a multiply, and a final xor-shift-by-33.
uint64_t MicroProfileHashTableHashPtr(uint64_t x)
{
    const uint64_t Multipliers[2] = { 0xff51afd7ed558ccdULL, 0xc4ceb9fe1a85ec53ULL };
    for(uint32_t nRound = 0; nRound < 2; ++nRound)
    {
        x ^= x >> 33;
        x *= Multipliers[nRound];
    }
    x ^= x >> 33;
    return x;
}
// Compare callback for pointer/integer keys: true when both keys hold the same value.
bool MicroProfileHashTableComparePtr(uint64_t L, uint64_t R)
{
    const bool bDifferent = (L != R);
    return !bDifferent;
}
// Stores pKey -> pValue in a string keyed table. Only the pointers are stored; the strings
// are not copied.
bool MicroProfileHashTableSetString(MicroProfileHashTable* pTable, const char* pKey, const char* pValue)
{
    const uint64_t nKey = (uint64_t)pKey;
    const uintptr_t nValue = (uintptr_t)pValue;
    return MicroProfileHashTableSet(pTable, nKey, nValue);
}
// Looks up pKey in a string keyed table; on success the stored pointer is written to *pValue.
bool MicroProfileHashTableGetString(MicroProfileHashTable* pTable, const char* pKey, const char** pValue)
{
    const uint64_t nKey = (uint64_t)pKey;
    uintptr_t* pOut = (uintptr_t*)pValue;
    return MicroProfileHashTableGet(pTable, nKey, pOut);
}
// Removes pKey from a string keyed table; returns the result of MicroProfileHashTableRemove.
bool MicroProfileHashTableRemoveString(MicroProfileHashTable* pTable, const char* pKey)
{
    const uint64_t nKey = (uint64_t)pKey;
    return MicroProfileHashTableRemove(pTable, nKey);
}
// Stores pKey -> pValue in a pointer keyed table.
bool MicroProfileHashTableSetPtr(MicroProfileHashTable* pTable, const void* pKey, void* pValue)
{
    const uint64_t nKey = (uint64_t)pKey;
    const uintptr_t nValue = (uintptr_t)pValue;
    return MicroProfileHashTableSet(pTable, nKey, nValue);
}
// Looks up pKey in a pointer keyed table. When pValue is non-null the stored pointer is
// written to *pValue; when it is null the result is discarded and only presence is reported.
template <typename T>
bool MicroProfileHashTableGetPtr(MicroProfileHashTable* pTable, const void* pKey, T** pValue)
{
    uintptr_t Discard;
    uintptr_t* pOut = &Discard;
    if(pValue)
    {
        pOut = (uintptr_t*)pValue;
    }
    return MicroProfileHashTableGet(pTable, (uint64_t)pKey, pOut);
}
// Removes pKey from a pointer keyed table; returns the result of MicroProfileHashTableRemove.
bool MicroProfileHashTableRemovePtr(MicroProfileHashTable* pTable, const char* pKey)
{
    const uint64_t nKey = (uint64_t)pKey;
    return MicroProfileHashTableRemove(pTable, nKey);
}
// Mutable element access. No bounds check is performed here.
template <typename T>
T& MicroProfileArray<T>::operator[](const uint32_t Index)
{
    T* pElement = this->Data + Index;
    return *pElement;
}
// Read-only element access; asserts that Index is below Size.
template <typename T>
const T& MicroProfileArray<T>::operator[](const uint32_t Index) const
{
    MP_ASSERT(Index < Size);
    const T* pElement = this->Data + Index;
    return *pElement;
}
// Pointer to the first element (the Data pointer itself).
template <typename T>
T* MicroProfileArray<T>::begin()
{
    T* pFirst = this->Data;
    return pFirst;
}
// Pointer one past the last used element (Data advanced by Size).
template <typename T>
T* MicroProfileArray<T>::end()
{
    T* pFirst = this->Data;
    return pFirst + this->Size;
}
// Allocates storage for InitialCapacity elements in an array that has not been initialized
// yet (asserts that Data, Size and Capacity are all still zero). Size stays 0.
template <typename T>
void MicroProfileArrayInit(MicroProfileArray<T>& Array, uint32_t InitialCapacity)
{
    MP_ASSERT(Array.Data == nullptr);
    MP_ASSERT(Array.Size == 0);
    MP_ASSERT(Array.Capacity == 0);
    T* pStorage = MP_ALLOC_OBJECT_ARRAY(T, InitialCapacity);
    Array.Data = pStorage;
    Array.Capacity = InitialCapacity;
    Array.Size = 0;
}
// Frees the storage of an array and resets it to the empty state expected by
// MicroProfileArrayInit (Data null, Size 0, Capacity 0).
//  InitialCapacity  unused; kept so existing callers keep compiling.
template <typename T>
void MicroProfileArrayDestroy(MicroProfileArray<T>& Array, uint32_t InitialCapacity)
{
    (void)InitialCapacity;
    if(Array.Data)
        MP_FREE(Array.Data);
    // Array is a reference, so the previous memset(Array, 0, sizeof(*Array)) could not compile
    // once the template was instantiated; reset the fields directly instead
    Array.Data = nullptr;
    Array.Size = 0;
    Array.Capacity = 0;
}
// Drops all elements by resetting Size to 0; the allocated storage and Capacity are kept.
template <typename T>
void MicroProfileArrayClear(MicroProfileArray<T>& Array)
{
    uint32_t& nCount = Array.Size;
    nCount = 0;
}
// Appends a copy of v to the array, growing the storage when it is full.
// New capacity is (max(1, Capacity) + 1) * 3 / 2. Existing elements are moved to the new
// storage with memcpy, so T is expected to be trivially copyable.
// v may refer to an element of the array itself.
template <typename T>
void MicroProfileArrayPushBack(MicroProfileArray<T>& Array, const T& v)
{
    uint32_t& Size = Array.Size;
    uint32_t& Capacity = Array.Capacity;
    if(Size >= Capacity)
    {
        uint32_t NewCapacity = (MicroProfileMax<uint32_t>(1u, Capacity) + 1) * 3 / 2;
        T* NewData = MP_ALLOC_OBJECT_ARRAY(T, NewCapacity);
        // memcpy must not be handed a null source, even for a zero length
        if(Array.Data && Size)
        {
            memcpy(NewData, Array.Data, Size * sizeof(T));
        }
        // copy the new element before the old storage is released: v may live inside it
        NewData[Size] = v;
        if(Array.Data)
        {
            MP_FREE(Array.Data);
        }
        Array.Data = NewData;
        Capacity = NewCapacity;
        ++Size;
        return;
    }
    Array.Data[Size++] = v;
}
// Frees one string block and subtracts it from the string block counters
// (block count and bytes, where bytes is the payload size plus the block header).
void MicroProfileStringBlockFree(MicroProfileStringBlock* pBlock)
{
    const int64_t nBytes = (int64_t)(pBlock->nSize + sizeof(MicroProfileStringBlock));
    MicroProfileCounterAdd(S.CounterToken_StringBlock_Count, -1);
    MicroProfileCounterAdd(S.CounterToken_StringBlock_Memory, -nBytes);
    MP_FREE(pBlock);
}
// Allocates an empty string block able to hold at least nSize bytes of string data.
// The payload is never smaller than DEFAULT_SIZE minus the block header. The string block
// counters are increased by one block and by the total allocation size.
MicroProfileStringBlock* MicroProfileStringBlockAlloc(uint32_t nSize)
{
    const uint32_t nMinPayload = (uint32_t)(MicroProfileStringBlock::DEFAULT_SIZE - sizeof(MicroProfileStringBlock));
    const uint32_t nPayload = MicroProfileMax(nSize, nMinPayload);
    const uint32_t nTotal = nPayload + (uint32_t)sizeof(MicroProfileStringBlock);
    MicroProfileCounterAdd(S.CounterToken_StringBlock_Count, 1);
    MicroProfileCounterAdd(S.CounterToken_StringBlock_Memory, nTotal);
    // uprintf("alloc string block %d sizeof strings is %d\n", nSize, (int)sizeof(MicroProfileStringBlock));
    MicroProfileStringBlock* pNew = (MicroProfileStringBlock*)MP_ALLOC(nTotal, 8);
    pNew->nUsed = 0;
    pNew->nSize = nPayload;
    pNew->pNext = 0;
    return pNew;
}
// Prepares an empty string pool: no blocks yet, and a string keyed hash table with a single
// slot and a search limit of 25.
void MicroProfileStringsInit(MicroProfileStrings* pStrings)
{
    pStrings->pFirst = 0;
    pStrings->pLast = 0;
    MicroProfileHashTable* pLookup = &pStrings->HashTable;
    MicroProfileHashTableInit(pLookup, 1, 25, MicroProfileHashTableCompareString, MicroProfileHashTableHashString);
}
// Releases everything owned by a string pool: every string block in the list and the entry
// array of the lookup hash table. The waste and string counters are reset to 0 and the
// structure is zeroed afterwards.
void MicroProfileStringsDestroy(MicroProfileStrings* pStrings)
{
    MicroProfileStringBlock* pBlock = pStrings->pFirst;
    while(pBlock)
    {
        // fetch the successor first, the block is gone after the free
        MicroProfileStringBlock* pNext = pBlock->pNext;
        MicroProfileStringBlockFree(pBlock);
        pBlock = pNext;
    }
    // the hash table allocated by MicroProfileStringsInit was previously dropped by the memset
    // below without being freed
    if(pStrings->HashTable.pEntries)
    {
        MicroProfileHashTableDestroy(&pStrings->HashTable);
    }
    MicroProfileCounterSet(S.CounterToken_StringBlock_Waste, 0);
    MicroProfileCounterSet(S.CounterToken_StringBlock_Strings, 0);
    memset(pStrings, 0, sizeof(*pStrings));
}
// Interns a zero terminated string unchanged (no flags).
const char* MicroProfileStringIntern(const char* pStr)
{
    const uint32_t nLength = (uint32_t)strlen(pStr);
    return MicroProfileStringIntern(pStr, nLength, 0);
}
// Interns the lower-cased form of a zero terminated string.
const char* MicroProfileStringInternLower(const char* pStr)
{
    const uint32_t nLength = (uint32_t)strlen(pStr);
    return MicroProfileStringIntern(pStr, nLength, ESTRINGINTERN_LOWERCASE);
}
// Interns a zero terminated string with every backslash replaced by a forward slash.
const char* MicroProfileStringInternSlash(const char* pStr)
{
    const uint32_t nLength = (uint32_t)strlen(pStr);
    return MicroProfileStringIntern(pStr, nLength, ESTRINGINTERN_FORCEFORWARDSLASH);
}
// Returns the pooled copy of a string, adding it to the global string pool (S.Strings) when
// it is not there yet.
//  pStr_   source string; must be zero terminated at pStr_[nLen] when no flags are given.
//  nLen    length of the string without the terminator.
//  nFlags  ESTRINGINTERN_LOWERCASE lower-cases every character,
//          ESTRINGINTERN_FORCEFORWARDSLASH turns '\\' into '/'; both may be combined.
// The transformed string is built in a stack buffer (alloca), so nLen should be modest.
// Returns a pointer into a string block that stays valid until the pool is destroyed.
const char* MicroProfileStringIntern(const char* pStr_, uint32_t nLen, uint32_t nFlags)
{
    MicroProfileStrings* pStrings = &S.Strings;
    const char* pStr = pStr_;
    if(0 != (nFlags & (ESTRINGINTERN_FORCEFORWARDSLASH | ESTRINGINTERN_LOWERCASE)))
    {
        // scratch copy is only needed when the string is transformed
        char* pLowerCaseStr = (char*)alloca(nLen + 1);
        for(uint32_t i = 0; i < nLen; ++i)
        {
            char c = pStr[i];
            if(nFlags & ESTRINGINTERN_LOWERCASE)
            {
                // tolower is only defined for unsigned char values and EOF
                c = (char)tolower((unsigned char)c);
            }
            if(nFlags & ESTRINGINTERN_FORCEFORWARDSLASH)
            {
                if(c == '\\')
                    c = '/';
            }
            pLowerCaseStr[i] = c;
        }
        pLowerCaseStr[nLen] = '\0';
        pStr = pLowerCaseStr;
    }
    const char* pRet;
    if(MicroProfileHashTableGetString(&pStrings->HashTable, pStr, &pRet))
    {
        // sanity check: the pooled string must match the lookup key
        if(0 != strcmp(pStr, pRet))
        {
            MP_BREAK();
        }
        return pRet;
    }
    else
    {
        if(pStr[nLen] != '\0')
            MP_BREAK(); // string should be 0 terminated.
        nLen += 1; // include the terminator in the stored size
        MicroProfileStringBlock* pBlock = pStrings->pLast;
        if(0 == pBlock || pBlock->nUsed + nLen > pBlock->nSize)
        {
            // no block yet, or the current one cannot hold the string: start a new block
            MicroProfileStringBlock* pNewBlock = MicroProfileStringBlockAlloc(nLen);
            if(pBlock)
            {
                pBlock->pNext = pNewBlock;
                pStrings->pLast = pNewBlock;
                // the unused tail of the previous block is accounted as waste
                MicroProfileCounterAdd(S.CounterToken_StringBlock_Waste, pBlock->nSize - pBlock->nUsed);
            }
            else
            {
                pStrings->pLast = pStrings->pFirst = pNewBlock;
            }
            pBlock = pNewBlock;
        }
        MicroProfileCounterAdd(S.CounterToken_StringBlock_Strings, 1);
        char* pDest = &pBlock->Memory[pBlock->nUsed];
        pBlock->nUsed += nLen;
        MP_ASSERT(pBlock->nUsed <= pBlock->nSize);
        memcpy(pDest, pStr, nLen);
        // the pooled copy is both key and value of the lookup table
        MicroProfileHashTableSetString(&pStrings->HashTable, pDest, pDest);
#if 0
        void DumpTableStr(MicroProfileHashTable* pTable);
        DumpTableStr(&pStrings->HashTable);
#endif
        return pDest;
    }
}
// Debug helper: prints every occupied slot of a table as
// [slot index, home slot] key, value, hash.
void DumpTable(MicroProfileHashTable* pTable)
{
    const uint32_t nSlots = pTable->nAllocated;
    for(uint32_t i = 0; i < nSlots; ++i)
    {
        const MicroProfileHashTableEntry& Entry = pTable->pEntries[i];
        if(0 == Entry.Hash)
        {
            continue;
        }
        uprintf("[%05d,%05" PRIu64 "] ::::%" PRIx64 ", %p .. hash %" PRIx64 "\n",
                i,
                Entry.Hash % pTable->nAllocated,
                Entry.Key,
                (void*)Entry.Value,
                Entry.Hash);
    }
}
// Debug helper for string tables: prints every occupied slot with its running number,
// slot index, home slot, key string, value string and hash, followed by the fill percentage.
void DumpTableStr(MicroProfileHashTable* pTable)
{
    int nPrinted = 0;
    (void)nPrinted;
    for(uint32_t i = 0; i < pTable->nAllocated; ++i)
    {
        const MicroProfileHashTableEntry& Entry = pTable->pEntries[i];
        if(0 == Entry.Hash)
        {
            continue;
        }
        uprintf("%03d [%05d,%05" PRIu64 "] ::::%s, %s .. hash %" PRIx64 "\n",
                nPrinted++,
                i,
                Entry.Hash % pTable->nAllocated,
                (const char*)Entry.Key,
                (const char*)Entry.Value,
                Entry.Hash);
    }
    uprintf("FillPrc %f\n", 100.f * nPrinted / (float)pTable->nAllocated);
}
// Word list used by the string intern and hash table self tests below.
// Contains 100 entries; MicroProfileStringInternTest stores one pointer per entry in
// 100-element arrays, and MicroProfileHashTableTest reads the list as (key, value) pairs,
// so the count must stay even and must not exceed 100.
static const char* txt[] = { "gaudy", "chilly", "obtain", "suspend", "jelly", "peel", "nauseating", "complain", "cave", "practise", "sail", "close",
"drawer", "mature", "impossible", "exist", "sister", "poke", "ancient", "paddle", "ask", "shallow", "outrageous", "healthy",
"reading", "obey", "water", "elbow", "abnormal", "trap", "wholesale", "lovely", "stupid", "comparison", "swim", "brash",
"towering", "accept", "invention", "plantation", "spooky", "tiger", "knot", "literate", "awake", "itch", "medical", "ticket",
"tawdry", "correct", "mine", "accidental", "dinner", "produce", "protective", "red", "dreary", "toe", "drain", "zesty",
"inform", "boundless", "ghost", "attend", "rely", "fill", "liquid", "pump", "continue", "spark", "church", "fortunate",
"truthful", "conscious", "possible", "motion", "evanescent", "branch", "skirt", "number", "meek", "hour", "form", "work",
"car", "post", "talk", "fear", "tightfisted", "dress", "perform", "fry", "courageous", "dysfunctional", "page", "one",
"annoy", "abrasive", "dependent", "payment" };
// Self test for string interning: interns every word of txt, then interns a duplicated copy
// of each word and breaks if the pool does not hand back the pointer of the first intern.
// Re-initializes and destroys S.Strings, and dumps the lookup table before and after.
void MicroProfileStringInternTest()
{
    MicroProfileStringsInit(&S.Strings);
    const uint32_t nWords = sizeof(txt) / sizeof(txt[0]);
    const char* pInterned[100];
    const char* pCopies[100];
    DumpTableStr(&S.Strings.HashTable);
    for(uint32_t nWord = 0; nWord < nWords; ++nWord)
    {
        const char* pWord = txt[nWord];
        pInterned[nWord] = MicroProfileStringIntern(pWord);
        pCopies[nWord] = MicroProfileStrDup(pWord);
    }
    for(uint32_t nWord = 0; nWord < nWords; ++nWord)
    {
        // same contents at a different address must resolve to the pooled pointer
        const char* pAgain = MicroProfileStringIntern(pCopies[nWord]);
        if(pAgain == pInterned[nWord])
        {
            continue;
        }
        MP_BREAK();
    }
    DumpTableStr(&S.Strings.HashTable);
    MicroProfileStringsDestroy(&S.Strings);
}
// Self test for the hash table (also runs MicroProfileStringInternTest first).
// Part 1: fills an integer keyed table with NUM_ITEMS unique keys, verifies every lookup,
//         verifies that fresh unique keys are absent, then removes key 0 and re-checks.
// Part 2: fills a string keyed table with (txt[i], txt[i + 1]) pairs, removes the pairs in
//         the first half of the list and verifies which keys remain.
// Any failed expectation triggers MP_BREAK.
void MicroProfileHashTableTest()
{
    MicroProfileStringInternTest();
    MicroProfileHashTable T;
    MicroProfileHashTable* pTable = &T;
    MicroProfileHashTableInit(pTable, 1, 100, 0, 0);
#define NUM_ITEMS 100
    uint64_t Keys[NUM_ITEMS];
    uint64_t Values[NUM_ITEMS];
    memset(Keys, 0xff, sizeof(Keys));
    memset(Values, 0xff, sizeof(Values));
    static int l = 0;
    // value generator: two consecutive small counter values, then two random 64 bit values
    auto RR = [&]() -> uint64_t
    {
        if(l++ % 4 < 2)
        {
            return l;
        }
        uint64_t l2 = rand();
        uint64_t u = rand();
        return l2 | (u << 32);
    };
    // draws values until one is found that is not already present in Keys
    auto RRUnique = [&]() -> uint64_t
    {
        for(;;)
        {
            const uint64_t V = RR();
            // the flag must be reset for every candidate, otherwise a single collision
            // would make the search spin forever
            bool bFound = false;
            for(uint32_t i = 0; i != NUM_ITEMS; ++i)
            {
                if(V == Keys[i])
                {
                    bFound = true;
                    break;
                }
            }
            if(!bFound)
            {
                return V;
            }
        }
    };
    Keys[0] = 0;
    Values[0] = 42;
    for(uint32_t i = 1; i < NUM_ITEMS; ++i)
    {
        Keys[i] = RRUnique();
        Values[i] = RR();
    }
    for(uint32_t i = 0; i < NUM_ITEMS; ++i)
    {
        MicroProfileHashTableSet(pTable, Keys[i], Values[i]);
    }
    for(uint32_t i = 0; i < NUM_ITEMS; ++i)
    {
        uintptr_t V;
        if(MicroProfileHashTableGet(pTable, Keys[i], &V))
        {
            if(V != Values[i])
            {
                MP_BREAK();
            }
        }
        else
        {
            MP_BREAK();
        }
        // a key that was never inserted must not be found
        uint64_t nonkey = RRUnique();
        if(MicroProfileHashTableGet(pTable, nonkey, &V))
        {
            MP_BREAK();
        }
    }
    DumpTable(pTable);
    if(!MicroProfileHashTableRemove(pTable, 0))
    {
        MP_BREAK();
    }
    uprintf("removed\n");
    DumpTable(pTable);
    uintptr_t v;
    if(MicroProfileHashTableGet(pTable, 0, &v))
    {
        MP_BREAK();
    }
    if(MicroProfileHashTableGet(pTable, 1, &v))
    {
        if(v != 2)
            MP_BREAK();
    }
    MicroProfileHashTableDestroy(pTable);
    MicroProfileHashTable Strings;
    MicroProfileHashTableInit(&Strings, 1, 25, MicroProfileHashTableCompareString, MicroProfileHashTableHashString);
    uint32_t nCount = sizeof(txt) / sizeof(txt[0]);
    for(uint32_t i = 0; i < nCount; i += 2)
    {
        MicroProfileHashTableSetString(&Strings, txt[i], txt[i + 1]);
    }
    DumpTableStr(&Strings);
    for(uint32_t i = 0; i < nCount; i += 2)
    {
        const char* pKey = txt[i];
        const char* pValue = txt[i + 1];
        const char* pRes = 0;
        if(MicroProfileHashTableGetString(&Strings, pKey, &pRes))
        {
            if(pRes != pValue)
            {
                MP_BREAK();
            }
        }
        else
        {
            MP_BREAK();
        }
    }
    uint32_t nRem = nCount / 2;
    for(uint32_t i = 0; i < nRem; i += 2)
    {
        const char* pKey = txt[i];
        const char* pValue = txt[i + 1];
        if(!MicroProfileHashTableRemoveString(&Strings, pKey))
        {
            MP_BREAK();
        }
        // values were never inserted as keys, so removing one must fail
        if(MicroProfileHashTableRemoveString(&Strings, pValue))
        {
            MP_BREAK();
        }
    }
    // removing an already removed key must fail
    for(uint32_t i = 0; i < nRem; i += 2)
    {
        const char* pKey = txt[i];
        if(MicroProfileHashTableRemoveString(&Strings, pKey))
        {
            MP_BREAK();
        }
    }
    for(uint32_t i = 0; i < nCount; i += 2)
    {
        const char* pKey = txt[i];
        const char* pValue = txt[i + 1];
        const char* V;
        if(MicroProfileHashTableGetString(&Strings, pKey, &V))
        {
            if(i < nRem)
            {
                MP_BREAK();
            }
            else
            {
                if(V != pValue)
                    MP_BREAK();
            }
        }
        else
        {
            if(i >= nRem)
                MP_BREAK();
        }
    }
    DumpTableStr(&Strings);
    MicroProfileHashTableDestroy(&Strings);
#undef NUM_ITEMS
}
uint32_t MicroProfileGetColor(uint32_t TimerIndex)
{
MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
if(TI.nColor == MP_AUTO)
{
return MicroProfileColorFromString(TI.pName);
}
else
{
return TI.nColor;
}
}
#if MICROPROFILE_IMGUI
#include "imgui.h"
#ifndef MICROPROFILE_IMGUI_MAX_GRAPHS
#define MICROPROFILE_IMGUI_MAX_GRAPHS 64
#endif
#define MICROPROFILE_IMGUI_GRAPH_SIZE 256
// Per-timer state of the ImGui overlay graphs.
struct MicroProfileImguiTimerState
{
    // index of the timer this slot tracks; -1 until assigned
    int TimerIndex{ -1 };
    // defaults to all bits set
    uint64_t FrameFetched{ (uint64_t)-1 };
    // graph line color; 0 until assigned
    uint32_t nColor{ 0 };
    // ring buffer of samples in milliseconds (not initialized by default)
    float fValues[MICROPROFILE_IMGUI_GRAPH_SIZE];
};
// State of the ImGui overlay: the tracked timers and the shared write position of their
// sample ring buffers.
struct MicroProfileImguiState
{
    MicroProfileImguiTimerState Timers[MICROPROFILE_IMGUI_MAX_GRAPHS];
    // number of used entries in Timers
    uint32_t NumTimers = 0;
    // next ring buffer slot written by MicroProfileImguiGather; explicitly initialized like
    // NumTimers so an instance never starts with an indeterminate write position
    uint32_t GraphPut = 0;
};
static MicroProfileImguiState ImguiState;
void MicroProfileImguiGather()
{
MICROPROFILE_SCOPEI("MicroProfile", "ImguiGather", MP_AUTO);
uint32_t Put = ImguiState.GraphPut;
for(uint32_t i = 0; i < ImguiState.NumTimers; ++i)
{
MicroProfileImguiTimerState* pGraphInfo = &ImguiState.Timers[i];
uint64_t Ticks = S.Frame[pGraphInfo->TimerIndex].nTicks;
float fToMs = S.TimerInfo[pGraphInfo->TimerIndex].Type == MicroProfileTokenTypeGpu ? MicroProfileTickToMsMultiplierGpu() : MicroProfileTickToMsMultiplierCpu();
pGraphInfo->fValues[Put] = fToMs * Ticks;
}
ImguiState.GraphPut = (ImguiState.GraphPut + 1) % MICROPROFILE_IMGUI_GRAPH_SIZE;
}
// Converts a color with red in bits 16-23, green in bits 8-15 and blue in bits 0-7 into
// ImGui's packed color layout, with alpha forced to 0xff.
uint32_t MicroProfileImGuiColor(uint32_t Color)
{
    const uint32_t nAlpha = 0xff;
    const uint32_t nRed = (Color >> 16) & 0xff;
    const uint32_t nGreen = (Color >> 8) & 0xff;
    const uint32_t nBlue = Color & 0xff;
    uint32_t nPacked = nAlpha << IM_COL32_A_SHIFT;
    nPacked |= nRed << IM_COL32_R_SHIFT;
    nPacked |= nGreen << IM_COL32_G_SHIFT;
    nPacked |= nBlue << IM_COL32_B_SHIFT;
    return nPacked;
}
void MicroProfileImguiControls()
{
using namespace ImGui;
uint32_t IdCounter = 42;
{
PushID(IdCounter++);
int Aggr = MicroProfileGetAggregateFrames();
Text("Aggregate Frames %7d", MicroProfileGetCurrentAggregateFrames());
SameLine();
if(RadioButton("Inf", Aggr == 0))
MicroProfileSetAggregateFrames(0);
int AggrFrameOptions[] = {
30,
60,
100,
1000,
};
for(int i = 0; i < sizeof(AggrFrameOptions) / sizeof(AggrFrameOptions[0]); ++i)
{
int v = AggrFrameOptions[i];
char Buffer[32];
stbsp_snprintf(Buffer, sizeof(Buffer) - 1, "%d", v);
SameLine();
if(RadioButton(Buffer, Aggr == v))
MicroProfileSetAggregateFrames(v);
}
if(Aggr == 0)
{
if(Button("Clear Inf Aggregate"))
S.nAggregateClear = 1;
}
PopID();
}
Separator();
{
PushID(IdCounter++);
Text("Categories");
if(BeginTable("CategoryTable", 3, 0))
{
TableSetupColumn("Name", ImGuiTableColumnFlags_WidthStretch);
TableSetupColumn("On", ImGuiTableColumnFlags_WidthFixed, 70);
TableSetupColumn("Off", ImGuiTableColumnFlags_WidthFixed, 70);
for(uint32_t i = 0; i < S.nCategoryCount; ++i)
{
PushID(i);
TableNextRow();
TableSetColumnIndex(0);
Text(S.CategoryInfo[i].pName);
bool bEnabled = MicroProfileCategoryEnabled(i);
bool bDisabled = MicroProfileCategoryDisabled(i);
TableSetColumnIndex(1);
if(RadioButton("On", bEnabled))
MicroProfileEnableCategory(S.CategoryInfo[i].pName);
TableSetColumnIndex(2);
if(RadioButton("Off", bDisabled))
MicroProfileDisableCategory(S.CategoryInfo[i].pName);
PopID();
}
EndTable();
}
PopID();
}
Separator();
{
PushID(IdCounter++);
Text("Groups");
if(BeginTable("GroupTable", 3, 0))
{
TableSetupColumn("Name", ImGuiTableColumnFlags_WidthStretch);
TableSetupColumn("On", ImGuiTableColumnFlags_WidthFixed, 70);
TableSetupColumn("Off", ImGuiTableColumnFlags_WidthFixed, 70);
for(uint32_t i = 0; i < S.nGroupCount; ++i)
{
TableNextRow();
PushID(i);
const char* pName = S.GroupInfo[i].pName;
bool bEnabled = MicroProfileGroupEnabled(i);
TableSetColumnIndex(0);
Text(pName);
TableSetColumnIndex(1);
if(RadioButton("On", bEnabled))
MicroProfileToggleGroup(i);
TableSetColumnIndex(2);
if(RadioButton("Off", !bEnabled))
MicroProfileToggleGroup(i);
PopID();
}
EndTable();
}
PopID();
}
}
// Returns the overlay state tracking TimerIndex, registering a new one when the timer is not
// tracked yet. A new slot gets the timer's color and a zeroed sample buffer.
// Returns nullptr when all MICROPROFILE_IMGUI_MAX_GRAPHS slots are already in use.
MicroProfileImguiTimerState* MicroProfileImguiGetTimerState(int TimerIndex)
{
    MicroProfileImguiTimerState* const pFirst = &ImguiState.Timers[0];
    for(MicroProfileImguiTimerState* pIt = pFirst; pIt != pFirst + ImguiState.NumTimers; ++pIt)
    {
        if(pIt->TimerIndex == TimerIndex)
        {
            return pIt;
        }
    }
    if(ImguiState.NumTimers >= MICROPROFILE_IMGUI_MAX_GRAPHS)
    {
        return nullptr;
    }
    MicroProfileImguiTimerState* pCreated = &ImguiState.Timers[ImguiState.NumTimers++];
    pCreated->TimerIndex = TimerIndex;
    pCreated->nColor = MicroProfileGetColor(TimerIndex);
    memset(&pCreated->fValues[0], 0, sizeof(pCreated->fValues));
    return pCreated;
}
// Draws a table with one row per entry showing group, name and the max/min/average/current
// time of the entry's timer in milliseconds.
//  Window      alignment and pixel offset of the table inside the display area.
//  Entries     timers to show (Entries[i].GraphTimer).
//  NumEntries  number of elements in Entries.
// Column widths are sized to the widest group and timer name.
void MicroProfileImguiTable(const MicroProfileImguiWindowDesc& Window, const MicroProfileImguiEntryDesc* Entries, uint32_t NumEntries)
{
    using namespace ImGui;
    const uint32_t NumColumns = 6;
    ImGuiIO& io = ImGui::GetIO();
    float Padding = GetStyle().CellPadding.x * 2;
    float GroupWidth = CalcTextSize("Group").x;
    float NameWidth = CalcTextSize("Name").x;
    float BaseWidth = CalcTextSize("100000.00").x;
    float Height = CalcTextSize("G").y + Padding;
    // first pass: measure the name columns
    for(uint32_t i = 0; i < NumEntries; ++i)
    {
        uint32_t TimerIndex = MicroProfileGetTimerIndex(Entries[i].GraphTimer);
        const MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
        const MicroProfileGroupInfo& GI = S.GroupInfo[TI.nGroupIndex];
        GroupWidth = MicroProfileMax(GroupWidth, CalcTextSize(GI.pName).x);
        NameWidth = MicroProfileMax(NameWidth, CalcTextSize(TI.pName).x);
    }
    float TableWidth = GroupWidth + NameWidth + BaseWidth * 4 + NumColumns * Padding + (NumColumns - 1) * GetStyle().ItemSpacing.x;
    float TableHeight = Height * (NumEntries + 1);
    ImVec2 TablePos = ImVec2(0.f, 0.f);
    if(Window.Align == MICROPROFILE_IMGUI_ALIGN_TOP_RIGHT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
        TablePos.x = io.DisplaySize.x - TableWidth;
    if(Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_LEFT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
        TablePos.y = io.DisplaySize.y - TableHeight;
    TablePos.x += Window.OffsetX;
    TablePos.y += Window.OffsetY;
    SetCursorScreenPos(TablePos);
    if(BeginTable("MicroProfileImguiTable", NumColumns, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg | ImGuiTableFlags_SizingFixedFit | ImGuiTableFlags_NoHostExtendX))
    {
        TableSetupColumn("Group", ImGuiTableColumnFlags_WidthFixed, GroupWidth);
        TableSetupColumn("Name", ImGuiTableColumnFlags_WidthFixed, NameWidth);
        TableSetupColumn("Max", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
        TableSetupColumn("Min", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
        TableSetupColumn("Avg", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
        TableSetupColumn("Time", ImGuiTableColumnFlags_WidthFixed, BaseWidth);
        TableHeadersRow();
        // prints a float with two decimals, right aligned inside the current cell
        auto RightAlignedFloat = [](float f)
        {
            float CellWidth = GetContentRegionAvail().x;
            char Buffer[32];
            stbsp_snprintf(Buffer, sizeof(Buffer) - 1, "%.2f", f);
            ImVec2 TextSize = CalcTextSize(Buffer);
            SetCursorPosX(GetCursorPosX() + (CellWidth - TextSize.x));
            TextUnformatted(Buffer);
        };
        for(uint32_t i = 0; i < NumEntries; ++i)
        {
            uint32_t TimerIndex = MicroProfileGetTimerIndex(Entries[i].GraphTimer);
            MicroProfileTimerValues Values;
            MicroProfileCalcTimers(TimerIndex, Values);
            const MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
            const MicroProfileGroupInfo& GI = S.GroupInfo[TI.nGroupIndex];
            TableNextRow();
            // alternate the row background
            ImU32 RowBGColor = GetColorU32((i % 2) ? ImVec4(0.1f, 0.1f, 0.1f, 0.85f) : ImVec4(0.2f, 0.2f, 0.2f, 0.85f));
            TableSetBgColor(ImGuiTableBgTarget_RowBg1, RowBGColor);
            PushID(i);
            TableSetColumnIndex(0);
            // names are data, not format strings: a '%' in a name must be shown verbatim
            TextUnformatted(GI.pName);
            TableSetColumnIndex(1);
            TextUnformatted(TI.pName);
            TableSetColumnIndex(2);
            RightAlignedFloat(Values.MaxMs);
            TableSetColumnIndex(3);
            RightAlignedFloat(Values.MinMs);
            TableSetColumnIndex(4);
            RightAlignedFloat(Values.AverageMs);
            TableSetColumnIndex(5);
            RightAlignedFloat(Values.TimeMs);
            PopID();
        }
        EndTable();
    }
}
// Draws one line graph per entry, stacked vertically, each labelled with the timer name
// (bottom left) and the most recent sample in milliseconds (bottom right).
//  Window      alignment, pixel offset and per-graph width/height.
//  Entries     timers to plot (GraphTimer) and the upper bound of each plot (GraphMax).
//  NumEntries  number of elements in Entries.
// Entries for which no overlay slot is available (all MICROPROFILE_IMGUI_MAX_GRAPHS slots in
// use) are skipped; their vertical space is kept so the remaining graphs stay aligned.
void MicroProfileImguiGraphs(const MicroProfileImguiWindowDesc& Window, const MicroProfileImguiEntryDesc* Entries, uint32_t NumEntries)
{
    using namespace ImGui;
    ImGuiIO& io = ImGui::GetIO();
    uint32_t Width = Window.GraphWidth;
    uint32_t Height = (Window.GraphHeight + GetStyle().ItemSpacing.y) * NumEntries;
    ImVec2 Pos = ImVec2(0.f, 0.f);
    if(Window.Align == MICROPROFILE_IMGUI_ALIGN_TOP_RIGHT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
        Pos.x = io.DisplaySize.x - Width;
    if(Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_LEFT || Window.Align == MICROPROFILE_IMGUI_ALIGN_BOTTOM_RIGHT)
        Pos.y = io.DisplaySize.y - Height;
    Pos.x += Window.OffsetX;
    Pos.y += Window.OffsetY;
    for(uint32_t i = 0; i < NumEntries; ++i)
    {
        SetCursorScreenPos(Pos);
        uint32_t TimerIndex = MicroProfileGetTimerIndex(Entries[i].GraphTimer);
        float GraphMax = Entries[i].GraphMax;
        const MicroProfileTimerInfo& TI = S.TimerInfo[TimerIndex];
        MicroProfileImguiTimerState* TimerState = MicroProfileImguiGetTimerState(TimerIndex);
        if(!TimerState)
        {
            // no free slot for this timer: nothing to plot, previously a null dereference
            Pos.y += Window.GraphHeight + GetStyle().ItemSpacing.y;
            continue;
        }
        PushID(i << 16 | TimerIndex);
        if(TimerState->nColor == 0)
            TimerState->nColor = MicroProfileGetColor(TimerIndex);
        ImVec4 FrameBg = GetStyleColorVec4(ImGuiCol_FrameBg);
        FrameBg.x = 0.15f;
        FrameBg.y = 0.15f;
        FrameBg.z = 0.15f;
        FrameBg.w = 0.8f;
        PushStyleColor(ImGuiCol_PlotLines, MicroProfileImGuiColor(TimerState->nColor));
        PushStyleColor(ImGuiCol_FrameBg, FrameBg);
        // the ring buffer is plotted starting at the oldest sample (the next write position);
        // the newest sample sits one slot before it
        uint32_t Start = (ImguiState.GraphPut) % MICROPROFILE_IMGUI_GRAPH_SIZE;
        uint32_t Last = (ImguiState.GraphPut + MICROPROFILE_IMGUI_GRAPH_SIZE - 1) % MICROPROFILE_IMGUI_GRAPH_SIZE;
        PlotLines("", &TimerState->fValues[0], MICROPROFILE_IMGUI_GRAPH_SIZE, Start, nullptr, 0.f, GraphMax, ImVec2(Window.GraphWidth, Window.GraphHeight));
        char TimeStr[32];
        stbsp_snprintf(TimeStr, sizeof(TimeStr) - 1, "%.3fms", TimerState->fValues[Last]);
        ImVec2 PlotMin = GetItemRectMin();
        ImVec2 PlotMax = GetItemRectMax();
        ImVec2 NameSize = CalcTextSize(TI.pName);
        ImVec2 NamePos = ImVec2(PlotMin.x + 1, PlotMax.y - NameSize.y - 1);
        ImVec2 TimeSize = CalcTextSize(TimeStr);
        ImVec2 TimePos = ImVec2(PlotMax.x - TimeSize.x - 1, PlotMax.y - TimeSize.y - 1);
        GetWindowDrawList()->AddText(NamePos, GetColorU32(ImGuiCol_Text), TI.pName);
        GetWindowDrawList()->AddText(TimePos, GetColorU32(ImGuiCol_Text), TimeStr);
        PopStyleColor();
        PopStyleColor();
        PopID();
        Pos.y += Window.GraphHeight + GetStyle().ItemSpacing.y;
    }
}
#endif
#undef uprintf
#undef S
#ifdef _WIN32
#pragma warning(pop)
#undef microprofile_fopen_helper
#endif
#ifdef MICROPROFILE_PS4
#define MICROPROFILE_PS4_IMPL
#include "microprofile_ps4.h"
#endif
#ifdef MICROPROFILE_XBOXONE
#define MICROPROFILE_XBOXONE_IMPL
#include "microprofile_xboxone.h"
#endif
#endif // #if MICROPROFILE_ENABLED
#include "microprofile_html.h"
#include "microprofile_icons.h"
#endif