683 lines
19 KiB
C++
Raw Permalink Normal View History

/*
** Command & Conquer Generals Zero Hour(tm)
** Copyright 2025 Electronic Arts Inc.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
////////////////////////////////////////////////////////////////////////////////
// //
// (c) 2001-2003 Electronic Arts Inc. //
// //
////////////////////////////////////////////////////////////////////////////////
// FILE: PerfTimer.cpp ///////////////////////////////////////////////////////////////////////////
// Author:
///////////////////////////////////////////////////////////////////////////////////////////////////
#include "PreRTS.h" // This must go first in EVERY cpp file int the GameEngine
#include "Common/PerfTimer.h"
#include "Common/GlobalData.h"
#include "GameClient/DebugDisplay.h"
#include "GameClient/Display.h"
#include "GameClient/GraphDraw.h"
__forceinline void ProfileGetTime(__int64 &t)
{
_asm
{
mov ecx,[t]
push eax
push edx
rdtsc
mov [ecx],eax
mov [ecx+4],edx
pop edx
pop eax
};
}
#ifdef _INTERNAL
// for occasional debugging...
//#pragma optimize("", off)
//#pragma MESSAGE("************************************** WARNING, optimization disabled for debugging purposes")
#endif
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
#if defined(PERF_TIMERS) || defined(DUMP_PERF_STATS)
//-------------------------------------------------------------------------------------------------
static Int64 s_ticksPerSec = 0;
static double s_ticksPerMSec = 0;
static double s_ticksPerUSec = 0;
//-------------------------------------------------------------------------------------------------
void GetPrecisionTimerTicksPerSec(Int64* t)
{
*t = s_ticksPerSec;
}
//Kris: Plugged in Martin's code to optimize timer setup.
#define HOFFESOMMER_REPLACEMENT_CODE
//-------------------------------------------------------------------------------------------------
void InitPrecisionTimer()
{
#ifdef HOFFESOMMER_REPLACEMENT_CODE
// measure clock cycles 3 times for 20 msec each
// then take the 2 counts that are closest, average
_int64 n[ 3 ];
for( int k = 0; k < 3; k++ )
{
// wait for end of current tick
unsigned timeEnd = timeGetTime() + 2;
while( timeGetTime() < timeEnd ); //do nothing
// get cycles
_int64 start, startQPC, endQPC;
QueryPerformanceCounter( (LARGE_INTEGER *)&startQPC );
ProfileGetTime( start );
timeEnd += 20;
while( timeGetTime() < timeEnd ); //do nothing
ProfileGetTime( n[ k ] );
n[ k ] -= start;
// convert to 1 second
if( QueryPerformanceCounter( (LARGE_INTEGER*)&endQPC ) )
{
QueryPerformanceFrequency( (LARGE_INTEGER*)&s_ticksPerSec );
n[ k ] = ( n[ k ] * s_ticksPerSec ) / ( endQPC - startQPC );
}
else
{
n[ k ] = ( n[ k ] * 1000 ) / 20;
}
}
// find two closest values
_int64 d01 = n[ 1 ] - n[ 0 ];
_int64 d02 = n[ 2 ] - n[ 0 ];
_int64 d12 = n[ 2 ] - n[ 1 ];
if( d01 < 0 )
{
d01 = -d01;
}
if( d02 < 0 )
{
d02 = -d02;
}
if( d12 < 0 )
{
d12 = -d12;
}
_int64 avg;
if( d01 < d02 )
{
avg = d01 < d12 ? n[ 0 ] + n[ 1 ] : n[ 1 ] + n[ 2 ];
}
else
{
avg = d02 < d12 ? n[ 0 ] + n[ 2 ] : n[ 1 ] + n[ 2 ];
}
//s_ticksPerMSec = 1.0 * TotalTicks / totalTime;
s_ticksPerMSec = avg / 2000.0f;
s_ticksPerSec = s_ticksPerMSec * 1000.0f;
s_ticksPerUSec = s_ticksPerSec / 1000000.0f;
#else
//Kris: With total disrespect, this code costs 5 real seconds of init time
//whenever we fire up the game.
#ifdef USE_QPF
QueryPerformanceFrequency((LARGE_INTEGER*)&s_ticksPerSec);
#else
// Init the precision timers
Int64 totalTime = 0;
Int64 TotalTicks = 0;
static int TESTS = 5;
for (int i = 0; i < TESTS; ++i)
{
int TimeStart;
int TimeStop;
Int64 StartTicks;
Int64 EndTicks;
TimeStart = timeGetTime();
GetPrecisionTimer(&StartTicks);
for(;;)
{
TimeStop = timeGetTime();
if ((TimeStop - TimeStart) > 1000)
{
GetPrecisionTimer(&EndTicks);
break;
}
}
TotalTicks += (EndTicks - StartTicks);
totalTime += (TimeStop - TimeStart);
}
s_ticksPerMSec = 1.0 * TotalTicks / totalTime;
s_ticksPerSec = s_ticksPerMSec * 1000.0f;
#endif
s_ticksPerMSec = s_ticksPerSec / 1000.0f;
s_ticksPerUSec = s_ticksPerSec / 1000000.0f;
#ifdef NOT_IN_USE
Int64 bogus[8];
GetPrecisionTimer(&start);
for (Int ii = 0; ii < ITERS; ++ii)
{
GetPrecisionTimer(&bogus[0]);
GetPrecisionTimer(&bogus[1]);
GetPrecisionTimer(&bogus[2]);
GetPrecisionTimer(&bogus[3]);
GetPrecisionTimer(&bogus[4]);
GetPrecisionTimer(&bogus[5]);
GetPrecisionTimer(&bogus[6]);
GetPrecisionTimer(&bogus[7]);
}
TheTicksToGetTicks = (bogus[7] - start) / (ITERS*8);
DEBUG_LOG(("TheTicksToGetTicks is %d (%f usec)\n",(int)TheTicksToGetTicks,TheTicksToGetTicks/s_ticksPerUSec));
#endif
#endif
}
#endif // defined(PERF_TIMERS) || defined(DUMP_PERF_STATS)
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
#ifdef PERF_TIMERS
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
/*static*/ Bool AutoPerfGatherIgnore::s_ignoring = false;
//-------------------------------------------------------------------------------------------------
typedef std::vector< std::pair< AsciiString, AsciiString > > StringPairVec;
//-------------------------------------------------------------------------------------------------
// PerfMetrics class. Basically, request a handle with your name and it will return. We use a vector
// of pairs of asciistrings for this
class PerfMetricsOutput
{
private:
StringPairVec m_outputStats;
public:
AsciiString& getStatsString(const AsciiString& id)
{
for (int i = 0; i < m_outputStats.size(); ++i)
{
if (m_outputStats[i].first == id)
return m_outputStats[i].second;
}
std::pair<AsciiString, AsciiString> newPair;
newPair.first = id;
m_outputStats.push_back(newPair);
return m_outputStats.back().second;
}
void clearStatsString(const AsciiString& id)
{
for (int i = 0; i < m_outputStats.size(); ++i)
{
if (m_outputStats[i].first == id)
{
m_outputStats.erase(&m_outputStats[i]);
return;
}
}
}
StringPairVec& friend_getAllStatsStrings() { return m_outputStats; }
};
//-------------------------------------------------------------------------------------------------
static PerfMetricsOutput s_output;
static FILE* s_perfStatsFile = NULL;
static Int s_perfDumpOptions = 0;
static UnsignedInt s_lastDumpedFrame = 0;
static char s_buf[256] = "";
PerfGather* PerfGather::m_active[MAX_ACTIVE_STACK] = { 0 };
PerfGather** PerfGather::m_activeHead = &PerfGather::m_active[0];
Int64 PerfGather::s_stopStartOverhead = -1;
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
/*static*/ PerfGather*& PerfGather::getHeadPtr()
{
// funky technique for order-of-init problem. trust me. (srj)
static PerfGather* s_head = NULL;
return s_head;
}
//-------------------------------------------------------------------------------------------------
void PerfGather::addToList()
{
PerfGather*& head = getHeadPtr();
this->m_next = head;
if (head)
head->m_prev = this;
head = this;
}
//-------------------------------------------------------------------------------------------------
void PerfGather::removeFromList()
{
PerfGather*& head = getHeadPtr();
if (this->m_next)
this->m_next->m_prev = this->m_prev;
if (this->m_prev)
this->m_prev->m_next = this->m_next;
else
head = this->m_next;
this->m_prev = 0;
this->m_next = 0;
}
//-------------------------------------------------------------------------------------------------
PerfGather::PerfGather(const char *identifier) :
m_identifier(identifier),
m_startTime(0),
m_runningTimeGross(0),
m_runningTimeNet(0),
m_callCount(0),
m_next(0),
m_prev(0)
{
//Added By Sadullah Nader
//Initializations inserted
m_ignore = FALSE;
//
DEBUG_ASSERTCRASH(strchr(m_identifier, ',') == NULL, ("PerfGather names must not contain commas"));
addToList();
}
//-------------------------------------------------------------------------------------------------
PerfGather::~PerfGather()
{
removeFromList();
}
//-------------------------------------------------------------------------------------------------
void PerfGather::reset()
{
m_startTime = 0;
m_runningTimeGross = 0;
m_runningTimeNet = 0;
m_callCount = 0;
}
//-------------------------------------------------------------------------------------------------
/*static*/ void PerfGather::resetAll()
{
for (PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
head->reset();
}
}
//-------------------------------------------------------------------------------------------------
/*static*/ void PerfGather::initPerfDump(const char* fname, Int options)
{
PerfGather::termPerfDump();
strcpy(s_buf, fname);
char tmp[256];
strcpy(tmp, s_buf);
strcat(tmp, ".csv");
s_perfStatsFile = fopen(tmp, "w");
s_perfDumpOptions = options;
if (s_perfStatsFile == NULL)
{
DEBUG_CRASH(("could not open/create perf file %s -- is it open in another app?",s_buf));
return;
}
if (s_stopStartOverhead == -1)
{
const Int ITERS = 100000;
Int64 start, end;
PerfGather pf("timer");
GetPrecisionTimer(&start);
for (Int ii = 0; ii < ITERS; ++ii)
{
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
pf.startTimer(); pf.stopTimer();
}
GetPrecisionTimer(&end);
s_stopStartOverhead = (end - start) / (ITERS*8);
DEBUG_LOG(("s_stopStartOverhead is %d (%f usec)\n",(int)s_stopStartOverhead,s_stopStartOverhead/s_ticksPerUSec));
}
}
//-------------------------------------------------------------------------------------------------
/*static*/ void PerfGather::dumpAll(UnsignedInt frame)
{
if (frame < s_lastDumpedFrame)
{
// must have reset or started a new game.
termPerfDump();
initPerfDump(s_buf, s_perfDumpOptions);
}
if (!s_perfStatsFile)
{
DEBUG_CRASH(("not inited"));
return;
}
if (frame >= 1 && frame <= 30)
{
// always skip the first second or so, since it loads everything and skews the results horribly
}
else
{
if (s_lastDumpedFrame == 0)
{
fprintf(s_perfStatsFile, "Frame");
if (s_perfDumpOptions & PERF_GROSSTIME)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
fprintf(s_perfStatsFile, ",Gross:%s", head->m_identifier);
}
}
if (s_perfDumpOptions & PERF_NETTIME)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
fprintf(s_perfStatsFile, ",Net:%s", head->m_identifier);
}
}
if (s_perfDumpOptions & PERF_CALLCOUNT)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
fprintf(s_perfStatsFile, ",Count:%s", head->m_identifier);
}
}
fprintf(s_perfStatsFile, "\n");
}
// a strange value so we can find it in the dump, if necessary.
// there's nothing magic about this value, it's purely determined from sample dumps...
// const Real CLIP_BIG_SPIKES = 1e10f;
const Real CLIP_BIG_SPIKES = 100000.0f;
// make this a nonnumeric thing so Excel won't try to graph it...
fprintf(s_perfStatsFile, "Frame%08d", frame);
if (s_perfDumpOptions & PERF_GROSSTIME)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
double t = head->m_runningTimeGross;
t /= s_ticksPerUSec;
if (t > CLIP_BIG_SPIKES)
t = CLIP_BIG_SPIKES;
fprintf(s_perfStatsFile, ",%f", t);
}
}
if (s_perfDumpOptions & PERF_NETTIME)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
double t = head->m_runningTimeNet;
t /= s_ticksPerUSec;
if (t > CLIP_BIG_SPIKES)
t = CLIP_BIG_SPIKES;
fprintf(s_perfStatsFile, ",%f", t);
}
}
if (s_perfDumpOptions & PERF_CALLCOUNT)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
fprintf(s_perfStatsFile, ",%d", head->m_callCount);
}
}
fprintf(s_perfStatsFile, "\n");
fflush(s_perfStatsFile);
s_lastDumpedFrame = frame;
}
}
//-------------------------------------------------------------------------------------------------
// This function will queue up stuff to draw on the next frame. We also need to adjust the
// perf timers to not include time spent paused by the script engine.
/*static*/ void PerfGather::displayGraph(UnsignedInt frame)
{
if (!TheGraphDraw) {
return;
}
if (frame >= 1 && frame <= 30)
{
// always skip the first second or so, since it loads everything and skews the results horribly
}
else
{
const Real CLIP_BIG_SPIKES = 100000.0f;
if (s_perfDumpOptions & PERF_GROSSTIME)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
Real t = head->m_runningTimeGross;
t /= s_ticksPerUSec;
if (t > CLIP_BIG_SPIKES)
t = CLIP_BIG_SPIKES;
TheGraphDraw->addEntry(head->m_identifier, REAL_TO_INT(t));
}
}
if (s_perfDumpOptions & PERF_NETTIME)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
Real t = head->m_runningTimeNet;
t /= s_ticksPerUSec;
if (t > CLIP_BIG_SPIKES)
t = CLIP_BIG_SPIKES;
TheGraphDraw->addEntry(head->m_identifier, REAL_TO_INT(t));
}
}
if (s_perfDumpOptions & PERF_CALLCOUNT)
{
for (const PerfGather* head = getHeadPtr(); head != NULL; head = head->m_next)
{
Real t = head->m_callCount;
TheGraphDraw->addEntry(head->m_identifier, REAL_TO_INT(t));
}
}
}
}
//-------------------------------------------------------------------------------------------------
/*static*/ void PerfGather::termPerfDump()
{
if (s_perfStatsFile)
{
fflush(s_perfStatsFile);
fclose(s_perfStatsFile);
s_perfStatsFile = NULL;
}
s_lastDumpedFrame = 0;
}
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
PerfTimer::PerfTimer( const char *identifier, Bool crashWithInfo, Int startFrame, Int endFrame) :
m_identifier(identifier),
m_crashWithInfo(crashWithInfo),
m_startFrame(startFrame),
m_endFrame(endFrame),
m_callCount(0),
m_runningTime(0),
m_outputInfo(true),
//Added By Sadullah Nader
//Intializations inserted
m_lastFrame(-1)
{
}
//-------------------------------------------------------------------------------------------------
PerfTimer::~PerfTimer( )
{
if (m_endFrame == -1) {
outputInfo();
}
}
//-------------------------------------------------------------------------------------------------
void PerfTimer::outputInfo( void )
{
if (TheGlobalData->m_showMetrics) {
return;
}
if (m_outputInfo && !TheGlobalData->m_showMetrics) {
m_outputInfo = false;
} else {
return;
}
if (!s_ticksPerSec) {
// DEBUG_CRASH here
return;
}
#if defined(_DEBUG) || defined(_INTERNAL)
double totalTimeInMS = 1000.0 * m_runningTime / s_ticksPerSec;
double avgTimePerFrame = totalTimeInMS / (m_lastFrame - m_startFrame + 1);
double avgTimePerCall = totalTimeInMS / m_callCount;
#endif
if (m_crashWithInfo) {
DEBUG_CRASH(("%s\n"
"Average Time (per call): %.4f ms\n"
"Average Time (per frame): %.4f ms\n"
"Average calls per frame: %.2f\n"
"Number of calls: %d\n"
"Max possible FPS: %.4f\n",
m_identifier,
avgTimePerCall,
avgTimePerFrame,
1.0f * m_callCount / (m_lastFrame - m_startFrame + 1),
m_callCount,
1000.0f / avgTimePerFrame));
} else {
DEBUG_LOG(("%s\n"
"Average Time (per call): %.4f ms\n"
"Average Time (per frame): %.4f ms\n"
"Average calls per frame: %.2f\n"
"Number of calls: %d\n"
"Max possible FPS: %.4f\n",
m_identifier,
avgTimePerCall,
avgTimePerFrame,
1.0f * m_callCount / (m_lastFrame - m_startFrame + 1),
m_callCount,
1000.0f / avgTimePerFrame));
}
}
//-------------------------------------------------------------------------------------------------
void PerfTimer::showMetrics( void )
{
#if defined(_DEBUG) || defined(_INTERNAL)
double totalTimeInMS = 1000.0 * m_runningTime / s_ticksPerSec;
double avgTimePerFrame = totalTimeInMS / (m_lastFrame - m_startFrame + 1);
double avgTimePerCall = totalTimeInMS / m_callCount;
#endif
// we want to work on the thing in the array, so just store a reference.
AsciiString &outputStats = s_output.getStatsString(m_identifier);
outputStats.format("%s: %.2fms / call, %.2fms / frame \n",
m_identifier,
avgTimePerCall,
avgTimePerFrame);
m_callCount = 0;
m_runningTime = 0;
UnsignedInt frm = (TheGameLogic ? TheGameLogic->getFrame() : m_startFrame);
m_startFrame = frm + 1;
m_endFrame = m_startFrame + PERFMETRICS_BETWEEN_METRICS;
}
//-------------------------------------------------------------------------------StatMetricsDisplay
void StatMetricsDisplay( DebugDisplayInterface *dd, void *, FILE *fp )
{
dd->printf("Performance Metrics: \n");
// no copies will take place because we are storing a reference to the thing
StringPairVec &stats = s_output.friend_getAllStatsStrings();
for (int i = 0; i < stats.size(); ++i) {
dd->printf("%s", stats[i].second.str());
}
}
#endif // PERF_TIMERS