2
3
4
5
10#include "Render/FrameInfo.h"
12#include <NvPerfCounterConfiguration.h>
13#include <NvPerfCpuMarkerTrace.h>
14#include <NvPerfVulkan.h>
21
22
23
25 "gpc__cycles_elapsed.avg.per_second",
26 "sys__cycles_elapsed.avg.per_second",
27 "lts__cycles_elapsed.avg.per_second",
38
39
43
44
45
53 if (!m_NsightPerfGPUProfilerContinuous)
55 m_NsightPerfGPUProfilerContinuous = std::make_shared<NsightPerfGPUProfilerContinuous>(state);
64 NSPERF_CHECK(nv::perf::VulkanIsNvidiaDevice(state.m_PhysicalDevice))
65 const size_t deviceIndex = nv::perf::VulkanGetNvperfDeviceIndex(state.m_Instance, state.m_PhysicalDevice, state.m_Device);
68
69
71 const nv::perf::DeviceIdentifiers deviceIdentifiers = sampler.GetDeviceIdentifiers();
74
75
77 std::vector<uint8_t> metricsEvaluatorScratchBuffer;
78 NVPW_MetricsEvaluator* pMetricsEvaluator = nv::perf::sampler::DeviceCreateMetricsEvaluator(metricsEvaluatorScratchBuffer, deviceIdentifiers.pChipName);
81
82
83 metricsEvaluator = nv::perf::MetricsEvaluator(pMetricsEvaluator, std::move(metricsEvaluatorScratchBuffer));
87
88
89 nv::perf::MetricsConfigBuilder configBuilder;
91 NVPA_RawMetricsConfig* pRawMetricsConfig = nv::perf::sampler::DeviceCreateRawMetricsConfig(deviceIdentifiers.pChipName);
94
95
96 NSPERF_CHECK(configBuilder.Initialize(metricsEvaluator, pRawMetricsConfig, deviceIdentifiers.pChipName))
100
101
102 for (size_t ii = 0; ii <
sizeof(Metrics) /
sizeof(Metrics[0]); ++ii)
104 const char*
const pMetric = Metrics[ii];
105 NVPW_MetricEvalRequest request{};
106 NSPERF_CHECK(ToMetricEvalRequest(metricsEvaluator, pMetric, request))
109
110
111
112 constexpr bool keepInstances =
false;
113 NSPERF_CHECK(configBuilder.AddMetrics(&request, 1, keepInstances))
114 metricEvalRequests.emplace_back(std::move(request));
118
119
120 nv::perf::CounterConfiguration counterConfiguration;
121 NSPERF_CHECK(CreateConfiguration(configBuilder, counterConfiguration))
124
125
126 assert(counterConfiguration.numPasses == 1);
129
130
131
132
133
134 constexpr uint32_t MaxSamples = 1024;
135 constexpr bool Validate =
true;
140 uint32_t maxSamples ,
141 NVPW_PeriodicSampler_CounterData_AppendMode appendMode ,
142 std::vector<uint8_t>& counterData
145 return nv::perf::sampler::GpuPeriodicSamplerCreateCounterData(
147 counterConfiguration.counterDataPrefix.data() ,
148 counterConfiguration.counterDataPrefix.size() ,
156
157
160 counterData.GetCounterData().data() ,
161 counterData.GetCounterData().size()
165
166
168 std::cout <<
"StartTime, EndTime, Duration";
169 const auto countersEnumerator = EnumerateCounters(metricsEvaluator);
170 const auto ratiosEnumerator = EnumerateRatios(metricsEvaluator);
171 const auto throughputsEnumerator = EnumerateThroughputs(metricsEvaluator);
172 for (
const NVPW_MetricEvalRequest& metricEvalRequest : metricEvalRequests)
174 std::cout <<
", " << ToString(countersEnumerator, ratiosEnumerator, throughputsEnumerator, metricEvalRequest);
180
181
182 constexpr size_t SamplingFrequency = 120;
183 constexpr size_t samplingIntervalInNanoSeconds = 1000 * 1000 * 1000 / SamplingFrequency;
184 constexpr size_t MaxDecodeLatencyInNanoSeconds = 1000 * 1000 * 1000 * 10;
185 const nv::perf::sampler::GpuPeriodicSampler::GpuPulseSamplingInterval samplingInterval = sampler.GetGpuPulseSamplingInterval(samplingIntervalInNanoSeconds);
186 const size_t maxNumUndecodedSamples = MaxDecodeLatencyInNanoSeconds / samplingIntervalInNanoSeconds;
187 size_t recordBufferSize = 0;
188 NSPERF_CHECK(nv::perf::sampler::GpuPeriodicSamplerCalculateRecordBufferSize(deviceIndex, counterConfiguration.configImage, maxNumUndecodedSamples, recordBufferSize))
190 const size_t MaxNumUndecodedSamplingRanges = 1;
193 MaxNumUndecodedSamplingRanges ,
194 { samplingInterval.triggerSource } ,
195 samplingInterval.samplingInterval
199
200
201 constexpr size_t passIndex = 0;
202 NSPERF_CHECK(sampler.SetConfig(counterConfiguration.configImage, passIndex))
205
206
207
208
209
213
214
223
224
238 nv::perf::sampler::GpuPeriodicSampler::GetRecordBufferStatusParams getRecordBufferStatusParams = {};
239 getRecordBufferStatusParams.queryOverflow =
true;
240 getRecordBufferStatusParams.queryNumUnreadBytes =
true;
241 const bool success = sampler.GetRecordBufferStatus(getRecordBufferStatusParams);
246 if (getRecordBufferStatusParams.overflow)
248 SPICES_CORE_ERROR(
"Record buffer has overflowed. Please ensure that the value of `maxNumUndecodedSamples` is sufficiently large.")
251 if (getRecordBufferStatusParams.numUnreadBytes == 0)
257
258
259 NVPW_GPU_PeriodicSampler_DecodeStopReason decodeStopReason = NVPW_GPU_PERIODIC_SAMPLER_DECODE_STOP_REASON_OTHER;
260 size_t numSamplesMerged = 0;
261 size_t numBytesConsumed = 0;
262 if (!sampler.DecodeCounters(
263 counterData.GetCounterData() ,
264 getRecordBufferStatusParams.numUnreadBytes ,
270 SPICES_CORE_WARN(
"Failed to decode counters.")
273 if (numSamplesMerged)
275 SPICES_CORE_WARN(
"Samples appear to be merged, this can reduce the accuracy of the collected samples. Please check for any back-to-back triggers!")
277 if (decodeStopReason != NVPW_GPU_PERIODIC_SAMPLER_DECODE_STOP_REASON_ALL_GIVEN_BYTES_READ)
279 SPICES_CORE_WARN(
"DecodeCounters stopped unexpectedly.")
281 if (!sampler.AcknowledgeRecordBuffer(numBytesConsumed))
283 SPICES_CORE_WARN(
"Failed to acknowledge record buffer")
285 if (!counterData.UpdatePut())
287 SPICES_CORE_WARN(
"Failed to update counter data's put pointer.")
290 const uint32_t numUnreadRanges = counterData.GetNumUnreadRanges();
293 std::vector<
double> metricValues(metricEvalRequests.size());
294 uint32_t numRangesConsumed = 0;
297 const uint8_t* pCounterDataImage ,
298 size_t counterDataImageSize ,
299 uint32_t rangeIndex ,
303 nv::perf::sampler::SampleTimestamp timestamp{};
304 if (!CounterDataGetSampleTime(pCounterDataImage, rangeIndex, timestamp))
309 if (!nv::perf::EvaluateToGpuValues(
312 counterDataImageSize,
314 metricEvalRequests.size(),
315 metricEvalRequests.data(),
316 metricValues.data()))
321 std::cout << std::fixed << std::setprecision(0) << timestamp.start <<
", " << timestamp.end <<
", " << (timestamp.end - timestamp.start);
322 for (
const double& metricValue : metricValues)
324 std::cout <<
", " << metricValue;
328 if (++numRangesConsumed == numUnreadRanges)
331
332
338 std::cout << std::flush;
339 if (!counterData.UpdateGet(numRangesConsumed))
341 SPICES_CORE_WARN(
"Counter data failed to update get pointer.")
346
347
#define NSPERF_CHECK(val)
#define SPICES_PROFILE_ZONE
void EndFrame()
ConsumeSample each frame.
void CaptureFrame()
Capture this frame.
static void CreateInstance(VulkanState &state)
Create the singleton instance.
NsightPerfGPUProfilerContinuous(VulkanState &state)
Constructor Function.
void Reset()
Reset when the application quits.
void Create(VulkanState &state)
Begin this Session.
bool m_IsInSession
True if in session.
bool m_EnableCaptureNextFrame
True if the next frame should be captured.
static std::shared_ptr< NsightPerfGPUProfilerContinuous > m_NsightPerfGPUProfilerContinuous
The singleton instance.
void BeginFrame(VulkanState &state)
Begin a frame.
VulkanState & m_VulkanState
VulkanState.
Wrapper of Nvidia Nsight Performance Metrics.
const char * Metrics[]
The following metrics are for demonstration purposes only. For a more comprehensive set of single-pas...
This struct contains all Vulkan objects that are used globally.