From 01acf051cf58a5dbd7f63378e4a129121aeb133a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=84=E6=AD=A6?= Date: Mon, 8 Jun 2026 14:42:29 +0800 Subject: [PATCH] Wait for KernelCh start before profiler completion --- src/transport/profiler.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/transport/profiler.cc b/src/transport/profiler.cc index 25cd01f59..4929200f1 100644 --- a/src/transport/profiler.cc +++ b/src/transport/profiler.cc @@ -34,17 +34,18 @@ static ncclResult_t profilerProxyProgress(struct ncclProxyState* proxyState, str struct ncclProxySubArgs* sub = args->subs + s; struct ncclDevProfiler* workStarted = (struct ncclDevProfiler*)sub->sendbuff; struct ncclDevProfiler* workCompleted = (struct ncclDevProfiler*)sub->recvbuff; + int idx = sub->base % MAX_PROFILER_EVENTS_PER_CHANNEL; if (sub->posted < sub->nsteps && - sub->base <= workStarted[sub->channelId].data[sub->base % MAX_PROFILER_EVENTS_PER_CHANNEL].counter) { + sub->base <= workStarted[sub->channelId].data[idx].counter) { ncclProfilerStartKernelChEvent( - args, s, workStarted[sub->channelId].data[sub->base % MAX_PROFILER_EVENTS_PER_CHANNEL].timestamp); + args, s, workStarted[sub->channelId].data[idx].timestamp); sub->posted = sub->nsteps; continue; // allow events on every channel to start } - if (sub->transmitted < sub->nsteps && - sub->base <= workCompleted[sub->channelId].data[sub->base % MAX_PROFILER_EVENTS_PER_CHANNEL].counter) { + if (sub->posted == sub->nsteps && sub->transmitted < sub->nsteps && + sub->base <= workCompleted[sub->channelId].data[idx].counter) { ncclProfilerStopKernelChEvent( - args, s, workCompleted[sub->channelId].data[sub->base % MAX_PROFILER_EVENTS_PER_CHANNEL].timestamp); + args, s, workCompleted[sub->channelId].data[idx].timestamp); sub->transmitted = sub->nsteps; args->done++; }