Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 58 additions & 16 deletions src/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
#include "nvtx.h"
#include "utils.h"

NCCL_PARAM(ShadowMempoolMaxSize, "SHADOW_MEMPOOL_MAX_SIZE", 1LL << 30);

Comment on lines -14 to -15

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this removed? It seems out of scope.

NCCL_API(ncclResult_t, ncclMemAlloc, void **ptr, size_t size);
ncclResult_t ncclMemAlloc(void **ptr, size_t size) {
NCCL_NVTX3_FUNC_RANGE;
Expand Down Expand Up @@ -331,6 +329,7 @@ ncclResult_t ncclShadowPoolAlloc(
struct ncclShadowPool* pool, size_t size, void** outDevObj, void** outHostObj,
cudaStream_t stream
) {
ncclResult_t ret = ncclSuccess;
if (size == 0) {
if (outDevObj) *outDevObj = nullptr;
if (outHostObj) *outHostObj = nullptr;
Expand All @@ -344,18 +343,33 @@ ncclResult_t ncclShadowPoolAlloc(
props.handleTypes = cudaMemHandleTypeNone;
props.location.type = cudaMemLocationTypeDevice;
cudaGetDevice(&props.location.id);
props.maxSize = (size_t)ncclParamShadowMempoolMaxSize();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here? Why was this removed?

CUDACHECK(cudaMemPoolCreate(&pool->memPool, &props));

// Failing steps first, operate on state on the stack
cudaMemPool_t memPool = nullptr;
CUDACHECK(cudaMemPoolCreate(&memPool, &props));

struct ncclShadowObject** table = (struct ncclShadowObject**)malloc(sizeof(struct ncclShadowObject*)<<4);
if (table == nullptr) {
WARN("Failed to allocate hash table for shadow pool");
cudaMemPoolDestroy(memPool);
return ncclSystemError;
}

// Non-failing steps last - update pool state only after all operations succeed
pool->memPool = memPool;
pool->table = table;
pool->hbits = hbits = 4;
pool->table = (struct ncclShadowObject**)malloc(sizeof(struct ncclShadowObject*)<<hbits);
for (int i=0; i < 1<<hbits; i++) pool->table[i] = nullptr;
}

// Check for hash table size increase before inserting. Maintain 2:1 object:bucket ratio.
if (pool->count+1 > 2<<hbits) {
struct ncclShadowObject** table0 = pool->table;
struct ncclShadowObject** table1 = (struct ncclShadowObject**)malloc(sizeof(struct ncclShadowObject*)<<(hbits+1));
if (table1 == nullptr) {
WARN("Failed to allocate expanded hash table for shadow pool");
return ncclSystemError;
}
pool->table = table1;
pool->hbits = hbits+1;
for (int i1=0; i1 < 2<<hbits; i1++) table1[i1] = nullptr;
Expand All @@ -371,8 +385,18 @@ ncclResult_t ncclShadowPoolAlloc(
free(table0);
}

struct ncclShadowPage* page;
void *devObj;
// Allocate obj early, before any device state modifications
struct ncclShadowObject* obj = (struct ncclShadowObject*)malloc(
sizeof(struct ncclShadowObject) + /*padding=*/alignof(max_align_t)-1 + size
);
if (obj == nullptr) {
WARN("Failed to allocate shadow pool object");
return ncclSystemError;
}

struct ncclShadowPage* page = nullptr;
struct ncclShadowPage* newPage = nullptr;
void *devObj = nullptr;
if ((64<<10)/size >= 3) {
int shift = std::max<int>(0, (int)log2Down(size) + 1 - 4);
int pageObjSize = ((size + (1<<shift)-1)>>shift)<<shift;
Expand All @@ -382,12 +406,22 @@ ncclResult_t ncclShadowPoolAlloc(
if (page == nullptr) {
size_t pageSize = std::min<size_t>(64<<10, 64*pageObjSize);
page = (struct ncclShadowPage*)malloc(sizeof(struct ncclShadowPage));
if (page == nullptr) {
WARN("Failed to allocate shadow pool page metadata");
free(obj);
return ncclSystemError;
}
page->objSize = pageObjSize;
page->freeMask = uint64_t(-1)>>(64 - pageSize/pageObjSize);
page->next = nullptr;
page->devObjs = nullptr;
newPage = page;
CUDACHECKGOTO(cudaMallocFromPoolAsync(&page->devObjs, pageSize, pool->memPool, stream), ret, fail);
CUDACHECKGOTO(cudaMemsetAsync(page->devObjs, 0, pageSize, stream), ret, fail);
newPage = nullptr;
// Only link page into pool after CUDA operations succeed
page->next = pool->pages;
pool->pages = page;
CUDACHECK(cudaMallocFromPoolAsync(&page->devObjs, pageSize, pool->memPool, stream));
CUDACHECK(cudaMemsetAsync(page->devObjs, 0, pageSize, stream));
// fall through...
}
if (page->objSize == pageObjSize) {
Expand All @@ -400,13 +434,9 @@ ncclResult_t ncclShadowPoolAlloc(
}
} else {
page = nullptr;
CUDACHECK(cudaMallocFromPoolAsync(&devObj, size, pool->memPool, stream));
CUDACHECK(cudaMemsetAsync(devObj, 0, size, stream));
CUDACHECKGOTO(cudaMallocFromPoolAsync(&devObj, size, pool->memPool, stream), ret, fail);
CUDACHECKGOTO(cudaMemsetAsync(devObj, 0, size, stream), ret, fail);
}

struct ncclShadowObject* obj = (struct ncclShadowObject*)malloc(
sizeof(struct ncclShadowObject) + /*padding=*/alignof(max_align_t)-1 + size
);
obj->page = page;
obj->devObj = devObj;
obj->hostObj = alignUp((char*)(obj+1), alignof(max_align_t));
Expand All @@ -416,6 +446,16 @@ ncclResult_t ncclShadowPoolAlloc(
if (outDevObj) *outDevObj = devObj;
if (outHostObj) *outHostObj = obj->hostObj;
return ncclSuccess;

fail:
if (newPage != nullptr) {
if (newPage->devObjs != nullptr) cudaFreeAsync(newPage->devObjs, stream);
free(newPage);
} else if (page == nullptr && devObj != nullptr) {
cudaFreeAsync(devObj, stream);
}
free(obj);
return ret;
}

ncclResult_t ncclShadowPoolFree(struct ncclShadowPool* pool, void* devObj, cudaStream_t stream) {
Expand All @@ -425,7 +465,9 @@ ncclResult_t ncclShadowPoolFree(struct ncclShadowPool* pool, void* devObj, cudaS
struct ncclShadowObject** pobj = &pool->table[b];
while (true) {
if (*pobj == nullptr) {
WARN("Device object does not exist in shadow pool.");
WARN("ncclShadowPoolFree: Device object %p not found in shadow pool (hash bucket %lu). "
"This may indicate a use-after-free or double-free error. Pool has %d objects.",
devObj, b, pool->count);
return ncclInternalError;
}
if ((*pobj)->devObj == devObj) break;
Expand Down