NVML (libwrap) : import the needed definitions
This commit is contained in:
parent
8bb06c94be
commit
bf7d1514f7
@ -8,16 +8,15 @@
|
|||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#include "core.h"
|
#include "core.h"
|
||||||
|
|
||||||
typedef enum { SUCCESS = 0 } RetCode;
|
|
||||||
int symbolsLoaded = 0;
|
int symbolsLoaded = 0;
|
||||||
|
|
||||||
static RetCode (*nvmlInternalInit)(void);
|
static nvmlReturn_t (*nvmlInternalInit)(void);
|
||||||
static RetCode (*nvmlInternalShutdown)(void);
|
static nvmlReturn_t (*nvmlInternalShutdown)(void);
|
||||||
static RetCode (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
|
static nvmlReturn_t (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
|
||||||
static RetCode (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
|
static nvmlReturn_t (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
|
||||||
static RetCode (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
|
static nvmlReturn_t (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
|
||||||
static RetCode (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
|
static nvmlReturn_t (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
|
||||||
static const char* (*nvmlInternalErrorString)(RetCode r);
|
static const char* (*nvmlInternalErrorString)(nvmlReturn_t r);
|
||||||
|
|
||||||
ncclResult_t wrapSymbols(void) {
|
ncclResult_t wrapSymbols(void) {
|
||||||
|
|
||||||
@ -76,8 +75,8 @@ ncclResult_t wrapNvmlInit(void) {
|
|||||||
WARN("lib wrapper not initialized.");
|
WARN("lib wrapper not initialized.");
|
||||||
return ncclLibWrapperNotSet;
|
return ncclLibWrapperNotSet;
|
||||||
}
|
}
|
||||||
RetCode ret = nvmlInternalInit();
|
nvmlReturn_t ret = nvmlInternalInit();
|
||||||
if (ret != SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
WARN("nvmlInit() failed: %s",
|
WARN("nvmlInit() failed: %s",
|
||||||
nvmlInternalErrorString(ret));
|
nvmlInternalErrorString(ret));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
@ -90,8 +89,8 @@ ncclResult_t wrapNvmlShutdown(void) {
|
|||||||
WARN("lib wrapper not initialized.");
|
WARN("lib wrapper not initialized.");
|
||||||
return ncclLibWrapperNotSet;
|
return ncclLibWrapperNotSet;
|
||||||
}
|
}
|
||||||
RetCode ret = nvmlInternalShutdown();
|
nvmlReturn_t ret = nvmlInternalShutdown();
|
||||||
if (ret != SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
WARN("nvmlShutdown() failed: %s ",
|
WARN("nvmlShutdown() failed: %s ",
|
||||||
nvmlInternalErrorString(ret));
|
nvmlInternalErrorString(ret));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
@ -104,8 +103,8 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
|
|||||||
WARN("lib wrapper not initialized.");
|
WARN("lib wrapper not initialized.");
|
||||||
return ncclLibWrapperNotSet;
|
return ncclLibWrapperNotSet;
|
||||||
}
|
}
|
||||||
RetCode ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
|
nvmlReturn_t ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
|
||||||
if (ret != SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
WARN("nvmlDeviceGetHandleByPciBusId() failed: %s ",
|
WARN("nvmlDeviceGetHandleByPciBusId() failed: %s ",
|
||||||
nvmlInternalErrorString(ret));
|
nvmlInternalErrorString(ret));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
@ -118,8 +117,8 @@ ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index) {
|
|||||||
WARN("lib wrapper not initialized.");
|
WARN("lib wrapper not initialized.");
|
||||||
return ncclLibWrapperNotSet;
|
return ncclLibWrapperNotSet;
|
||||||
}
|
}
|
||||||
RetCode ret = nvmlInternalDeviceGetIndex(device, index);
|
nvmlReturn_t ret = nvmlInternalDeviceGetIndex(device, index);
|
||||||
if (ret != SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
WARN("nvmlDeviceGetIndex() failed: %s ",
|
WARN("nvmlDeviceGetIndex() failed: %s ",
|
||||||
nvmlInternalErrorString(ret));
|
nvmlInternalErrorString(ret));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
@ -132,8 +131,8 @@ ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device) {
|
|||||||
WARN("lib wrapper not initialized.");
|
WARN("lib wrapper not initialized.");
|
||||||
return ncclLibWrapperNotSet;
|
return ncclLibWrapperNotSet;
|
||||||
}
|
}
|
||||||
RetCode ret = nvmlInternalDeviceSetCpuAffinity(device);
|
nvmlReturn_t ret = nvmlInternalDeviceSetCpuAffinity(device);
|
||||||
if (ret != SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
WARN("nvmlDeviceSetCpuAffinity() failed: %s ",
|
WARN("nvmlDeviceSetCpuAffinity() failed: %s ",
|
||||||
nvmlInternalErrorString(ret));
|
nvmlInternalErrorString(ret));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
@ -146,12 +145,11 @@ ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device) {
|
|||||||
WARN("lib wrapper not initialized.");
|
WARN("lib wrapper not initialized.");
|
||||||
return ncclLibWrapperNotSet;
|
return ncclLibWrapperNotSet;
|
||||||
}
|
}
|
||||||
RetCode ret = nvmlInternalDeviceClearCpuAffinity(device);
|
nvmlReturn_t ret = nvmlInternalDeviceClearCpuAffinity(device);
|
||||||
if (ret != SUCCESS) {
|
if (ret != NVML_SUCCESS) {
|
||||||
WARN("nvmlDeviceClearCpuAffinity() failed: %s ",
|
WARN("nvmlDeviceClearCpuAffinity() failed: %s ",
|
||||||
nvmlInternalErrorString(ret));
|
nvmlInternalErrorString(ret));
|
||||||
return ncclSystemError;
|
return ncclSystemError;
|
||||||
}
|
}
|
||||||
return ncclSuccess;
|
return ncclSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*************************************************************************
|
/*************************************************************************
|
||||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* See LICENSE.txt for license information
|
* See LICENSE.txt for license information
|
||||||
************************************************************************/
|
************************************************************************/
|
||||||
@ -12,16 +12,34 @@
|
|||||||
|
|
||||||
#include "core.h"
|
#include "core.h"
|
||||||
|
|
||||||
|
/* Extracted from nvml.h */
|
||||||
typedef struct nvmlDevice_st* nvmlDevice_t;
|
typedef struct nvmlDevice_st* nvmlDevice_t;
|
||||||
|
|
||||||
/**
|
typedef enum nvmlReturn_enum
|
||||||
* Generic enable/disable enum.
|
|
||||||
*/
|
|
||||||
typedef enum nvmlEnableState_enum
|
|
||||||
{
|
{
|
||||||
NVML_FEATURE_DISABLED = 0, //!< Feature disabled
|
NVML_SUCCESS = 0, //!< The operation was successful
|
||||||
NVML_FEATURE_ENABLED = 1 //!< Feature enabled
|
NVML_ERROR_UNINITIALIZED = 1, //!< NVML was not first initialized with nvmlInit()
|
||||||
} nvmlEnableState_t;
|
NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid
|
||||||
|
NVML_ERROR_NOT_SUPPORTED = 3, //!< The requested operation is not available on target device
|
||||||
|
NVML_ERROR_NO_PERMISSION = 4, //!< The current user does not have permission for operation
|
||||||
|
NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations are now allowed through ref counting
|
||||||
|
NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful
|
||||||
|
NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough
|
||||||
|
NVML_ERROR_INSUFFICIENT_POWER = 8, //!< A device's external power cables are not properly attached
|
||||||
|
NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded
|
||||||
|
NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed
|
||||||
|
NVML_ERROR_IRQ_ISSUE = 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU
|
||||||
|
NVML_ERROR_LIBRARY_NOT_FOUND = 12, //!< NVML Shared Library couldn't be found or loaded
|
||||||
|
NVML_ERROR_FUNCTION_NOT_FOUND = 13, //!< Local version of NVML doesn't implement this function
|
||||||
|
NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted
|
||||||
|
NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has otherwise become inaccessible
|
||||||
|
NVML_ERROR_RESET_REQUIRED = 16, //!< The GPU requires a reset before it can be used again
|
||||||
|
NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been blocked by the operating system/cgroups
|
||||||
|
NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, //!< RM detects a driver/library version mismatch
|
||||||
|
NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use
|
||||||
|
NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred
|
||||||
|
} nvmlReturn_t;
|
||||||
|
/* End of nvml.h */
|
||||||
|
|
||||||
ncclResult_t wrapSymbols(void);
|
ncclResult_t wrapSymbols(void);
|
||||||
|
|
||||||
@ -31,7 +49,6 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
|
|||||||
ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
|
ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
|
||||||
ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device);
|
ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device);
|
||||||
ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device);
|
ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device);
|
||||||
ncclResult_t wrapNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
|
|
||||||
|
|
||||||
#endif // End include guard
|
#endif // End include guard
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user