NVML (libwrap) : import the needed definitions
This commit is contained in:
parent
8bb06c94be
commit
bf7d1514f7
@ -8,16 +8,15 @@
|
||||
#include <dlfcn.h>
|
||||
#include "core.h"
|
||||
|
||||
typedef enum { SUCCESS = 0 } RetCode;
|
||||
int symbolsLoaded = 0;
|
||||
|
||||
static RetCode (*nvmlInternalInit)(void);
|
||||
static RetCode (*nvmlInternalShutdown)(void);
|
||||
static RetCode (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
|
||||
static RetCode (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
|
||||
static RetCode (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
|
||||
static RetCode (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
|
||||
static const char* (*nvmlInternalErrorString)(RetCode r);
|
||||
static nvmlReturn_t (*nvmlInternalInit)(void);
|
||||
static nvmlReturn_t (*nvmlInternalShutdown)(void);
|
||||
static nvmlReturn_t (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
|
||||
static nvmlReturn_t (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
|
||||
static nvmlReturn_t (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
|
||||
static nvmlReturn_t (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
|
||||
static const char* (*nvmlInternalErrorString)(nvmlReturn_t r);
|
||||
|
||||
ncclResult_t wrapSymbols(void) {
|
||||
|
||||
@ -76,8 +75,8 @@ ncclResult_t wrapNvmlInit(void) {
|
||||
WARN("lib wrapper not initialized.");
|
||||
return ncclLibWrapperNotSet;
|
||||
}
|
||||
RetCode ret = nvmlInternalInit();
|
||||
if (ret != SUCCESS) {
|
||||
nvmlReturn_t ret = nvmlInternalInit();
|
||||
if (ret != NVML_SUCCESS) {
|
||||
WARN("nvmlInit() failed: %s",
|
||||
nvmlInternalErrorString(ret));
|
||||
return ncclSystemError;
|
||||
@ -90,8 +89,8 @@ ncclResult_t wrapNvmlShutdown(void) {
|
||||
WARN("lib wrapper not initialized.");
|
||||
return ncclLibWrapperNotSet;
|
||||
}
|
||||
RetCode ret = nvmlInternalShutdown();
|
||||
if (ret != SUCCESS) {
|
||||
nvmlReturn_t ret = nvmlInternalShutdown();
|
||||
if (ret != NVML_SUCCESS) {
|
||||
WARN("nvmlShutdown() failed: %s ",
|
||||
nvmlInternalErrorString(ret));
|
||||
return ncclSystemError;
|
||||
@ -104,8 +103,8 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
|
||||
WARN("lib wrapper not initialized.");
|
||||
return ncclLibWrapperNotSet;
|
||||
}
|
||||
RetCode ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
|
||||
if (ret != SUCCESS) {
|
||||
nvmlReturn_t ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
WARN("nvmlDeviceGetHandleByPciBusId() failed: %s ",
|
||||
nvmlInternalErrorString(ret));
|
||||
return ncclSystemError;
|
||||
@ -118,8 +117,8 @@ ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index) {
|
||||
WARN("lib wrapper not initialized.");
|
||||
return ncclLibWrapperNotSet;
|
||||
}
|
||||
RetCode ret = nvmlInternalDeviceGetIndex(device, index);
|
||||
if (ret != SUCCESS) {
|
||||
nvmlReturn_t ret = nvmlInternalDeviceGetIndex(device, index);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
WARN("nvmlDeviceGetIndex() failed: %s ",
|
||||
nvmlInternalErrorString(ret));
|
||||
return ncclSystemError;
|
||||
@ -132,8 +131,8 @@ ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device) {
|
||||
WARN("lib wrapper not initialized.");
|
||||
return ncclLibWrapperNotSet;
|
||||
}
|
||||
RetCode ret = nvmlInternalDeviceSetCpuAffinity(device);
|
||||
if (ret != SUCCESS) {
|
||||
nvmlReturn_t ret = nvmlInternalDeviceSetCpuAffinity(device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
WARN("nvmlDeviceSetCpuAffinity() failed: %s ",
|
||||
nvmlInternalErrorString(ret));
|
||||
return ncclSystemError;
|
||||
@ -146,12 +145,11 @@ ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device) {
|
||||
WARN("lib wrapper not initialized.");
|
||||
return ncclLibWrapperNotSet;
|
||||
}
|
||||
RetCode ret = nvmlInternalDeviceClearCpuAffinity(device);
|
||||
if (ret != SUCCESS) {
|
||||
nvmlReturn_t ret = nvmlInternalDeviceClearCpuAffinity(device);
|
||||
if (ret != NVML_SUCCESS) {
|
||||
WARN("nvmlDeviceClearCpuAffinity() failed: %s ",
|
||||
nvmlInternalErrorString(ret));
|
||||
return ncclSystemError;
|
||||
}
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*************************************************************************
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* See LICENSE.txt for license information
|
||||
************************************************************************/
|
||||
@ -12,16 +12,34 @@
|
||||
|
||||
#include "core.h"
|
||||
|
||||
/* Extracted from nvml.h */
|
||||
typedef struct nvmlDevice_st* nvmlDevice_t;
|
||||
|
||||
/**
|
||||
* Return values for NVML API calls.
|
||||
*/
|
||||
typedef enum nvmlEnableState_enum
|
||||
typedef enum nvmlReturn_enum
|
||||
{
|
||||
NVML_FEATURE_DISABLED = 0, //!< Feature disabled
|
||||
NVML_FEATURE_ENABLED = 1 //!< Feature enabled
|
||||
} nvmlEnableState_t;
|
||||
NVML_SUCCESS = 0, //!< The operation was successful
|
||||
NVML_ERROR_UNINITIALIZED = 1, //!< NVML was not first initialized with nvmlInit()
|
||||
NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid
|
||||
NVML_ERROR_NOT_SUPPORTED = 3, //!< The requested operation is not available on target device
|
||||
NVML_ERROR_NO_PERMISSION = 4, //!< The current user does not have permission for operation
|
||||
NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations are now allowed through ref counting
|
||||
NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful
|
||||
NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough
|
||||
NVML_ERROR_INSUFFICIENT_POWER = 8, //!< A device's external power cables are not properly attached
|
||||
NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded
|
||||
NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed
|
||||
NVML_ERROR_IRQ_ISSUE = 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU
|
||||
NVML_ERROR_LIBRARY_NOT_FOUND = 12, //!< NVML Shared Library couldn't be found or loaded
|
||||
NVML_ERROR_FUNCTION_NOT_FOUND = 13, //!< Local version of NVML doesn't implement this function
|
||||
NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted
|
||||
NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has otherwise become inaccessible
|
||||
NVML_ERROR_RESET_REQUIRED = 16, //!< The GPU requires a reset before it can be used again
|
||||
NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been blocked by the operating system/cgroups
|
||||
NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, //!< RM detects a driver/library version mismatch
|
||||
NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use
|
||||
NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred
|
||||
} nvmlReturn_t;
|
||||
/* End of nvml.h */
|
||||
|
||||
ncclResult_t wrapSymbols(void);
|
||||
|
||||
@ -31,7 +49,6 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
|
||||
ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
|
||||
ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device);
|
||||
ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device);
|
||||
ncclResult_t wrapNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
|
||||
|
||||
#endif // End include guard
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user