NVML (libwrap) : import the needed definitions

This commit is contained in:
Sylvain Jeaugey 2016-10-13 10:28:59 -07:00
parent 8bb06c94be
commit bf7d1514f7
2 changed files with 45 additions and 30 deletions

View File

@ -8,16 +8,15 @@
#include <dlfcn.h>
#include "core.h"
typedef enum { SUCCESS = 0 } RetCode;
int symbolsLoaded = 0;
static RetCode (*nvmlInternalInit)(void);
static RetCode (*nvmlInternalShutdown)(void);
static RetCode (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
static RetCode (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
static RetCode (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
static RetCode (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
static const char* (*nvmlInternalErrorString)(RetCode r);
static nvmlReturn_t (*nvmlInternalInit)(void);
static nvmlReturn_t (*nvmlInternalShutdown)(void);
static nvmlReturn_t (*nvmlInternalDeviceGetHandleByPciBusId)(const char* pciBusId, nvmlDevice_t* device);
static nvmlReturn_t (*nvmlInternalDeviceGetIndex)(nvmlDevice_t device, unsigned* index);
static nvmlReturn_t (*nvmlInternalDeviceSetCpuAffinity)(nvmlDevice_t device);
static nvmlReturn_t (*nvmlInternalDeviceClearCpuAffinity)(nvmlDevice_t device);
static const char* (*nvmlInternalErrorString)(nvmlReturn_t r);
ncclResult_t wrapSymbols(void) {
@ -76,8 +75,8 @@ ncclResult_t wrapNvmlInit(void) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
RetCode ret = nvmlInternalInit();
if (ret != SUCCESS) {
nvmlReturn_t ret = nvmlInternalInit();
if (ret != NVML_SUCCESS) {
WARN("nvmlInit() failed: %s",
nvmlInternalErrorString(ret));
return ncclSystemError;
@ -90,8 +89,8 @@ ncclResult_t wrapNvmlShutdown(void) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
RetCode ret = nvmlInternalShutdown();
if (ret != SUCCESS) {
nvmlReturn_t ret = nvmlInternalShutdown();
if (ret != NVML_SUCCESS) {
WARN("nvmlShutdown() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@ -104,8 +103,8 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
RetCode ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
if (ret != SUCCESS) {
nvmlReturn_t ret = nvmlInternalDeviceGetHandleByPciBusId(pciBusId, device);
if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceGetHandleByPciBusId() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@ -118,8 +117,8 @@ ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
RetCode ret = nvmlInternalDeviceGetIndex(device, index);
if (ret != SUCCESS) {
nvmlReturn_t ret = nvmlInternalDeviceGetIndex(device, index);
if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceGetIndex() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@ -132,8 +131,8 @@ ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
RetCode ret = nvmlInternalDeviceSetCpuAffinity(device);
if (ret != SUCCESS) {
nvmlReturn_t ret = nvmlInternalDeviceSetCpuAffinity(device);
if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceSetCpuAffinity() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
@ -146,12 +145,11 @@ ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device) {
WARN("lib wrapper not initialized.");
return ncclLibWrapperNotSet;
}
RetCode ret = nvmlInternalDeviceClearCpuAffinity(device);
if (ret != SUCCESS) {
nvmlReturn_t ret = nvmlInternalDeviceClearCpuAffinity(device);
if (ret != NVML_SUCCESS) {
WARN("nvmlDeviceClearCpuAffinity() failed: %s ",
nvmlInternalErrorString(ret));
return ncclSystemError;
}
return ncclSuccess;
}

View File

@ -1,5 +1,5 @@
/*************************************************************************
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@ -12,16 +12,34 @@
#include "core.h"
/* Extracted from nvml.h */
typedef struct nvmlDevice_st* nvmlDevice_t;
/**
* Generic enable/disable enum.
*/
typedef enum nvmlEnableState_enum
typedef enum nvmlReturn_enum
{
NVML_FEATURE_DISABLED = 0, //!< Feature disabled
NVML_FEATURE_ENABLED = 1 //!< Feature enabled
} nvmlEnableState_t;
NVML_SUCCESS = 0, //!< The operation was successful
NVML_ERROR_UNINITIALIZED = 1, //!< NVML was not first initialized with nvmlInit()
NVML_ERROR_INVALID_ARGUMENT = 2, //!< A supplied argument is invalid
NVML_ERROR_NOT_SUPPORTED = 3, //!< The requested operation is not available on target device
NVML_ERROR_NO_PERMISSION = 4, //!< The current user does not have permission for operation
NVML_ERROR_ALREADY_INITIALIZED = 5, //!< Deprecated: Multiple initializations are now allowed through ref counting
NVML_ERROR_NOT_FOUND = 6, //!< A query to find an object was unsuccessful
NVML_ERROR_INSUFFICIENT_SIZE = 7, //!< An input argument is not large enough
NVML_ERROR_INSUFFICIENT_POWER = 8, //!< A device's external power cables are not properly attached
NVML_ERROR_DRIVER_NOT_LOADED = 9, //!< NVIDIA driver is not loaded
NVML_ERROR_TIMEOUT = 10, //!< User provided timeout passed
NVML_ERROR_IRQ_ISSUE = 11, //!< NVIDIA Kernel detected an interrupt issue with a GPU
NVML_ERROR_LIBRARY_NOT_FOUND = 12, //!< NVML Shared Library couldn't be found or loaded
NVML_ERROR_FUNCTION_NOT_FOUND = 13, //!< Local version of NVML doesn't implement this function
NVML_ERROR_CORRUPTED_INFOROM = 14, //!< infoROM is corrupted
NVML_ERROR_GPU_IS_LOST = 15, //!< The GPU has fallen off the bus or has otherwise become inaccessible
NVML_ERROR_RESET_REQUIRED = 16, //!< The GPU requires a reset before it can be used again
NVML_ERROR_OPERATING_SYSTEM = 17, //!< The GPU control device has been blocked by the operating system/cgroups
NVML_ERROR_LIB_RM_VERSION_MISMATCH = 18, //!< RM detects a driver/library version mismatch
NVML_ERROR_IN_USE = 19, //!< An operation cannot be performed because the GPU is currently in use
NVML_ERROR_UNKNOWN = 999 //!< An internal driver error occurred
} nvmlReturn_t;
/* End of nvml.h */
ncclResult_t wrapSymbols(void);
@ -31,7 +49,6 @@ ncclResult_t wrapNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_
ncclResult_t wrapNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index);
ncclResult_t wrapNvmlDeviceSetCpuAffinity(nvmlDevice_t device);
ncclResult_t wrapNvmlDeviceClearCpuAffinity(nvmlDevice_t device);
ncclResult_t wrapNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device);
#endif // End include guard