-
Notifications
You must be signed in to change notification settings - Fork 174
Enable user to take ownership of MPI #722
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1c87a3c
19ac320
95415b7
0699e58
bfec279
3a3bdff
3358581
6a093d0
727d1bc
588b636
7c6caac
e6628e4
bb2d5f2
7c30b8c
7512e67
cb72846
7ff2a2e
13769be
68e435b
f15def4
dbe2b1e
bf62480
f26ae47
6edbd1c
ea23f30
142db02
d9d2369
20d2a9b
562b883
beb1a61
34a8083
1fe9740
226e653
70efc1f
87b8cd6
7894ade
c768075
a557adf
67e7ffe
ef34662
5df9dab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,8 @@ | |
| #ifndef ENVIRONMENT_H | ||
| #define ENVIRONMENT_H | ||
|
|
||
| #include <stdbool.h> | ||
|
|
||
| // enable invocation by both C and C++ binaries | ||
| #ifdef __cplusplus | ||
| extern "C" { | ||
|
|
@@ -36,6 +38,7 @@ typedef struct { | |
| int isMultithreaded; | ||
| int isGpuAccelerated; | ||
| int isDistributed; | ||
| bool userOwnsMpi; | ||
|
|
||
| // deployment modes which cannot be directly changed after compilation | ||
| int isCuQuantumEnabled; | ||
|
|
@@ -61,6 +64,12 @@ void initQuESTEnv(); | |
| */ | ||
| void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread); | ||
|
|
||
| /** @notyetdoced | ||
| * Advanced initialiser which lets the user positively declare that they take responsibility for MPI. | ||
| * This means we assume they have called MPI_Init, and that they will call MPI_Finalize. | ||
| */ | ||
| void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread); | ||
|
otbrown marked this conversation as resolved.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given the user needs to resolve MPI in order to own it, it seems fair to move this into the same "optional advanced-MPI user" module needed by subcommunicator. I think relegating this to an alternate header is better give this is anticipated to be tentatively, and is opaquely named. It can then live in a dedicated doc group, e.g. advanced_env, or similarly |
||
|
|
||
| /// @notyetdoced | ||
| void finalizeQuESTEnv(); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| #ifndef SUBCOMMUNICATOR_H | ||
| #define SUBCOMMUNICATOR_H | ||
|
|
||
| #include "quest/include/config.h" | ||
|
|
||
| #if QUEST_COMPILE_MPI && QUEST_COMPILE_SUBCOMM | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not thrilled about guarding the entire header, and the contextualising of exposing subcommunicator as "compiling". Semantics like "expose subcommunicator" might have been better, since all the internal trickery of "don't expose internal subcommunicator-specific functions to other TUs" is brittle when This is a half-thought for a future me |
||
|
|
||
| #include <mpi.h> | ||
|
|
||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| /** @notyetdoced | ||
| * Advanced initialiser which allows the user to provide an MPI communicator for QuEST to use. | ||
| * Use of this initialiser implies userOwnsMpi = true, (exposed by initCustomMpiQuESTEnv) and | ||
| * therefore that they have already initialised MPI, and they will call MPI_Finalize at the | ||
| * appropriate time. | ||
| * | ||
| * The user-provided MPI communicator undergoes the same validation procedure as any that QuEST | ||
| * would use, and so must contain a power-of-2 number of processes. | ||
| */ | ||
| void initCustomMpiCommQuESTEnv(MPI_Comm questComm, int useGpuAccel, int useMultithread); | ||
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
|
|
||
| #endif | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,7 @@ target_sources(QuEST | |
| operations.cpp | ||
| paulis.cpp | ||
| qureg.cpp | ||
| subcommunicator.cpp | ||
| trotterisation.cpp | ||
| types.cpp | ||
| ) | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -71,7 +71,7 @@ static bool hasEnvBeenFinalized = false; | |
| */ | ||
|
|
||
|
|
||
| void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread, const char* caller) { | ||
| void validateAndInitCustomQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread, const char* caller) { | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is missing validation that when |
||
| // ensure that we are never re-initialising QuEST (even after finalize) because | ||
| // this leads to undefined behaviour in distributed mode, as per the MPI | ||
|
|
@@ -93,8 +93,7 @@ void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMulti | |
| // and before any GPU initialisation and validation, since we will | ||
| // perform that specifically upon the MPI-process-bound GPU(s). Further, | ||
| // we can make sure validation errors are reported only by the root node. | ||
| if (useDistrib) | ||
| comm_init(); | ||
| comm_init(useDistrib, userOwnsMpi); | ||
|
|
||
| validate_newEnvDistributedBetweenPower2Nodes(caller); | ||
|
|
||
|
|
@@ -145,6 +144,7 @@ void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMulti | |
| globalEnvPtr->isMultithreaded = useMultithread; | ||
| globalEnvPtr->isGpuAccelerated = useGpuAccel; | ||
| globalEnvPtr->isDistributed = useDistrib; | ||
| globalEnvPtr->userOwnsMpi = userOwnsMpi; | ||
| globalEnvPtr->isCuQuantumEnabled = useCuQuantum; | ||
| globalEnvPtr->isGpuSharingEnabled = permitGpuSharing; | ||
|
|
||
|
|
@@ -153,6 +153,11 @@ void validateAndInitCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMulti | |
| globalEnvPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1; | ||
| } | ||
|
|
||
| void updateQuESTEnvDistInfo() { | ||
| globalEnvPtr->rank = (globalEnvPtr->isDistributed)? comm_getRank() : 0; | ||
| globalEnvPtr->numNodes = (globalEnvPtr->isDistributed)? comm_getNumNodes() : 1; | ||
| return; | ||
| } | ||
|
|
||
|
|
||
| /* | ||
|
|
@@ -187,10 +192,11 @@ void printCompilationInfo() { | |
|
|
||
| print_table( | ||
| "compilation", { | ||
| {"isMpiCompiled", comm_isMpiCompiled()}, | ||
| {"isGpuCompiled", gpu_isGpuCompiled()}, | ||
| {"isOmpCompiled", cpu_isOpenmpCompiled()}, | ||
| {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, | ||
| {"isMpiCompiled", comm_isMpiCompiled()}, | ||
| {"isMpiSubCommunicatorCompiled", comm_isMpiSubCommunicatorCompiled()}, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should abbreviate |
||
| {"isGpuCompiled", gpu_isGpuCompiled()}, | ||
| {"isOmpCompiled", cpu_isOpenmpCompiled()}, | ||
| {"isCuQuantumCompiled", gpu_isCuQuantumCompiled()}, | ||
| }); | ||
| } | ||
|
|
||
|
|
@@ -200,6 +206,7 @@ void printDeploymentInfo() { | |
| print_table( | ||
| "deployment", { | ||
| {"isMpiEnabled", globalEnvPtr->isDistributed}, | ||
| {"doesUserOwnMpi", globalEnvPtr->userOwnsMpi}, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| {"isGpuEnabled", globalEnvPtr->isGpuAccelerated}, | ||
| {"isOmpEnabled", globalEnvPtr->isMultithreaded}, | ||
| {"isCuQuantumEnabled", globalEnvPtr->isCuQuantumEnabled}, | ||
|
|
@@ -397,13 +404,19 @@ extern "C" { | |
|
|
||
| void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread) { | ||
|
|
||
| validateAndInitCustomQuESTEnv(useDistrib, useGpuAccel, useMultithread, __func__); | ||
| const bool userOwnsMpi = false; | ||
| validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__); | ||
| } | ||
|
|
||
|
|
||
| void initCustomMpiQuESTEnv(int useDistrib, bool userOwnsMpi, int useGpuAccel, int useMultithread) { | ||
| validateAndInitCustomQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread, __func__); | ||
| } | ||
|
|
||
| void initQuESTEnv() { | ||
|
|
||
| validateAndInitCustomQuESTEnv(modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__); | ||
| const bool userOwnsMpi = false; | ||
| validateAndInitCustomQuESTEnv(modeflag::USE_AUTO, userOwnsMpi, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__); | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -436,7 +449,7 @@ void finalizeQuESTEnv() { | |
|
|
||
| if (globalEnvPtr->isDistributed) { | ||
| comm_sync(); | ||
| comm_end(); | ||
| comm_end(globalEnvPtr->userOwnsMpi); | ||
| } | ||
|
|
||
| // free global env's heap memory and flag it as unallocated | ||
|
|
@@ -454,8 +467,12 @@ void syncQuESTEnv() { | |
| if (globalEnvPtr->isGpuAccelerated) | ||
| gpu_sync(); | ||
|
|
||
| if (globalEnvPtr->isDistributed) | ||
| if (globalEnvPtr->isDistributed) { | ||
| comm_sync(); | ||
| #if QUEST_COMPILE_SUBCOMM | ||
| updateQuESTEnvDistInfo(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function needn't exist. It invokes globalEnvPtr->rank = (globalEnvPtr->isDistributed)? comm_getRank() : 0;
globalEnvPtr->numNodes = (globalEnvPtr->isDistributed)? comm_getNumNodes() : 1;which is called from nowhere else; the two lines are clearer than the vagueness of "QuESTEnvDistInfo". Further, the ternary in those lines are dead-code, given this function si called only when But more importantly, I'm unsure of the semantics of this function; why should
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @otbrown is this just a dev artefact? (double checking since I've removed in cleanup)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apologies, yes, there was a time when that call was needed to allow the user to change the subcommunicator after initialisation. That way madness lies however, so that plan was aborted. |
||
| #endif | ||
| } | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -498,10 +515,11 @@ void getQuESTEnvironmentString(char str[200]) { | |
| int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled(); | ||
| int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible(); | ||
|
|
||
| snprintf(str, 200, "CUDA=%d OpenMP=%d MPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d", | ||
| snprintf(str, 200, "CUDA=%d OpenMP=%d MPI=%d userOwnsMPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d", | ||
| env.isGpuAccelerated, | ||
| env.isMultithreaded, | ||
| env.isDistributed, | ||
| env.userOwnsMpi, | ||
| numThreads, | ||
| env.numNodes, | ||
| cuQuantum, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| #include "quest/include/config.h" | ||
| #include "quest/include/environment.h" | ||
| #include "quest/include/subcommunicator.h" | ||
|
|
||
| #include "quest/src/comm/comm_config.hpp" | ||
| #include "quest/src/core/errors.hpp" | ||
|
|
||
| #if QUEST_COMPILE_MPI && QUEST_COMPILE_SUBCOMM | ||
|
|
||
| #include <stdbool.h> | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is the C |
||
| #include <mpi.h> | ||
|
|
||
| void initCustomMpiCommQuESTEnv(MPI_Comm userQuestComm, int useGpuAccel, int useMultithread) { | ||
| // useDistrib and userOwnsMpi are implied by the user of this initialiser | ||
| const int useDistrib = 1; | ||
| const bool userOwnsMpi = true; | ||
|
|
||
| // set mpiCommQuest to user provided communicator | ||
| if (comm_isInit()) { | ||
| comm_setMpiComm(userQuestComm); | ||
| } else { | ||
| error_commNotInit(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an internal errror throw to indicate a QuEST bug, not user-input validation. See here |
||
| } | ||
|
|
||
| // initialise QuEST around that communicator | ||
| initCustomMpiQuESTEnv(useDistrib, userOwnsMpi, useGpuAccel, useMultithread); | ||
|
otbrown marked this conversation as resolved.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function ( |
||
|
|
||
| return; | ||
| } | ||
|
|
||
| #endif | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be an
int, or the other fields updated tobool, since they are all on equal footing. The existingis*fields are always set to0or1. Possibly there was confusion with the arguments ofinitCustomQuESTEnv()which accepts similarly named arguments as-1in order for the auto-deployer to override them - that never reaches theQuESTEnv.