Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
/maint/codes.pc
/test-driver
.deps
src/surrogate/zmqml/demozmqmlrequester
src/surrogate/zmqml/libzmqmlrequester.so

# make generated artifacts
.dirstamp
Expand Down
226 changes: 213 additions & 13 deletions CODES-compile-instructions.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/usr/bin bash -x
#!/usr/bin/env bash
set -euo pipefail
set -x

# Switches
swm_enable=1
union_enable=1
torch_enable=0
swm_enable=0
union_enable=0
torch_enable=1

# Uncomment below for MPICH
#export PATH=/usr/local/mpich-4.1.2/bin/:"$PATH"
Expand All @@ -21,8 +23,17 @@ CUR_DIR="$PWD"

##### Downloading everything #####

git clone https://github.com/codes-org/codes --depth=100 --branch=v1.5.0
git clone https://github.com/ross-org/ross --depth=100 --branch=v8.1.0
# Clone a dependency only when no git checkout of it exists yet, so re-running
# the script reuses an existing working tree instead of failing on `git clone`.
#   $1 - checkout directory (the name `git clone` derives from the URL)
#   $2 - repository URL
#   $3 - tag or branch to check out
clone_unless_present() {
    local checkout_dir="$1" repo_url="$2" ref="$3"

    if [ -d "${checkout_dir}/.git" ]; then
        echo "Using existing ${checkout_dir} checkout: $(realpath "${checkout_dir}")"
        return 0
    fi

    git clone "${repo_url}" --depth=100 --branch="${ref}"
}

clone_unless_present codes https://github.com/codes-org/codes v1.5.0

clone_unless_present ross https://github.com/ross-org/ross v8.1.0

if [ $swm_enable = 1 ]; then
git clone https://github.com/pmodels/argobots --depth=1
Expand All @@ -40,7 +51,7 @@ fi

##### COMPILING #####

mkdir ross/build
mkdir -p ross/build
pushd ross/build
cmake .. -DROSS_BUILD_MODELS=ON -DCMAKE_INSTALL_PREFIX="$(realpath ./bin)" \
-DCMAKE_C_COMPILER=mpicc -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_FLAGS="-g -Wall"
Expand All @@ -53,7 +64,7 @@ popd
if [ $swm_enable = 1 ]; then
pushd swm-workloads/swm
./prepare.sh
mkdir build
mkdir -p build
pushd build
../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
#make V=1 && make install
Expand All @@ -64,7 +75,7 @@ if [ $swm_enable = 1 ]; then

pushd argobots
./autogen.sh
mkdir build
mkdir -p build
pushd build
#../configure --enable-debug=all --disable-fast --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
../configure --disable-shared --prefix="$(realpath ./bin)" CC=mpicc CXX=mpicxx CFLAGS=-g CXXFLAGS=-g
Expand Down Expand Up @@ -97,16 +108,181 @@ if [ $union_enable = 1 ]; then
fi


mkdir codes/build

# Make system pkg-config metadata visible even when Conda's pkg-config is active.
# This is needed for libzmq.pc on systems where ZeroMQ is installed through the OS
# but the active Conda environment's pkg-config only searches Conda pkgconfig dirs.
# Fallback search: only runs when the active pkg-config cannot find libzmq.
# Each existing well-known system directory is APPENDED to PKG_CONFIG_PATH, so
#   - entries the user already configured keep priority over the fallbacks,
#   - the fallbacks are searched in the order listed here, and
#   - no empty path component is created when PKG_CONFIG_PATH starts out unset.
if ! pkg-config --exists libzmq 2>/dev/null; then
    for pcdir in \
        /usr/lib/x86_64-linux-gnu/pkgconfig \
        /usr/lib64/pkgconfig \
        /usr/lib/pkgconfig \
        /usr/local/lib/pkgconfig \
        /usr/local/lib64/pkgconfig \
        /opt/homebrew/lib/pkgconfig \
        /usr/share/pkgconfig
    do
        if [ -d "$pcdir" ]; then
            # ${VAR:+...} emits the "existing:" prefix only when VAR is non-empty.
            export PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}${pcdir}"
        fi
    done
fi

# Probe for libzmq once more; when it is still not discoverable, print a
# three-line warning on stderr and carry on (this is not fatal here).
pkg-config --exists libzmq 2>/dev/null || {
    printf '%s\n' \
        "WARNING: pkg-config still cannot find libzmq.pc." \
        " If ZMQML fails to build, install the ZeroMQ development package" \
        " or set PKG_CONFIG_PATH to the directory containing libzmq.pc."
} >&2

# Build local ZMQML requester library required by director-client.C
# After the build, verify that the shared library and its header are present.
# The check reports the missing file and exits explicitly, so a failure is
# neither silent nor dependent on `set -e` being active.
pushd codes/src/surrogate/zmqml
make clean
make
for zmqml_file in libzmqmlrequester.so zmqmlrequester.h; do
    if [ ! -f "${zmqml_file}" ]; then
        echo "ERROR: expected ZMQML file is missing after build: ${PWD}/${zmqml_file}" >&2
        exit 1
    fi
done
popd

# Add the GLOBAL keyword to the imported zmqmlrequester target declaration in
# codes/src/CMakeLists.txt so that doc/example and tests can see the target.
# Running this again on an already patched file changes nothing.
python3 - <<'INNERPY'
from pathlib import Path

cmake_lists = Path("codes/src/CMakeLists.txt")
contents = cmake_lists.read_text()

local_decl = "add_library(zmqmlrequester SHARED IMPORTED )"
global_decl = "add_library(zmqmlrequester SHARED IMPORTED GLOBAL)"

if local_decl in contents:
    # Unpatched file: rewrite every occurrence of the non-GLOBAL declaration.
    cmake_lists.write_text(contents.replace(local_decl, global_decl))
elif global_decl not in contents:
    # Neither spelling is present, so the file is not in the expected shape.
    raise SystemExit("Could not find zmqmlrequester imported target line in codes/src/CMakeLists.txt")
INNERPY

mkdir -p codes/build
pushd codes/build

torch_cmake_prefix=""
torch_dir=""

# Torch (and optional CUDA) configuration; skipped entirely unless torch_enable is 1.
#   - Locates Torch's CMake package through the active python3's torch module
#     and aborts when TorchConfig.cmake is not where it is expected.
#   - CUDA is opt-in through CUDA_HOME. A CUDA-enabled PyTorch without
#     CUDA_HOME is rejected with an explanatory message.
#   - With CUDA_HOME set, the toolkit layout is validated and the CUDA-related
#     environment variables are exported; otherwise they are unset.
# Sets: torch_cmake_prefix, torch_dir, cuda_arch (left empty for CPU-only builds).
if [ "$torch_enable" = 1 ]; then
    torch_cmake_prefix="$(python3 - <<'INNERPY'
import torch
print(torch.utils.cmake_prefix_path)
INNERPY
)"
    torch_dir="${torch_cmake_prefix}/Torch"

    if [ ! -f "${torch_dir}/TorchConfig.cmake" ]; then
        echo "ERROR: TorchConfig.cmake not found at: ${torch_dir}/TorchConfig.cmake" >&2
        echo " torch.utils.cmake_prefix_path returned: ${torch_cmake_prefix}" >&2
        exit 1
    fi

    echo "Using Torch CMake prefix: ${torch_cmake_prefix}"
    echo "Using Torch_DIR: ${torch_dir}"

    # CUDA is intentionally opt-in.
    # Default to CPU-only Torch-JIT compilation unless CUDA_HOME is explicitly set.
    #
    # To enable CUDA, run for example:
    # export CUDA_HOME=/usr/local/cuda-12.4
    # ./CODES-compile-instructions.sh
    torch_cuda_version="$(python3 - <<'INNERPY'
import torch
print(torch.version.cuda or "")
INNERPY
)"

    cuda_arch=""
    if [ -z "${CUDA_HOME:-}" ] && [ -n "${torch_cuda_version}" ]; then
        echo "ERROR: CUDA_HOME is not set, so this script is defaulting to CPU-only Torch-JIT compilation." >&2
        echo " However, the active Python environment has a CUDA-enabled PyTorch build:" >&2
        echo " torch.version.cuda=${torch_cuda_version}" >&2
        echo "" >&2
        echo " CMake cannot use a CUDA-enabled PyTorch package as a CPU-only LibTorch package." >&2
        echo " Choose one of the following:" >&2
        echo " 1. For CPU-only compilation, install a CPU-only PyTorch build in this environment." >&2
        echo " 2. For CUDA compilation, export CUDA_HOME to your CUDA toolkit root." >&2
        echo "" >&2
        echo " Example CUDA build:" >&2
        echo " export CUDA_HOME=/usr/local/cuda-12.4" >&2
        echo " bash CODES-compile-instructions.sh" >&2
        exit 1
    fi

    if [ -n "${CUDA_HOME:-}" ]; then
        # Validate the toolkit layout up front so a broken CUDA_HOME fails here
        # with a specific message instead of somewhere inside the CMake run.
        if [ ! -f "${CUDA_HOME}/include/cuda_runtime_api.h" ]; then
            echo "ERROR: CUDA_HOME is set, but missing CUDA header: ${CUDA_HOME}/include/cuda_runtime_api.h" >&2
            exit 1
        fi

        if [ ! -f "${CUDA_HOME}/lib64/libcudart.so" ] && [ ! -f "${CUDA_HOME}/lib/libcudart.so" ]; then
            echo "ERROR: CUDA_HOME is set, but missing CUDA runtime library under ${CUDA_HOME}/lib64 or ${CUDA_HOME}/lib" >&2
            exit 1
        fi

        if [ ! -x "${CUDA_HOME}/bin/nvcc" ]; then
            echo "ERROR: CUDA_HOME is set, but missing CUDA compiler: ${CUDA_HOME}/bin/nvcc" >&2
            exit 1
        fi

        if [ ! -d "${CUDA_HOME}/nvvm/libdevice" ]; then
            echo "ERROR: CUDA_HOME is set, but missing CUDA libdevice directory: ${CUDA_HOME}/nvvm/libdevice" >&2
            exit 1
        fi

        # Take the compute capability of the first GPU that nvidia-smi reports
        # and strip the dot and whitespace (e.g. "8.0" becomes "80").
        # `|| true` keeps a failing pipeline from aborting the script.
        if command -v nvidia-smi >/dev/null 2>&1; then
            cuda_arch="$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -n1 | tr -d '.[:space:]' || true)"
        fi

        if [ -z "${cuda_arch}" ]; then
            echo "WARNING: Could not auto-detect GPU compute capability with nvidia-smi." >&2
            echo " Falling back to CMAKE_CUDA_ARCHITECTURES=80." >&2
            cuda_arch="80"
        fi

        export CUDA_HOME
        export CUDA_PATH="${CUDA_HOME}"
        export CUDA_ROOT="${CUDA_HOME}"
        export CUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}"
        export CUDAToolkit_ROOT="${CUDA_HOME}"
        export CUDACXX="${CUDA_HOME}/bin/nvcc"
        export PATH="${CUDA_HOME}/bin:${PATH}"
        # Append the previous value only when it is non-empty. A trailing ":"
        # would add an empty entry, which the dynamic loader interprets as the
        # current working directory.
        export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

        echo "CUDA_HOME is set; enabling CUDA Torch-JIT compilation."
        echo "Using CUDA_HOME: ${CUDA_HOME}"
        echo "Using CUDACXX: ${CUDACXX}"
        echo "Using CMAKE_CUDA_ARCHITECTURES=${cuda_arch}"
    else
        echo "CUDA_HOME is not set; forcing CPU-only Torch-JIT compilation."

        # Prevent accidental CUDA discovery from /usr/local/cuda, nvcc on PATH,
        # inherited CMake cache variables, or CUDA-enabled PyTorch metadata.
        unset CUDA_HOME
        unset CUDA_PATH
        unset CUDA_ROOT
        unset CUDA_TOOLKIT_ROOT_DIR
        unset CUDAToolkit_ROOT
        unset CUDACXX
        unset CMAKE_CUDA_COMPILER
    fi
fi

# Prefix path handed to CMake: always the ROSS install tree, followed by
# Torch's CMake prefix when torch_enable is 1 (";" separates CMake list items).
ross_install_prefix="$(realpath "$CUR_DIR/ross/build/bin")"
case "$torch_enable" in
    1) cmake_prefix_path="${ross_install_prefix};${torch_cmake_prefix}" ;;
    *) cmake_prefix_path="${ross_install_prefix}" ;;
esac

# Base CMake arguments for the CODES build, kept in a bash array so that
# optional feature flags can be appended further down.
# NOTE(review): CMAKE_PREFIX_PATH is listed twice here; the first entry looks
# like the pre-change line of the diff and the second like its replacement — confirm.
# NOTE(review): the ZeroMQ include directory and library are hard-coded to a
# Debian/Ubuntu x86_64 layout; other platforms presumably need different values
# (e.g. taken from pkg-config) — confirm.
make_args_codes=(
-DCMAKE_PREFIX_PATH="$(realpath "$CUR_DIR/ross/build/bin")"
-DCMAKE_PREFIX_PATH="${cmake_prefix_path}"
-DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_C_COMPILER=mpicc
-DCMAKE_C_FLAGS="-g -Wall"
-DCMAKE_CXX_FLAGS="-g -Wall"
-DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON
-DCMAKE_INSTALL_PREFIX="$(realpath bin)"
-DZMQML_BUILD_PATH="$(realpath "$CUR_DIR/codes/src/surrogate/zmqml")"
-DZeroMQ_INCLUDE_DIR=/usr/include
-DZeroMQ_LIBRARY=/usr/lib/x86_64-linux-gnu/libzmq.so
)
if [ $swm_enable = 1 ]; then
make_args_codes=(
Expand All @@ -121,8 +297,32 @@ if [ $union_enable = 1 ]; then
-DUNION_PKG_CONFIG_PATH="$(realpath "$CUR_DIR/Union/install/lib/pkgconfig")"
)
fi
if [ $torch_enable = 1 ]; then
make_args_codes=("${make_args_codes[@]}" -DUSE_TORCH=true)
# Append the Torch-related CMake flags to make_args_codes.
#   torch_enable = 1, CUDA_HOME set   -> Torch on, CUDA toolkit pinned to CUDA_HOME
#   torch_enable = 1, CUDA_HOME unset -> Torch on, CUDA package discovery disabled
#   otherwise                         -> Torch off
if [ "$torch_enable" = 1 ]; then
    make_args_codes+=(
        -DUSE_TORCH=true
        -DTorch_DIR="${torch_dir}"
    )

    if [ -n "${CUDA_HOME:-}" ]; then
        # libcudart may live under lib64 or lib depending on the toolkit
        # layout. Prefer lib64 and switch to lib only when the library is
        # found there and not under lib64.
        cudart_library="${CUDA_HOME}/lib64/libcudart.so"
        if [ ! -f "${cudart_library}" ] && [ -f "${CUDA_HOME}/lib/libcudart.so" ]; then
            cudart_library="${CUDA_HOME}/lib/libcudart.so"
        fi

        make_args_codes+=(
            -DCUDA_TOOLKIT_ROOT_DIR="${CUDA_HOME}"
            -DCUDAToolkit_ROOT="${CUDA_HOME}"
            -DCUDA_PATH="${CUDA_HOME}"
            -DCUDA_ROOT="${CUDA_HOME}"
            -DCMAKE_CUDA_COMPILER="${CUDA_HOME}/bin/nvcc"
            -DCMAKE_CUDA_ARCHITECTURES="${cuda_arch}"
            -DCUDA_INCLUDE_DIRS="${CUDA_HOME}/include"
            -DCUDA_CUDART_LIBRARY="${cudart_library}"
        )
    else
        make_args_codes+=(
            -DCMAKE_DISABLE_FIND_PACKAGE_CUDA=ON
            -DCMAKE_DISABLE_FIND_PACKAGE_CUDAToolkit=ON
        )
    fi
else
    make_args_codes+=(-DUSE_TORCH=false)
fi
Expand Down
105 changes: 105 additions & 0 deletions codes/surrogate/director-client.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#ifndef __DIRECTOR_CLIENT_H_DEFINED__
#define __DIRECTOR_CLIENT_H_DEFINED__

#include <ross.h>
#include "codes/codes_mapping.h"



#define NUM_DIR_TO_NW_EVENT 20


/*
 * Simulation mode selector. Numbering starts at 1, so 0 never names a mode.
 * (Judging by the names: full PDES versus an iteration-level surrogate — confirm.)
 */
enum SIMULATION_MODE
{
    SIM_MODE_PDES = 1,
    SIM_MODE_ITERATION_SURROGATE = 2,
};


typedef struct director_message director_message;
typedef struct director_annotation director_annotation;

/*
 * Event identifiers for the director. Numbering starts at 1 and is contiguous.
 * (Presumably these are the values stored in director_message.msg_type — confirm.)
 */
enum DIR_EVENTS
{
    DIR_AN_ITER_MARK = 1,
    DIR_OP_NW = 2,
    DIR_REGISTERED_EVENT__SWITCH_TO_SURR = 3,
    DIR_REGISTERED_EVENT__SWITCH_TO_PDES = 4,
    DIR_REGISTERED_EVENT__MOVE_TO_NEXT = 5,
};

/*
 * Director operation identifiers (currently unused).
 * Numbering starts at 1 and is contiguous.
 */
enum DIR_OPERATIONS
{
    DIR_AN_WK_START = 1,
    DIR_AN_WK_ITERATION_END = 2,
    DIR_AN_WK_END = 3,
    DIR_OP_SEND = 4,
    DIR_OP_RECV = 5,
};


// director event message struct
// Holds four integer fields followed by one trailing opaque pointer.
// NOTE(review): the field meanings noted below are inferred from names only;
// confirm them against the implementation before relying on them.
struct director_message
{
int msg_type; // presumably a DIR_EVENTS value — TODO confirm
int op_type; // presumably a DIR_OPERATIONS value (that enum is marked "currently unused") — TODO confirm
int num_rngs; // presumably a count of random-number draws — TODO confirm
int value; // meaning not visible in this header
//model_net_event_return event_rc;
//struct codes_workload_op * mpi_op;

void *buffer; // this pointer MUST be at the end of the structure
};

// director annotation struct
// A pair of integers: an annotation type and an associated value.
struct director_annotation
{
int an_type; // presumably one of the DIR_AN_* constants — TODO confirm
int an_value; // meaning not visible in this header
};


#ifdef __cplusplus
extern "C"
{
#endif


/**
 * @brief Prepares a request to send to the client with the specified command and arguments,
 *        and receives a reply.
 *
 * @param cmd      zmqml request command: 'query', 'launch', 'execute', 'send', 'nothing', 'exit'
 * @param args     the arguments for 'launch' and 'execute'
 * @param bindata  binary data from 'send'
 * @param surrdata containing the 'status' field and optionally 'et' and 'id'.
 *                 If 'status' is not present, returns a vector with "failed".
 *                 Format is "<key1>:<val1>;<key2>:<val2>;..."
 *
 */

//extern char* dir_client_request(const char* cmd,
// const char* args,
// const char* data);


/*
 * Judging by its name, registers the director LP model.
 * NOTE(review): the string parameter is unnamed in this prototype; presumably
 * it is the name to register the LP type under — confirm against the definition.
 */
extern void director_lp_register_model(const char *);


/*
extern void director_parse_args(char *args, int **args_array, int *length);
static void director_issue_codes_event(director_state * s, tw_lpid nw_lpid, int dir_registered_event_type, tw_stime ts, tw_lp* lp);
extern void director_register_events(director_state * s, director_message * msg, tw_lp * lp);
extern void dir_test_init(director_state* s, tw_lp* lp);
extern void director_prepare_iteration_dataset(director_state* s, tw_stime * training_data, int training_cycle, int training_records);
extern void director_get_surrogate_prediction(director_state* s, tw_bf * bf, director_message * m, tw_lp * lp, tw_stime* delay_ts);
extern void dir_test_event_handler(director_state* s, tw_bf * bf, director_message * m, tw_lp * lp);
extern void dir_test_finalize(director_state* s, tw_lp* lp);
*/


#ifdef __cplusplus
}
#endif

#endif
Loading