Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
6ad9509
First pass deployment script.
elliottslaughter Mar 3, 2026
c62405f
Update deploy.sh script.
elliottslaughter Mar 4, 2026
a2adf43
Deployment script completes build.
elliottslaughter Mar 4, 2026
4770f2f
Avoid duplicate cpptrace build, run proj dtgen.
elliottslaughter Mar 4, 2026
11b9b60
Remove all submodules.
elliottslaughter Mar 4, 2026
bad59b3
Kernels don't need to explicitly link cudart because they already con…
elliottslaughter Mar 4, 2026
9606ee8
Don't alias CUDA libraries.
elliottslaughter Mar 4, 2026
9b46808
Remove build parameters for external dependencies.
elliottslaughter Mar 5, 2026
9207034
Remove most aliases.
elliottslaughter Mar 5, 2026
767555e
Add deployment CI job.
elliottslaughter Mar 5, 2026
53cbb21
Fixes for CI test.
elliottslaughter Mar 5, 2026
35e5126
Update dependencies.
elliottslaughter Mar 5, 2026
98cebbb
Make shellcheck happy.
elliottslaughter Mar 5, 2026
bfed112
Add MPI dependency.
elliottslaughter Mar 5, 2026
0672883
Add ibverbs.
elliottslaughter Mar 5, 2026
9b5d989
Make shellcheck happy.
elliottslaughter Mar 5, 2026
406f72d
Add zlib.
elliottslaughter Mar 5, 2026
015529e
Add Python.
elliottslaughter Mar 5, 2026
5ab2bc6
Use GCC 10 everywhere.
elliottslaughter Mar 5, 2026
ab2603a
Set threads for CI.
elliottslaughter Mar 5, 2026
df2d2e6
Fix shellcheck.
elliottslaughter Mar 5, 2026
672abd2
Downgrade CUDA image to get an older cuDNN.
elliottslaughter Mar 5, 2026
2a85d22
Fix format.
elliottslaughter Mar 5, 2026
f9a3714
Revert "Downgrade CUDA image to get an older cuDNN."
elliottslaughter Mar 5, 2026
4abca1e
Fix modern cuDNN build, update thread count.
elliottslaughter Mar 5, 2026
652727a
Put back the FindNCCL module required by Nix.
elliottslaughter Mar 5, 2026
700e11c
Test GCC build matrix.
elliottslaughter Mar 5, 2026
21b17a4
Name tests clearly.
elliottslaughter Mar 5, 2026
14b0970
Stringify GCC version.
elliottslaughter Mar 5, 2026
f4f7c10
Fix code coverage for CUDA.
elliottslaughter Mar 5, 2026
b115549
Fix matrix.
elliottslaughter Mar 5, 2026
2608e8e
Shut off GCC 12.
elliottslaughter Mar 5, 2026
916b9b2
Remove unused FF_CUDA_ARCH flag.
elliottslaughter Mar 6, 2026
08f4a33
Skeleton of run-model binary.
elliottslaughter Mar 6, 2026
39b5eb9
JSON decoding code. DO NOT BUILD.
elliottslaughter Mar 6, 2026
e4bb542
Version of run-model that runs a mapped PCG (without loss).
elliottslaughter Mar 9, 2026
00ceae1
Updates for recent Realm backend changes.
elliottslaughter Mar 25, 2026
2c97026
Fixes for bin/sp-ization-benchmarking.
elliottslaughter Mar 25, 2026
2d8f3e8
Use pip instead of uv in deploy script, add cli arg parsing to deploy…
lockshaw Jun 5, 2026
e9d5adf
Fix shellcheck errors
lockshaw Jun 5, 2026
f915d92
Speed up clones, pull from CC and CXX rather than GCC_VERSION
lockshaw Jun 5, 2026
c87892b
Use deps:: for third-party cmake dependencies
lockshaw Jun 5, 2026
f280fdd
Remove unnecessary shell variables from flake.nix
lockshaw Jun 5, 2026
c735d87
Restore Sapling defaults.
elliottslaughter Jun 5, 2026
75d4d35
Resort apt package list.
elliottslaughter Jun 5, 2026
5df4784
Actually run iterations during training.
elliottslaughter Jun 5, 2026
06fc797
Rename deployment script.
elliottslaughter Jun 5, 2026
00b247f
Fix check for CI.
elliottslaughter Jun 5, 2026
fdc701e
Fix permissions.
elliottslaughter Jun 5, 2026
7a3a862
Fix script path.
elliottslaughter Jun 5, 2026
c653af3
Build deps in a subdirectory.
elliottslaughter Jun 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Workflow that exercises the deployment script inside a CUDA/cuDNN container,
# once per GCC version listed in the build matrix.
name: deploy
on: [push, pull_request, workflow_dispatch]
jobs:
  deploy:
    name: Test Deployment (GCC ${{ matrix.gcc }})
    runs-on: ubuntu-latest
    container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04

    steps:
      - name: Checkout Git Repository
        uses: actions/checkout@v3

      # System packages are installed with apt inside the container; the
      # compiler packages are selected by the matrix entry.
      - name: Install Dependencies
        run: |
          apt-get update -qq
          apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev libmpich-dev mpich python3 python3-venv zlib1g-dev

      # CC/CXX are exported so the script runs with the matrix compiler set.
      - name: Run Deploy Script
        run: ./deploy/sapling.sh
        env:
          CC: gcc-${{ matrix.gcc }}
          CXX: g++-${{ matrix.gcc }}

    strategy:
      matrix:
        gcc: [10, 11]
24 changes: 0 additions & 24 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,24 +0,0 @@
[submodule "deps/nccl"]
path = deps/nccl
url = https://github.com/NVIDIA/nccl.git
[submodule "deps/json"]
path = deps/json
url = https://github.com/nlohmann/json.git
[submodule "deps/spdlog"]
path = deps/spdlog
url = https://github.com/gabime/spdlog.git
[submodule "deps/rapidcheck"]
path = deps/rapidcheck
url = https://github.com/emil-e/rapidcheck.git
[submodule "deps/doctest"]
path = deps/doctest
url = https://github.com/doctest/doctest.git
[submodule "deps/visit_struct"]
path = deps/visit_struct
url = https://github.com/cbeck88/visit_struct.git
[submodule "deps/expected"]
path = deps/expected
url = https://github.com/TartanLlama/expected.git
[submodule "deps/fmt"]
path = deps/fmt
url = https://github.com/fmtlib/fmt.git
5 changes: 4 additions & 1 deletion .proj.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ cuda = false
type = "bin"
cuda = false

[targets.run-model]
type = "bin"
cuda = false

# default_build_targets = [
# "utils",
# ]
Expand All @@ -124,5 +128,4 @@ cuda = false
# ]

[cmake_flags_extra]
FF_CUDA_ARCH = "60"
CMAKE_CUDA_ARCHITECTURES = "60"
45 changes: 7 additions & 38 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
cmake_minimum_required(VERSION 3.10)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(FlexFlow)

set(
CMAKE_MODULE_PATH
${CMAKE_MODULE_PATH}
${CMAKE_CURRENT_LIST_DIR}/cmake
list(
APPEND
CMAKE_MODULE_PATH
${CMAKE_CURRENT_LIST_DIR}/cmake
${CMAKE_CURRENT_LIST_DIR}/cmake/Modules
)

# Detect OS type and Linux version (if it applies)
set(LINUX_VERSION "")
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
find_program(LSB_RELEASE_EXEC lsb_release)
if(LSB_RELEASE_EXEC)
execute_process(COMMAND ${LSB_RELEASE_EXEC} -r --short
OUTPUT_VARIABLE LINUX_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "Linux Version: ${LINUX_VERSION}")
endif()
endif()

set(FF_MAX_DIM "5" CACHE STRING "Maximum tensor order")
set(FF_MAX_OPNAME "128" CACHE STRING "Maximum op name length")
set(FF_MAX_NUM_OUTPUTS "256" CACHE STRING "Maximum number of outputs (per operator)")
Expand All @@ -28,13 +16,12 @@ set(FF_MAX_NUM_WEIGHTS "64" CACHE STRING "Maximum number of weights (per operato
set(FF_MAX_NUM_FUSED_OPERATORS "64" CACHE STRING "Maximum number of fused tensors")
set(FF_MAX_NUM_FUSED_TENSORS "64" CACHE STRING "Maximum number of input and output tensors per fused op")
set(FF_MAX_NUM_WORKERS "1024" CACHE STRING "Maximum number of GPUs")
set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING
set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING
"Maximum number of regions that can be passed to a task through the TaskSpec interface")
set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING
"Maximum number of arguments that can be declared in a TaskSignature")
option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF)
option(FF_USE_PREALM "Build with PRealm profiling interface" ON)
option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF)
option(FF_USE_PYTHON "Enable Python" ON)
option(FF_BUILD_FROM_PYPI "Build from pypi" OFF)
option(FF_USE_CODE_COVERAGE "Enable code coverage" OFF)
Expand All @@ -43,16 +30,6 @@ set(FF_GPU_BACKENDS cuda hip_cuda hip_rocm intel)
set(FF_GPU_BACKEND "cuda" CACHE STRING "Select GPU Backend ${FF_GPU_BACKENDS}")
set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS})

option(FF_USE_EXTERNAL_NCCL "Use pre-installed NCCL" OFF)
option(FF_USE_EXTERNAL_JSON "Use pre-installed nlohmann::json" OFF)
option(FF_USE_EXTERNAL_FMT "Use pre-installed fmt" OFF)
option(FF_USE_EXTERNAL_SPDLOG "Use pre-installed spdlog" OFF)
option(FF_USE_EXTERNAL_DOCTEST "Use pre-installed doctest" OFF)
option(FF_USE_EXTERNAL_RAPIDCHECK "Use pre-installed rapidcheck" OFF)
option(FF_USE_EXTERNAL_EXPECTED "Use pre-installed tl::expected" OFF)
option(FF_USE_EXTERNAL_GBENCHMARK "Use pre-installed google benchmark" OFF)
option(FF_USE_EXTERNAL_LIBASSERT "Use pre-installed libassert" OFF)

option(FF_BUILD_RESNET "build resnet example" OFF)
option(FF_BUILD_RESNEXT "build resnext example" OFF)
option(FF_BUILD_ALEXNET "build alexnet example" OFF)
Expand All @@ -72,15 +49,7 @@ option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" ON)
option(FF_BUILD_SP_IZATION_BENCHMARKING "build sp-ization benchmarking" ON)
option(FF_BUILD_ARG_PARSER "build command line argument parser" OFF)
option(FF_BUILD_BIN_EXPORT_MODEL_ARCH "build export-model-arch utility" ON)

set(FF_CUDA_ARCH "autodetect" CACHE STRING "Target CUDA Arch")
if (FF_CUDA_ARCH STREQUAL "")
message(FATAL_ERROR "FF_CUDA_ARCH cannot be an empty string. Set it to `autodetect`, `all`, or pass one or multiple valid CUDA archs.")
endif()

if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
set(LIBEXT ".so")
endif()
option(FF_BUILD_BIN_RUN_MODEL "build run-model binary" ON)

include(cuda)
include(cudnn)
Expand Down
4 changes: 4 additions & 0 deletions bin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ endif()
if(FF_BUILD_BIN_EXPORT_MODEL_ARCH)
add_subdirectory(export-model-arch)
endif()

if(FF_BUILD_BIN_RUN_MODEL)
add_subdirectory(run-model)
endif()
10 changes: 10 additions & 0 deletions bin/run-model/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Declares the `run-model` executable through the project's
# ff_add_executable() helper.
#   NAME            - name passed for the executable (run-model)
#   SRC_PATTERNS    - source pattern(s) handed to the helper (src/*.cc)
#   PRIVATE_INCLUDE - include directory handed to the helper (include/)
#   DEPS            - dependencies handed to the helper (realm-execution)
# NOTE(review): how the helper expands SRC_PATTERNS (e.g. recursively or not)
# is defined elsewhere and is not visible here -- confirm it picks up
# src/run-model/main.cc.
ff_add_executable(
NAME
run-model
SRC_PATTERNS
src/*.cc
PRIVATE_INCLUDE
include/
DEPS
realm-execution
)
117 changes: 117 additions & 0 deletions bin/run-model/src/run-model/main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#include "pcg/file_format/v1/v1_mapped_parallel_computation_graph.h"
#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h"
#include "realm-execution/distributed_ff_handle.h"
#include "realm-execution/pcg_instance.h"
#include "realm-execution/realm_context.h"
#include "realm-execution/realm_manager.h"
#include "utils/cli/cli_get_help_message.h"
#include "utils/cli/cli_parse.h"
#include "utils/cli/cli_parse_result.h"
#include "utils/cli/cli_spec.h"
#include "utils/nonnegative_int/nonnegative_int.h"
#include "utils/positive_int/positive_int.h"
#include <fstream>
#include <string_view>

using namespace FlexFlow;

// Returns a mutable, NUL-terminated copy of `str`.
//
// The copy is allocated with `new` and deliberately never freed: Realm's
// command-line interface wants `char *` arguments, so the storage is
// intentionally leaked rather than tied to an owner.
static char *leak_string_contents(std::string_view str) {
  auto const length = str.size();
  char *buffer = new char[length + 1];
  str.copy(buffer, length);
  buffer[length] = '\0'; // terminating NUL byte
  return buffer;
}

// Builds the argument vector handed to Realm. It holds a single entry: a
// leaked, NUL-terminated copy of the executable name.
static std::vector<char *> make_realm_args(std::string_view executable_name) {
  char *program = leak_string_contents(executable_name);
  return std::vector<char *>{program};
}

// Entry point for the run-model binary.
//
// Command line: one positional argument, the path to a JSON file holding a
// mapped PCG, plus the help flag added by cli_add_help_flag.
//
// Flow:
//   1. Parse the command line; on a parse error or when help is requested,
//      print the help message to stderr and return 1.
//   2. Start Realm through RealmManager with an argument vector holding only
//      the program name.
//   3. In the controller task: load the mapped PCG from the JSON file, create
//      a PCG instance with a fixed SGD optimizer and no loss, then run all
//      passes for a fixed number of epochs.
//   4. Wait for the controller to finish and return 0.
int main(int argc, char **argv) {
  CLISpec cli = empty_cli_spec();

  CLIArgumentKey arg_key_help = cli_add_help_flag(cli);

  CLIArgumentKey key_mapped_pcg_json = cli_add_positional_argument(
      cli,
      CLIPositionalArgumentSpec{
          "mapped_pcg_json",
          std::nullopt,
          "path to a file containing mapped PCG encoded as JSON"});

  ASSERT(argc >= 1);
  std::string prog_name = argv[0];

  // GNU statement expression: yields the parse result, or leaves main early
  // with a usage message when parsing fails.
  CLIParseResult parsed = ({
    tl::expected<CLIParseResult, std::string> result =
        cli_parse(cli, argc, argv);
    if (!result.has_value()) {
      std::string error_msg = result.error();
      std::cerr << cli_get_help_message(prog_name, cli);
      std::cerr << std::endl;
      std::cerr << "error: " << error_msg << std::endl;
      return 1;
    }

    result.value();
  });

  bool help = cli_get_flag(parsed, arg_key_help);
  if (help) {
    std::cerr << cli_get_help_message(prog_name, cli);
    return 1;
  }

  std::string mapped_pcg_json = cli_get_argument(parsed, key_mapped_pcg_json);

  // Realm receives its own argument vector (program name only); it is handed
  // pointers to argc/argv, so both must be named lvalues that outlive the
  // manager's construction.
  std::vector<char *> realm_args = make_realm_args(prog_name);
  int realm_argc = static_cast<int>(realm_args.size());
  char **realm_argv = realm_args.data();
  RealmManager manager(&realm_argc, &realm_argv);

  ControllerTaskResult result = manager.start_controller([&](RealmContext
                                                                 &ctx) {
    MappedParallelComputationGraph mpcg = [&]() {
      std::ifstream f(mapped_pcg_json);
      // Fail with a clear diagnostic instead of an opaque JSON parse error
      // when the file is missing or unreadable.
      ASSERT(f.is_open(),
             "failed to open mapped PCG JSON file",
             mapped_pcg_json);
      nlohmann::json mpcg_json = nlohmann::json::parse(f);
      return from_v1(mpcg_json.get<V1MappedParallelComputationGraph>());
    }();

    // instantiate computation graph
    OptimizerAttrs optimizer_attrs =
        OptimizerAttrs{SGDOptimizerAttrs{/*lr=*/0.001,
                                         /*momentum=*/0.9,
                                         /*nesterov=*/false,
                                         /*weight_decay=*/0.001}};

    // No input tensors are supplied by this tool.
    std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> input_tensors;

    DistributedFfHandle device_handle =
        create_distributed_ff_handle(ctx,
                                     /*workSpaceSize=*/1024 * 1024,
                                     /*allowTensorOpMathConversion=*/true);

    PCGInstance pcg_instance = create_pcg_instance(
        /*ctx=*/ctx,
        /*mpcg=*/mpcg,
        /*optimizer=*/optimizer_attrs,
        /*loss=*/std::nullopt,
        /*input_tensors=*/input_tensors,
        /*profiling_settings=*/ProfilingSettings{0, 0},
        /*device_handle=*/device_handle);

    // begin training loop
    int num_epochs = 5;
    for (int i = 0; i < num_epochs; i++) {
      perform_all_passes_for_pcg_instance(
          /*instance=*/pcg_instance,
          /*profiling_settings=*/ProfilingSettings{0, 1},
          /*device_handle=*/device_handle);
    }
  });
  // Block until the controller task has completed.
  result.wait();

  return 0;
}
2 changes: 2 additions & 0 deletions bin/sp-ization-benchmarking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ ff_add_executable(
include/
SRC_PATTERNS
*.cc
PRIVATE_INCLUDE
include/
DEPS
utils
rapidcheck
Expand Down
4 changes: 2 additions & 2 deletions cmake/Modules/CodeCoverage.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ foreach(LANG ${LANGUAGES})
endif()
elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU"
AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang")
message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...")
message(FATAL_ERROR "Compiler ${CMAKE_${LANG}_COMPILER_ID} is not GNU or Flang! Aborting...")
endif()
endforeach()

Expand Down Expand Up @@ -748,4 +748,4 @@ function(append_coverage_compiler_flags_to_target name)
if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
target_link_libraries(${name} PRIVATE gcov)
endif()
endfunction()
endfunction()
Loading
Loading