From 6ad9509fb5f3d7b405ee9430f8a9747a710356c1 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Tue, 3 Mar 2026 14:46:29 -0800 Subject: [PATCH 01/51] First pass deployment script. --- CMakeLists.txt | 2 +- deploy.sh | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) create mode 100755 deploy.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 4723a3168d..480c0da625 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.12) project(FlexFlow) set( diff --git a/deploy.sh b/deploy.sh new file mode 100755 index 0000000000..7eb8e3895d --- /dev/null +++ b/deploy.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +set -e + +git submodule update --init + +mkdir -p deploy +pushd deploy + +if [[ ! -e gasnet ]]; then + git clone https://github.com/StanfordLegion/gasnet.git +fi +if [[ ! -e gasnet/release ]]; then + make -C gasnet CONDUIT=ibv +fi +export GASNet_ROOT="$PWD"/gasnet/release + +set -x + +if [[ ! -e realm ]]; then + git clone https://github.com/StanfordLegion/realm.git +fi +if [[ ! -e realm_install/lib ]]; then + mkdir -p realm_build realm_install + pushd realm_build + cmake ../realm -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../realm_install -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_MAX_DIM=5 + make install -j20 + popd # realm_build +fi +export Realm_ROOT="$PWD"/realm_install + +if [[ ! -e zstd ]]; then + git clone https://github.com/facebook/zstd.git +fi +if [[ ! -e zstd_install/lib ]]; then + mkdir -p zstd_build zstd_install + pushd zstd_build + cmake ../zstd -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../zstd_install -DBUILD_SHARED_LIBS=ON + make install -j20 + popd # zstd_build +fi +export zstd_ROOT="$PWD"/zstd_install + +if [[ ! -e benchmark ]]; then + git clone https://github.com/google/benchmark.git +fi +if [[ ! -e benchmark_install/lib ]]; then + mkdir -p benchmark_build benchmark_install + pushd benchmark_build + cmake ../benchmark -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$PWD"/../benchmark_install -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + make install -j20 + popd # benchmark_build +fi +export benchmark_ROOT="$PWD"/benchmark_install + +if [[ ! -e libassert ]]; then + git clone https://github.com/jeremy-rifkin/libassert.git +fi +if [[ ! -e libassert_install/lib ]]; then + mkdir -p libassert_build libassert_install + pushd libassert_build + cmake ../libassert -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../libassert_install + make install -j20 + popd # libassert_build +fi +export libassert_ROOT="$PWD"/libassert_install + +if [[ ! -e cpptrace ]]; then + git clone https://github.com/jeremy-rifkin/cpptrace.git +fi +if [[ ! -e cpptrace_install/lib ]]; then + mkdir -p cpptrace_build cpptrace_install + pushd cpptrace_build + cmake ../cpptrace -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../cpptrace_install -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON + make install -j20 + popd # cpptrace_build +fi +export cpptrace_ROOT="$PWD"/cpptrace_install + +popd # deploy + +mkdir build install +pushd build +cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$PWD/../install -DFF_USE_EXTERNAL_GBENCHMARK=ON -DFF_USE_EXTERNAL_LIBASSERT=ON +make install -j20 +popd # build From c62405fbd7906f75cbb04e8659596303fc8f5d06 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Tue, 3 Mar 2026 20:28:18 -0800 Subject: [PATCH 02/51] Update deploy.sh script. --- deploy.sh | 99 ++++++++++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/deploy.sh b/deploy.sh index 7eb8e3895d..664cae9812 100755 --- a/deploy.sh +++ b/deploy.sh @@ -7,6 +7,29 @@ git submodule update --init mkdir -p deploy pushd deploy +function build_cmake_library { + dep_name="$1" + dep_url="$2" + dep_args=("${@:3}") + if [[ ! -e ${dep_name} ]]; then + git clone "${dep_url}" "${dep_name}" + fi + if [[ ! -e "${dep_name}"_install/lib ]]; then + mkdir -p "${dep_name}"_build "${dep_name}"_install + pushd "${dep_name}"_build + cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" + make install -j20 + popd + fi + export "${dep_name}"_ROOT="$PWD"/"${dep_name}"_install +} + +if [[ ! -e uv ]]; then + mkdir uv + XDG_BIN_HOME="$PWD"/uv sh <(curl -LsSf https://astral.sh/uv/install.sh) --no-modify-path +fi +export PATH="$PATH:$PWD/uv" + if [[ ! -e gasnet ]]; then git clone https://github.com/StanfordLegion/gasnet.git fi @@ -17,70 +40,34 @@ export GASNet_ROOT="$PWD"/gasnet/release set -x -if [[ ! -e realm ]]; then - git clone https://github.com/StanfordLegion/realm.git -fi -if [[ ! -e realm_install/lib ]]; then - mkdir -p realm_build realm_install - pushd realm_build - cmake ../realm -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../realm_install -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_MAX_DIM=5 - make install -j20 - popd # realm_build -fi -export Realm_ROOT="$PWD"/realm_install +build_cmake_library Realm https://github.com/StanfordLegion/realm.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_MAX_DIM=5 -if [[ ! -e zstd ]]; then - git clone https://github.com/facebook/zstd.git -fi -if [[ ! -e zstd_install/lib ]]; then - mkdir -p zstd_build zstd_install - pushd zstd_build - cmake ../zstd -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../zstd_install -DBUILD_SHARED_LIBS=ON - make install -j20 - popd # zstd_build -fi -export zstd_ROOT="$PWD"/zstd_install +build_cmake_library zstd https://github.com/facebook/zstd.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -if [[ ! -e benchmark ]]; then - git clone https://github.com/google/benchmark.git -fi -if [[ ! -e benchmark_install/lib ]]; then - mkdir -p benchmark_build benchmark_install - pushd benchmark_build - cmake ../benchmark -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$PWD"/../benchmark_install -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON - make install -j20 - popd # benchmark_build -fi -export benchmark_ROOT="$PWD"/benchmark_install +build_cmake_library benchmark https://github.com/google/benchmark.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -if [[ ! -e libassert ]]; then - git clone https://github.com/jeremy-rifkin/libassert.git -fi -if [[ ! -e libassert_install/lib ]]; then - mkdir -p libassert_build libassert_install - pushd libassert_build - cmake ../libassert -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../libassert_install - make install -j20 - popd # libassert_build -fi -export libassert_ROOT="$PWD"/libassert_install +build_cmake_library libassert https://github.com/jeremy-rifkin/libassert.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -if [[ ! -e cpptrace ]]; then - git clone https://github.com/jeremy-rifkin/cpptrace.git -fi -if [[ ! -e cpptrace_install/lib ]]; then - mkdir -p cpptrace_build cpptrace_install - pushd cpptrace_build - cmake ../cpptrace -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD"/../cpptrace_install -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON - make install -j20 - popd # cpptrace_build -fi -export cpptrace_ROOT="$PWD"/cpptrace_install +build_cmake_library cpptrace https://github.com/jeremy-rifkin/cpptrace.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON + +build_cmake_library NCCL https://github.com/NVIDIA/nccl.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +# if [[ ! -e proj ]]; then +# git clone -b python-install https://github.com/elliottslaughter/proj.git +# pushd proj +# uv venv +# uv sync +# popd # proj +# fi +# source proj/.venv/bin/activate +# export PATH="$PATH:$PWD/proj/bin" +# export PYTHONPATH="$PYTHONPATH:$PWD/proj" popd # deploy mkdir build install pushd build -cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$PWD/../install -DFF_USE_EXTERNAL_GBENCHMARK=ON -DFF_USE_EXTERNAL_LIBASSERT=ON +cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$PWD/../install -DFF_USE_EXTERNAL_GBENCHMARK=ON -DFF_USE_EXTERNAL_LIBASSERT=ON -DFF_USE_EXTERNAL_NCCL=ON +# proj dtgen make install -j20 popd # build From a2adf4301e7b4258dd32cedcf880441470ce7569 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 13:17:16 -0800 Subject: [PATCH 03/51] Deployment script completes build. --- CMakeLists.txt | 34 +----- cmake/Modules/FindNCCL.cmake | 175 ----------------------------- cmake/cuda.cmake | 92 +-------------- cmake/cudnn.cmake | 51 ++------- cmake/doctestlib.cmake | 5 +- cmake/expected.cmake | 4 +- cmake/fmt.cmake | 2 +- cmake/gbenchmark.cmake | 3 +- cmake/json.cmake | 5 +- cmake/libassert.cmake | 2 +- cmake/nccl.cmake | 29 +---- cmake/rapidcheck.cmake | 2 +- cmake/spdlog.cmake | 7 +- cmake/zlib.cmake | 8 -- deploy.sh | 58 ++++++++-- lib/CMakeLists.txt | 2 +- lib/compiler/CMakeLists.txt | 2 +- lib/kernels/CMakeLists.txt | 9 +- lib/kernels/test/CMakeLists.txt | 5 +- lib/local-execution/CMakeLists.txt | 2 +- lib/pcg/ffi/src/pcg.cc | 2 +- lib/realm-execution/CMakeLists.txt | 2 +- lib/task-spec/CMakeLists.txt | 2 +- lib/utils/CMakeLists.txt | 4 +- 24 files changed, 93 insertions(+), 414 deletions(-) delete mode 100644 cmake/Modules/FindNCCL.cmake delete mode 100644 cmake/zlib.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 480c0da625..906e4e4697 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,25 +1,13 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(FlexFlow) -set( - CMAKE_MODULE_PATH - ${CMAKE_MODULE_PATH} - ${CMAKE_CURRENT_LIST_DIR}/cmake +list( + APPEND + CMAKE_MODULE_PATH + ${CMAKE_CURRENT_LIST_DIR}/cmake ${CMAKE_CURRENT_LIST_DIR}/cmake/Modules ) -# Detect OS type and Linux version (if it applies) -set(LINUX_VERSION "") -if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - find_program(LSB_RELEASE_EXEC lsb_release) - if(LSB_RELEASE_EXEC) - execute_process(COMMAND ${LSB_RELEASE_EXEC} -r --short - OUTPUT_VARIABLE LINUX_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "Linux Version: ${LINUX_VERSION}") - endif() -endif() - set(FF_MAX_DIM "5" CACHE STRING "Maximum tensor order") set(FF_MAX_OPNAME "128" CACHE STRING "Maximum op name length") set(FF_MAX_NUM_OUTPUTS "256" CACHE STRING "Maximum number of outputs (per operator)") @@ -28,13 +16,12 @@ set(FF_MAX_NUM_WEIGHTS "64" CACHE STRING "Maximum number of weights (per operato set(FF_MAX_NUM_FUSED_OPERATORS "64" CACHE STRING "Maximum number of fused tensors") set(FF_MAX_NUM_FUSED_TENSORS "64" CACHE STRING "Maximum number of input and output tensors per fused op") set(FF_MAX_NUM_WORKERS "1024" CACHE STRING "Maximum number of GPUs") -set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING +set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING "Maximum number of regions that can be passed to a task through the TaskSpec interface") set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING "Maximum number of arguments that can be declared in a TaskSignature") option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF) option(FF_USE_PREALM "Build with PRealm profiling interface" ON) -option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF) option(FF_USE_PYTHON "Enable Python" ON) option(FF_BUILD_FROM_PYPI "Build from pypi" OFF) option(FF_USE_CODE_COVERAGE "Enable code coverage" OFF) @@ -73,15 +60,6 @@ option(FF_BUILD_SP_IZATION_BENCHMARKING "build sp-ization benchmarking" ON) option(FF_BUILD_ARG_PARSER "build command line argument parser" OFF) option(FF_BUILD_BIN_EXPORT_MODEL_ARCH "build export-model-arch utility" ON) -set(FF_CUDA_ARCH "autodetect" CACHE STRING "Target CUDA Arch") -if (FF_CUDA_ARCH STREQUAL "") - message(FATAL_ERROR "FF_CUDA_ARCH cannot be an empty string. Set it to `autodetect`, `all`, or pass one or multiple valid CUDA archs.") -endif() - -if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(LIBEXT ".so") -endif() - include(cuda) include(cudnn) include(nccl) diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake deleted file mode 100644 index 796818c0cf..0000000000 --- a/cmake/Modules/FindNCCL.cmake +++ /dev/null @@ -1,175 +0,0 @@ -# from https://github.com/pytorch/pytorch/blob/818b14025a1d70872b52d28a1e83e7797f6e271a/cmake/Modules/FindNCCL.cmake - -################################################################################ -# -# From PyTorch: -# -# Copyright (c) 2016- Facebook, Inc (Adam Paszke) -# Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -# Copyright (c) 2011-2013 NYU (Clement Farabet) -# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) -# Copyright (c) 2006 Idiap Research Institute (Samy Bengio) -# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) -# -# From Caffe2: -# -# Copyright (c) 2016-present, Facebook Inc. All rights reserved. -# -# All contributions by Facebook: -# Copyright (c) 2016 Facebook Inc. -# -# All contributions by Google: -# Copyright (c) 2015 Google Inc. -# All rights reserved. -# -# All contributions by Yangqing Jia: -# Copyright (c) 2015 Yangqing Jia -# All rights reserved. -# -# All contributions by Kakao Brain: -# Copyright 2019-2020 Kakao Brain -# -# All contributions by Cruise LLC: -# Copyright (c) 2022 Cruise LLC. -# All rights reserved. -# -# All contributions from Caffe: -# Copyright(c) 2013, 2014, 2015, the respective contributors -# All rights reserved. -# -# All other contributions: -# Copyright(c) 2015, 2016 the respective contributors -# All rights reserved. -# -# Caffe2 uses a copyright model similar to Caffe: each contributor holds -# copyright over their contributions to Caffe2. The project versioning records -# all such contribution and copyright details. If a contributor wants to further -# mark their specific copyright on a particular contribution, they should -# indicate their copyright solely in the commit message of the change when it is -# committed. -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America -# and IDIAP Research Institute nor the names of its contributors may be -# used to endorse or promote products derived from this software without -# specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -# -################################################################################ - - -# Find the nccl libraries -# -# The following variables are optionally searched for defaults -# NCCL_ROOT: Base directory where all NCCL components are found -# NCCL_INCLUDE_DIR: Directory where NCCL header is found -# NCCL_LIB_DIR: Directory where NCCL library is found -# -# The following are set after configuration is done: -# NCCL_FOUND -# NCCL_INCLUDE_DIRS -# NCCL_LIBRARIES -# -# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks -# install NCCL in the same location as the CUDA toolkit. -# See https://github.com/caffe2/caffe2/issues/1601 - -set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers") -set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries") -set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with") - -if ($ENV{NCCL_ROOT_DIR}) - message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.") -endif() -list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}) -# Compatible layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12. -list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT}) - -find_path(NCCL_INCLUDE_DIRS - NAMES nccl.h - HINTS ${NCCL_INCLUDE_DIR}) - -if (USE_STATIC_NCCL) - MESSAGE(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.") - SET(NCCL_LIBNAME "nccl_static") - if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified - set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) - endif() -else() - SET(NCCL_LIBNAME "nccl") - if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified - set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) - endif() -endif() - -find_library(NCCL_LIBRARIES - NAMES ${NCCL_LIBNAME} - HINTS ${NCCL_LIB_DIR}) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES) - -if(NCCL_FOUND) # obtaining NCCL version and some sanity checks - set (NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h") - message (STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...") - set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) - list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS}) - include(CheckCXXSymbolExists) - check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED) - - if (NCCL_VERSION_DEFINED) - set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc") - file(WRITE ${file} " - #include - #include - int main() - { - std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl; - - int x; - ncclGetVersion(&x); - return x == NCCL_VERSION_CODE; - } -") - try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file} - RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER - CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}" - LINK_LIBRARIES ${NCCL_LIBRARIES}) - if (NOT NCCL_VERSION_MATCHED) - message(FATAL_ERROR "Found NCCL header version and library version do not match! \ -(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.") - endif() - message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}") - else() - message(STATUS "NCCL version < 2.3.5-5") - endif () - set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES}) - - message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})") - mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES) -endif() diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index da358e31dd..14dce85829 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,89 +1,7 @@ -set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) +include(aliasing) -set(CUDA_ROOT ${CUDA_PATH}) -set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_PATH}) -list(APPEND CMAKE_PREFIX_PATH ${CUDA_ROOT}) -find_package(CUDA REQUIRED) -message("cuda-libraries ${CUDA_LIBRARIES}") +find_package(CUDAToolkit 11.7 REQUIRED) +enable_language(CUDA) -if(CUDA_FOUND) - # strip the cudart lib - string(REGEX REPLACE "[^\;]*cudart[^\;]*(\;?)" "" CUDA_LIBRARIES "${CUDA_LIBRARIES}") - set(CUDA_LIBRARIES ${CUDA_LIBRARIES}) - - # set cuda runtime and driver lib - # override cublas and curand because the FindCUDA module may not find the correct libs - set(CUDADRV_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libcuda${LIBEXT}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas${LIBEXT}) - set(CUDA_curand_LIBRARY ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand${LIBEXT}) - list(APPEND FLEXFLOW_EXT_LIBRARIES - ${CUDADRV_LIBRARIES} - ${CUDA_CUBLAS_LIBRARIES} - ${CUDA_curand_LIBRARY}) - - # Snippet below from legion/cmake/newcmake/FindCUDA.cmake - # Find the `nvcc` executable - find_program(CUDA_NVCC_EXECUTABLE - NAMES nvcc - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" - ENV CUDA_PATH - ENV CUDA_BIN_PATH - PATH_SUFFIXES bin bin64 - NO_DEFAULT_PATH - ) - # Search default search paths, after we search our own set of paths. - find_program(CUDA_NVCC_EXECUTABLE nvcc) - mark_as_advanced(CUDA_NVCC_EXECUTABLE) - # Compute the CUDA version. - if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) - execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") - mark_as_advanced(CUDA_VERSION) - else() - # Need to set these based off of the cached value - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}") - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}") - endif() - - # Set FF_CUDA_ARCH to the list of GPU architectures found on the machine. - if("${FF_CUDA_ARCH}" STREQUAL "autodetect") - include(utils) - detect_installed_gpus(DETECTED_CUDA_ARCH) - message( STATUS "CUDA Detected CUDA_ARCH : ${DETECTED_CUDA_ARCH}" ) - set(FF_CUDA_ARCH ${DETECTED_CUDA_ARCH}) - # Set FF_CUDA_ARCH to the list of all GPU architectures compatible with FlexFlow - elseif("${FF_CUDA_ARCH}" STREQUAL "all") - set(FF_CUDA_ARCH 60,61,62,70,72,75,80,86) - endif() - - # create CUDA_GENCODE list based on FF_CUDA_ARCH - string(REPLACE "," ";" CUDA_GENCODE "${FF_CUDA_ARCH}") - foreach(CODE ${CUDA_GENCODE}) - if(CODE LESS 60) - message( FATAL_ERROR "CUDA architecture <60 not supported") - endif() - endforeach() - string(REGEX REPLACE "([0-9]+)" "-gencode arch=compute_\\1,code=sm_\\1" CUDA_GENCODE "${CUDA_GENCODE}") - - #output - message( STATUS "CUDA_VERSION: ${CUDA_VERSION}") - message( STATUS "CUDA root path : ${CUDA_TOOLKIT_ROOT_DIR}" ) - message( STATUS "CUDA include path : ${CUDA_INCLUDE_DIRS}" ) - message( STATUS "CUDA runtime libraries : ${CUDA_LIBRARIES}" ) - message( STATUS "CUDA driver libraries : ${CUDADRV_LIBRARIES}" ) - message( STATUS "CUBLAS libraries : ${CUDA_CUBLAS_LIBRARIES}" ) - message( STATUS "CURAND libraries : ${CUDA_curand_LIBRARY}" ) - message( STATUS "CUDA Arch : ${FF_CUDA_ARCH}" ) - message( STATUS "CUDA_GENCODE: ${CUDA_GENCODE}") - - list(APPEND FLEXFLOW_INCLUDE_DIRS - ${CUDA_INCLUDE_DIRS}) - - add_library(cuda INTERFACE) - target_include_directories(cuda SYSTEM INTERFACE "${CUDA_INCLUDE_DIRS}") - -else() - message( FATAL_ERROR "CUDA package not found -> specify search path via CUDA_ROOT variable") -endif() +alias_library(cudart CUDA::cudart) +alias_library(cublas CUDA::cublas) diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index 1055931cef..50b4093198 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -1,46 +1,13 @@ -# find cudnn in CUDNN_ROOT and CUDA_ROOT -if(CUDNN_PATH) - set(CUDNN_ROOT ${CUDNN_PATH}) -else() - # if CUDNN_PATH is not set, let's try to find it in the CUDA root - set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR}) - message( "CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}" ) -endif() -find_library(CUDNN_LIBRARY - NAMES cudnn - PATHS ${CUDNN_ROOT} ${CUDA_ROOT} - PATH_SUFFIXES lib lib64 - DOC "CUDNN library." ) - -find_path(CUDNN_INCLUDE_DIR - NAMES cudnn.h - HINTS ${CUDNN_ROOT} ${CUDA_ROOT} - PATH_SUFFIXES include - DOC "CUDNN include directory." ) +find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h HINTS ${CUDAToolkit_INCLUDE_DIRS}) +find_library(CUDNN_LIBRARY NAMES cudnn HINTS ${CUDAToolkit_LIBRARY_DIR}) -# find cudnn, set cudnn lib and include -if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) - set(CUDNN_FOUND ON) - set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) - set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) -endif() +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY CUDNN_INCLUDE_DIR) -# find cuda and cudnn if(CUDNN_FOUND) - list(APPEND FLEXFLOW_EXT_LIBRARIES - ${CUDNN_LIBRARIES}) - - list(APPEND FLEXFLOW_INCLUDE_DIRS - ${CUDNN_INCLUDE_DIR}) + add_library(cudnn UNKNOWN IMPORTED) + set_target_properties(cudnn PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + ) endif() - -if(CUDNN_FOUND) -message( STATUS "CUDNN include : ${CUDNN_INCLUDE_DIR}" ) - message( STATUS "CUDNN libraries : ${CUDNN_LIBRARIES}" ) -else() - message( FATAL_ERROR "CUDNN package not found -> specify search path via CUDNN_DIR variable") -endif() - -add_library(cudnn INTERFACE) -target_include_directories(cudnn SYSTEM INTERFACE ${CUDNN_INCLUDE_DIR}) -target_link_libraries(cudnn INTERFACE ${CUDNN_LIBRARY}) diff --git a/cmake/doctestlib.cmake b/cmake/doctestlib.cmake index 6a41a0d5f0..b303f6715f 100644 --- a/cmake/doctestlib.cmake +++ b/cmake/doctestlib.cmake @@ -1,5 +1,5 @@ include(aliasing) - + if (FF_USE_EXTERNAL_DOCTEST) find_package(doctest REQUIRED) include(doctest) # import doctest_discover_tests @@ -11,6 +11,5 @@ if (FF_USE_EXTERNAL_DOCTEST) ) alias_library(doctest doctest::doctest) else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/doctest) - include(${CMAKE_CURRENT_SOURCE_DIR}/deps/doctest/scripts/cmake/doctest.cmake) + message(FATAL_ERROR "FF_USE_EXTERNAL_DOCTEST is required") endif() diff --git a/cmake/expected.cmake b/cmake/expected.cmake index 17c73532fe..d2a7d965f4 100644 --- a/cmake/expected.cmake +++ b/cmake/expected.cmake @@ -3,7 +3,5 @@ if (FF_USE_EXTERNAL_EXPECTED) find_package(tl-expected REQUIRED) alias_library(expected tl::expected) else() - set(EXPECTED_BUILD_TESTS OFF) - set(EXPECTED_BUILD_PACKAGE OFF) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/expected) + message(FATAL_ERROR "FF_USE_EXTERNAL_EXPECTED is required") endif() diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index 470de6a847..23b8fef6e5 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -3,6 +3,6 @@ include(aliasing) if (FF_USE_EXTERNAL_FMT) find_package(fmt REQUIRED) else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/fmt) + message(FATAL_ERROR "FF_USE_EXTERNAL_FMT is required") endif() alias_library(fmt fmt::fmt) diff --git a/cmake/gbenchmark.cmake b/cmake/gbenchmark.cmake index d5bab19a1f..bfdf57a154 100644 --- a/cmake/gbenchmark.cmake +++ b/cmake/gbenchmark.cmake @@ -5,6 +5,5 @@ if (FF_USE_EXTERNAL_GBENCHMARK) alias_library(gbenchmark benchmark::benchmark) alias_library(gbenchmark-main benchmark::benchmark_main) else() - message(FATAL_ERROR "Currently FF_USE_EXTERNAL_GBENCHMARK is required") + message(FATAL_ERROR "FF_USE_EXTERNAL_GBENCHMARK is required") endif() - diff --git a/cmake/json.cmake b/cmake/json.cmake index 093ec51cdc..a19d2ef1d1 100644 --- a/cmake/json.cmake +++ b/cmake/json.cmake @@ -5,8 +5,5 @@ if (FF_USE_EXTERNAL_JSON) alias_library(json nlohmann_json) else() - set(JSON_BuildTests OFF CACHE INTERNAL "") - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/json) - - alias_library(json nlohmann_json::nlohmann_json) + message(FATAL_ERROR "FF_USE_EXTERNAL_JSON is required") endif() diff --git a/cmake/libassert.cmake b/cmake/libassert.cmake index 3abe03b014..ab50fd22d8 100644 --- a/cmake/libassert.cmake +++ b/cmake/libassert.cmake @@ -3,7 +3,7 @@ include(aliasing) if(FF_USE_EXTERNAL_LIBASSERT) find_package(libassert REQUIRED) else() - message(FATAL_ERROR "Currently FF_USE_EXTERNAL_LIBASSERT is required") + message(FATAL_ERROR "FF_USE_EXTERNAL_LIBASSERT is required") endif() alias_library(libassert libassert::assert) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 755fe00f1b..e092e60f5f 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,37 +1,12 @@ include(aliasing) -add_library(nccl INTERFACE) - if (FF_USE_EXTERNAL_NCCL) find_package(NCCL REQUIRED) else() - message(STATUS "Building NCCL from source") - list(TRANSFORM CUDA_GENCODE PREPEND "NVCC_GENCODE=" OUTPUT_VARIABLE NCCL_BUILD_NVCC_GENCODE) - - include(ExternalProject) - ExternalProject_Add(nccl_source_build - SOURCE_DIR ${PROJECT_SOURCE_DIR}/deps/${NCCL_NAME} - PREFIX ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME} - INSTALL_DIR ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME} - BUILD_BYPRODUCTS ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME}/lib/libnccl${LIBEXT} - INSTALL_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND make src.build "${NCCL_BUILD_NVCC_GENCODE}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "BUILDDIR=${CMAKE_BINARY_DIR}/deps/${NCCL_NAME}" - BUILD_IN_SOURCE 1 - ) - - ExternalProject_Get_Property(nccl_source_build INSTALL_DIR) - set_directory_properties(PROPERTIES ADDITIONAL_CLEAN_FILES "${CMAKE_BINARY_DIR}/deps/nccl_source_build/lib/") - - install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/nccl_source_build/include/ DESTINATION include) - install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/nccl_source_build/lib/ DESTINATION lib PATTERN "pkgconfig" EXCLUDE) - - set(NCCL_INCLUDE_DIR "${INSTALL_DIR}/include") - set(NCCL_LIBRARIES "${INSTALL_DIR}/lib/libnccl${LIBEXT}") - - add_dependencies(nccl nccl_source_build) + message(FATAL_ERROR "FF_USE_EXTERNAL_NCCL is required") endif() message(STATUS "NCCL_LIBRARIES = ${NCCL_LIBRARIES}") +add_library(nccl INTERFACE) target_include_directories(nccl SYSTEM INTERFACE ${NCCL_INCLUDE_DIRS}) target_link_libraries(nccl INTERFACE ${NCCL_LIBRARIES}) diff --git a/cmake/rapidcheck.cmake b/cmake/rapidcheck.cmake index bf8f058e63..179d9d07a7 100644 --- a/cmake/rapidcheck.cmake +++ b/cmake/rapidcheck.cmake @@ -1,5 +1,5 @@ if (FF_USE_EXTERNAL_RAPIDCHECK) find_package(rapidcheck REQUIRED) else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/rapidcheck) + message(FATAL_ERROR "FF_USE_EXTERNAL_RAPIDCHECK is required") endif() diff --git a/cmake/spdlog.cmake b/cmake/spdlog.cmake index 5ba1d6cc15..ad806cf56c 100644 --- a/cmake/spdlog.cmake +++ b/cmake/spdlog.cmake @@ -3,10 +3,5 @@ include(aliasing) if (FF_USE_EXTERNAL_SPDLOG) find_package(spdlog REQUIRED) else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/spdlog) + message(FATAL_ERROR "FF_USE_EXTERNAL_SPDLOG is required") endif() - -add_library(ff_spdlog INTERFACE) -target_link_libraries(ff_spdlog INTERFACE spdlog::spdlog) -target_compile_definitions(ff_spdlog INTERFACE SPDLOG_FMT_EXTERNAL) -alias_library(spdlog ff_spdlog) diff --git a/cmake/zlib.cmake b/cmake/zlib.cmake deleted file mode 100644 index 0281e02b88..0000000000 --- a/cmake/zlib.cmake +++ /dev/null @@ -1,8 +0,0 @@ -find_package(ZLIB REQUIRED) -if(ZLIB_FOUND) - list(APPEND FLEXFLOW_EXT_LIBRARIES - ${ZLIB_LIBRARIES}) - message( STATUS "ZLIB libraries : ${ZLIB_LIBRARIES}" ) -else() - message( FATAL_ERROR "ZLIB package not found") -endif() \ No newline at end of file diff --git a/deploy.sh b/deploy.sh index 664cae9812..6020a8ee10 100755 --- a/deploy.sh +++ b/deploy.sh @@ -2,6 +2,10 @@ set -e +module load cuda cmake +export CC=gcc-10 +export CXX=g++-10 + git submodule update --init mkdir -p deploy @@ -12,16 +16,21 @@ function build_cmake_library { dep_url="$2" dep_args=("${@:3}") if [[ ! -e ${dep_name} ]]; then - git clone "${dep_url}" "${dep_name}" + if [[ ${dep_url} == *.git ]]; then + git clone "${dep_url}" "${dep_name}" + else + mkdir "${dep_name}" + tar xfz <(curl -LsSf "${dep_url}") -C "${dep_name}" --strip-components=1 + fi fi - if [[ ! -e "${dep_name}"_install/lib ]]; then + if [[ ! -e "${dep_name}"_install/include ]]; then mkdir -p "${dep_name}"_build "${dep_name}"_install pushd "${dep_name}"_build cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" make install -j20 popd fi - export "${dep_name}"_ROOT="$PWD"/"${dep_name}"_install + export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" } if [[ ! -e uv ]]; then @@ -40,17 +49,29 @@ export GASNet_ROOT="$PWD"/gasnet/release set -x -build_cmake_library Realm https://github.com/StanfordLegion/realm.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_MAX_DIM=5 - build_cmake_library zstd https://github.com/facebook/zstd.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -build_cmake_library benchmark https://github.com/google/benchmark.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON +build_cmake_library fmt https://github.com/fmtlib/fmt/archive/refs/tags/10.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library libassert https://github.com/jeremy-rifkin/libassert/archive/refs/tags/v2.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library cpptrace https://github.com/jeremy-rifkin/cpptrace/archive/refs/tags/v1.0.4.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON + +build_cmake_library Realm https://github.com/StanfordLegion/realm.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_ENABLE_CPPTRACE=ON -DREALM_ENABLE_HDF5=OFF -DREALM_MAX_DIM=5 -build_cmake_library libassert https://github.com/jeremy-rifkin/libassert.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON +build_cmake_library benchmark https://github.com/google/benchmark/archive/refs/tags/v1.9.5.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -build_cmake_library cpptrace https://github.com/jeremy-rifkin/cpptrace.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON +build_cmake_library rapidcheck https://github.com/emil-e/rapidcheck.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -build_cmake_library NCCL https://github.com/NVIDIA/nccl.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON +build_cmake_library tl-expected https://github.com/TartanLlama/expected/archive/refs/tags/v1.3.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library doctest https://github.com/doctest/doctest/archive/refs/tags/v2.4.12.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library spdlog https://github.com/gabime/spdlog/archive/refs/tags/v1.17.0.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSPDLOG_FMT_EXTERNAL=ON + +build_cmake_library nlohmann_json https://github.com/nlohmann/json/archive/refs/tags/v3.12.0.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library NCCL https://github.com/NVIDIA/nccl/archive/refs/tags/v2.29.7-1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON # if [[ ! -e proj ]]; then # git clone -b python-install https://github.com/elliottslaughter/proj.git @@ -65,9 +86,24 @@ build_cmake_library NCCL https://github.com/NVIDIA/nccl.git -DCMAKE_BUILD_TYPE=R popd # deploy +ff_cmake_flags=( + -DCMAKE_BUILD_TYPE=RelWithDebInfo + -DCMAKE_INSTALL_PREFIX=$PWD/../install + -DCMAKE_CUDA_ARCHITECTURES=60 + -DFF_USE_EXTERNAL_DOCTEST=ON + -DFF_USE_EXTERNAL_EXPECTED=ON + -DFF_USE_EXTERNAL_FMT=ON + -DFF_USE_EXTERNAL_GBENCHMARK=ON + -DFF_USE_EXTERNAL_JSON=ON + -DFF_USE_EXTERNAL_LIBASSERT=ON + -DFF_USE_EXTERNAL_NCCL=ON + -DFF_USE_EXTERNAL_RAPIDCHECK=ON + -DFF_USE_EXTERNAL_SPDLOG=ON +) + mkdir build install pushd build -cmake .. -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$PWD/../install -DFF_USE_EXTERNAL_GBENCHMARK=ON -DFF_USE_EXTERNAL_LIBASSERT=ON -DFF_USE_EXTERNAL_NCCL=ON +cmake .. "${ff_cmake_flags[@]}" # proj dtgen -make install -j20 +make -j20 popd # build diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 501de709c1..c50e35620a 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -6,7 +6,7 @@ add_subdirectory(local-execution) add_subdirectory(realm-execution) add_subdirectory(task-spec) add_subdirectory(utils) -add_subdirectory(ffi) +#add_subdirectory(ffi) add_subdirectory(substitutions) add_subdirectory(substitution-generator) add_subdirectory(models) diff --git a/lib/compiler/CMakeLists.txt b/lib/compiler/CMakeLists.txt index 26fdc8a38a..df4a9d27ae 100644 --- a/lib/compiler/CMakeLists.txt +++ b/lib/compiler/CMakeLists.txt @@ -12,7 +12,7 @@ ff_add_library( utils json pcg - spdlog + spdlog::spdlog substitutions ) diff --git a/lib/kernels/CMakeLists.txt b/lib/kernels/CMakeLists.txt index f5d88f102f..a72f4ab30a 100644 --- a/lib/kernels/CMakeLists.txt +++ b/lib/kernels/CMakeLists.txt @@ -1,6 +1,6 @@ set(project_target kernels) -project(${project_target} +project(${project_target} LANGUAGES CXX CUDA) file(GLOB_RECURSE SRC @@ -25,7 +25,8 @@ target_include_directories( target_link_libraries( ${project_target} op-attrs - cuda + cudart + cublas cudnn nccl utils @@ -35,8 +36,8 @@ target_link_libraries( define_ff_vars(${project_target}) set_target_properties( - ${project_target} - PROPERTIES + ${project_target} + PROPERTIES CUDA_STANDARD 17 ) diff --git a/lib/kernels/test/CMakeLists.txt b/lib/kernels/test/CMakeLists.txt index 981f87b3d8..f902d433a8 100644 --- a/lib/kernels/test/CMakeLists.txt +++ b/lib/kernels/test/CMakeLists.txt @@ -3,16 +3,15 @@ ff_add_test_executable( kernels-tests SRC_PATTERNS src/*.cc - PRIVATE_INCLUDE + PRIVATE_INCLUDE src/ DEPS doctest utils-test-common kernels op-attrs - cuda - cudnn cudart cublas + cudnn pcg ) diff --git a/lib/local-execution/CMakeLists.txt b/lib/local-execution/CMakeLists.txt index b75f81fb3e..a3cfd4e8d3 100644 --- a/lib/local-execution/CMakeLists.txt +++ b/lib/local-execution/CMakeLists.txt @@ -13,7 +13,7 @@ ff_add_library( kernels task-spec pcg - spdlog + spdlog::spdlog compiler ) diff --git a/lib/pcg/ffi/src/pcg.cc b/lib/pcg/ffi/src/pcg.cc index 4e1bd55bc3..c21b60118f 100644 --- a/lib/pcg/ffi/src/pcg.cc +++ b/lib/pcg/ffi/src/pcg.cc @@ -1,2 +1,2 @@ #include "flexflow/pcg.h" -#include "pcg/model_compilation.h" +//#include "pcg/model_compilation.h" diff --git a/lib/realm-execution/CMakeLists.txt b/lib/realm-execution/CMakeLists.txt index 25a51ada54..1d244febe8 100644 --- a/lib/realm-execution/CMakeLists.txt +++ b/lib/realm-execution/CMakeLists.txt @@ -13,7 +13,7 @@ ff_add_library( local-execution op-attrs pcg - spdlog + spdlog::spdlog task-spec utils realm diff --git a/lib/task-spec/CMakeLists.txt b/lib/task-spec/CMakeLists.txt index 3c7c91af67..4c7fa158bf 100644 --- a/lib/task-spec/CMakeLists.txt +++ b/lib/task-spec/CMakeLists.txt @@ -12,7 +12,7 @@ ff_add_library( utils kernels pcg - spdlog + spdlog::spdlog compiler ) diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index e2f7c433d6..b35bb68692 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -11,10 +11,10 @@ ff_add_library( expected fmt json - cuda + cudart rapidcheck libassert -) +) add_subdirectory(ffi) add_subdirectory(test) From 4770f2ff7d5d75fef3ce918eab3cbcf3cf8dc968 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 14:00:04 -0800 Subject: [PATCH 04/51] Avoid duplicate cpptrace build, run proj dtgen. --- deploy.sh | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/deploy.sh b/deploy.sh index 6020a8ee10..0a0d6015c6 100755 --- a/deploy.sh +++ b/deploy.sh @@ -53,10 +53,10 @@ build_cmake_library zstd https://github.com/facebook/zstd.git -DCMAKE_BUILD_TYPE build_cmake_library fmt https://github.com/fmtlib/fmt/archive/refs/tags/10.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -build_cmake_library libassert https://github.com/jeremy-rifkin/libassert/archive/refs/tags/v2.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON - build_cmake_library cpptrace https://github.com/jeremy-rifkin/cpptrace/archive/refs/tags/v1.0.4.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON +build_cmake_library libassert https://github.com/jeremy-rifkin/libassert/archive/refs/tags/v2.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DLIBASSERT_USE_EXTERNAL_CPPTRACE=ON + build_cmake_library Realm https://github.com/StanfordLegion/realm.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_ENABLE_CPPTRACE=ON -DREALM_ENABLE_HDF5=OFF -DREALM_MAX_DIM=5 build_cmake_library benchmark https://github.com/google/benchmark/archive/refs/tags/v1.9.5.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON @@ -73,16 +73,16 @@ build_cmake_library nlohmann_json https://github.com/nlohmann/json/archive/refs/ build_cmake_library NCCL https://github.com/NVIDIA/nccl/archive/refs/tags/v2.29.7-1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -# if [[ ! -e proj ]]; then -# git clone -b python-install https://github.com/elliottslaughter/proj.git -# pushd proj -# uv venv -# uv sync -# popd # proj -# fi -# source proj/.venv/bin/activate -# export PATH="$PATH:$PWD/proj/bin" -# export PYTHONPATH="$PYTHONPATH:$PWD/proj" +if [[ ! -e /tmp/$USER/proj ]]; then + git clone -b python-install https://github.com/elliottslaughter/proj.git /tmp/$USER/proj + pushd /tmp/$USER/proj + uv venv + uv sync + popd # /tmp/$USER/proj +fi +source /tmp/$USER/proj/.venv/bin/activate +export PATH="$PATH:/tmp/$USER/proj/bin" +export PYTHONPATH="$PYTHONPATH:/tmp/$USER/proj" popd # deploy @@ -101,9 +101,10 @@ ff_cmake_flags=( -DFF_USE_EXTERNAL_SPDLOG=ON ) +proj dtgen + mkdir build install pushd build cmake .. "${ff_cmake_flags[@]}" -# proj dtgen make -j20 popd # build From 11b9b6045bd5cf45e9ad151b57592167b91b03dc Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 14:03:00 -0800 Subject: [PATCH 05/51] Remove all submodules. --- .gitmodules | 24 ------------------------ deps/doctest | 1 - deps/expected | 1 - deps/fmt | 1 - deps/json | 1 - deps/nccl | 1 - deps/rapidcheck | 1 - deps/spdlog | 1 - deps/visit_struct | 1 - 9 files changed, 32 deletions(-) delete mode 160000 deps/doctest delete mode 160000 deps/expected delete mode 160000 deps/fmt delete mode 160000 deps/json delete mode 160000 deps/nccl delete mode 160000 deps/rapidcheck delete mode 160000 deps/spdlog delete mode 160000 deps/visit_struct diff --git a/.gitmodules b/.gitmodules index 24bf52a686..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,24 +0,0 @@ -[submodule "deps/nccl"] - path = deps/nccl - url = https://github.com/NVIDIA/nccl.git -[submodule "deps/json"] - path = deps/json - url = https://github.com/nlohmann/json.git -[submodule "deps/spdlog"] - path = deps/spdlog - url = https://github.com/gabime/spdlog.git -[submodule "deps/rapidcheck"] - path = deps/rapidcheck - url = https://github.com/emil-e/rapidcheck.git -[submodule "deps/doctest"] - path = deps/doctest - url = https://github.com/doctest/doctest.git -[submodule "deps/visit_struct"] - path = deps/visit_struct - url = https://github.com/cbeck88/visit_struct.git -[submodule "deps/expected"] - path = deps/expected - url = https://github.com/TartanLlama/expected.git -[submodule "deps/fmt"] - path = deps/fmt - url = https://github.com/fmtlib/fmt.git diff --git a/deps/doctest b/deps/doctest deleted file mode 160000 index b7c21ec5ce..0000000000 --- a/deps/doctest +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b7c21ec5ceeadb4951b00396fc1e4642dd347e5f diff --git a/deps/expected b/deps/expected deleted file mode 160000 index 292eff8bd8..0000000000 --- a/deps/expected +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 292eff8bd8ee230a7df1d6a1c00c4ea0eb2f0362 diff --git a/deps/fmt b/deps/fmt deleted file mode 160000 index f5e54359df..0000000000 --- a/deps/fmt +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f5e54359df4c26b6230fc61d38aa294581393084 diff --git a/deps/json b/deps/json deleted file mode 160000 index 4f8fba1406..0000000000 --- a/deps/json +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4f8fba14066156b73f1189a2b8bd568bde5284c5 diff --git a/deps/nccl b/deps/nccl deleted file mode 160000 index 3996562690..0000000000 --- a/deps/nccl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 399656269027c1818fc999ccf8ec4dd838cec50d diff --git a/deps/rapidcheck b/deps/rapidcheck deleted file mode 160000 index 1505cbbce7..0000000000 --- a/deps/rapidcheck +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1505cbbce733bde3b78042cf2e9309c0b7f227a2 diff --git a/deps/spdlog b/deps/spdlog deleted file mode 160000 index ad0e89cbfb..0000000000 --- a/deps/spdlog +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ad0e89cbfb4d0c1ce4d097e134eb7be67baebb36 diff --git a/deps/visit_struct b/deps/visit_struct deleted file mode 160000 index add1752b7a..0000000000 --- a/deps/visit_struct +++ /dev/null @@ -1 +0,0 @@ -Subproject commit add1752b7a1d806da4cee6aa135518003afd5bf7 From bad59b3aaa8144f1237acf046f2c336e0533edd7 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 15:15:35 -0800 Subject: [PATCH 06/51] Kernels don't need to explicitly link cudart because they already contain CUDA code. --- lib/kernels/CMakeLists.txt | 1 - lib/kernels/test/CMakeLists.txt | 1 - 2 files changed, 2 deletions(-) diff --git a/lib/kernels/CMakeLists.txt b/lib/kernels/CMakeLists.txt index a72f4ab30a..1dbd56f99e 100644 --- a/lib/kernels/CMakeLists.txt +++ b/lib/kernels/CMakeLists.txt @@ -25,7 +25,6 @@ target_include_directories( target_link_libraries( ${project_target} op-attrs - cudart cublas cudnn nccl diff --git a/lib/kernels/test/CMakeLists.txt b/lib/kernels/test/CMakeLists.txt index f902d433a8..10ac6993fd 100644 --- a/lib/kernels/test/CMakeLists.txt +++ b/lib/kernels/test/CMakeLists.txt @@ -10,7 +10,6 @@ ff_add_test_executable( utils-test-common kernels op-attrs - cudart cublas cudnn pcg From 9606ee8448e11afb305f4cb92a37922ee0e1e864 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 15:38:09 -0800 Subject: [PATCH 07/51] Don't alias CUDA libraries. --- cmake/cuda.cmake | 5 ----- lib/kernels/CMakeLists.txt | 2 +- lib/kernels/test/CMakeLists.txt | 2 -- lib/utils/CMakeLists.txt | 2 +- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 14dce85829..dddd07b831 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,7 +1,2 @@ -include(aliasing) - find_package(CUDAToolkit 11.7 REQUIRED) enable_language(CUDA) - -alias_library(cudart CUDA::cudart) -alias_library(cublas CUDA::cublas) diff --git a/lib/kernels/CMakeLists.txt b/lib/kernels/CMakeLists.txt index 1dbd56f99e..92711afa69 100644 --- a/lib/kernels/CMakeLists.txt +++ b/lib/kernels/CMakeLists.txt @@ -25,7 +25,7 @@ target_include_directories( target_link_libraries( ${project_target} op-attrs - cublas + CUDA::cublas cudnn nccl utils diff --git a/lib/kernels/test/CMakeLists.txt b/lib/kernels/test/CMakeLists.txt index 10ac6993fd..f4ae6ba56e 100644 --- a/lib/kernels/test/CMakeLists.txt +++ b/lib/kernels/test/CMakeLists.txt @@ -10,7 +10,5 @@ ff_add_test_executable( utils-test-common kernels op-attrs - cublas - cudnn pcg ) diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index b35bb68692..6b51d2cf95 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -11,7 +11,7 @@ ff_add_library( expected fmt json - cudart + CUDA::cudart rapidcheck libassert ) From 9b468082a8e519d29c39a7aed30dd1752a4b4338 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:09:44 -0800 Subject: [PATCH 08/51] Remove build parameters for external dependencies. --- CMakeLists.txt | 10 -------- cmake/doctestlib.cmake | 20 ++++++--------- cmake/expected.cmake | 9 +++---- cmake/fmt.cmake | 6 +---- cmake/gbenchmark.cmake | 10 +++----- cmake/json.cmake | 8 ++---- cmake/libassert.cmake | 6 +---- cmake/nccl.cmake | 7 +----- cmake/rapidcheck.cmake | 6 +---- cmake/spdlog.cmake | 8 +----- cmake/utils.cmake | 56 ------------------------------------------ deploy.sh | 9 ------- flake.nix | 10 +------- 13 files changed, 22 insertions(+), 143 deletions(-) delete mode 100644 cmake/utils.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 906e4e4697..29c8143ebd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,16 +30,6 @@ set(FF_GPU_BACKENDS cuda hip_cuda hip_rocm intel) set(FF_GPU_BACKEND "cuda" CACHE STRING "Select GPU Backend ${FF_GPU_BACKENDS}") set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS}) -option(FF_USE_EXTERNAL_NCCL "Use pre-installed NCCL" OFF) -option(FF_USE_EXTERNAL_JSON "Use pre-installed nlohmann::json" OFF) -option(FF_USE_EXTERNAL_FMT "Use pre-installed fmt" OFF) -option(FF_USE_EXTERNAL_SPDLOG "Use pre-installed spdlog" OFF) -option(FF_USE_EXTERNAL_DOCTEST "Use pre-installed doctest" OFF) -option(FF_USE_EXTERNAL_RAPIDCHECK "Use pre-installed rapidcheck" OFF) -option(FF_USE_EXTERNAL_EXPECTED "Use pre-installed tl::expected" OFF) -option(FF_USE_EXTERNAL_GBENCHMARK "Use pre-installed google benchmark" OFF) -option(FF_USE_EXTERNAL_LIBASSERT "Use pre-installed libassert" OFF) - option(FF_BUILD_RESNET "build resnet example" OFF) option(FF_BUILD_RESNEXT "build resnext example" OFF) option(FF_BUILD_ALEXNET "build alexnet example" OFF) diff --git a/cmake/doctestlib.cmake b/cmake/doctestlib.cmake index b303f6715f..c203605ee6 100644 --- a/cmake/doctestlib.cmake +++ b/cmake/doctestlib.cmake @@ -1,15 +1,11 @@ include(aliasing) -if (FF_USE_EXTERNAL_DOCTEST) - find_package(doctest REQUIRED) - include(doctest) # import doctest_discover_tests +find_package(doctest REQUIRED) +include(doctest) # import doctest_discover_tests - target_compile_definitions( - doctest::doctest - INTERFACE - DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES - ) - alias_library(doctest doctest::doctest) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_DOCTEST is required") -endif() +target_compile_definitions( + doctest::doctest + INTERFACE + DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES +) +alias_library(doctest doctest::doctest) diff --git a/cmake/expected.cmake b/cmake/expected.cmake index d2a7d965f4..cab1c133a9 100644 --- a/cmake/expected.cmake +++ b/cmake/expected.cmake @@ -1,7 +1,4 @@ include(aliasing) -if (FF_USE_EXTERNAL_EXPECTED) - find_package(tl-expected REQUIRED) - alias_library(expected tl::expected) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_EXPECTED is required") -endif() + +find_package(tl-expected REQUIRED) +alias_library(expected tl::expected) diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index 23b8fef6e5..d7e63eb75e 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -1,8 +1,4 @@ include(aliasing) -if (FF_USE_EXTERNAL_FMT) - find_package(fmt REQUIRED) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_FMT is required") -endif() +find_package(fmt REQUIRED) alias_library(fmt fmt::fmt) diff --git a/cmake/gbenchmark.cmake b/cmake/gbenchmark.cmake index bfdf57a154..1dd1a5413c 100644 --- a/cmake/gbenchmark.cmake +++ b/cmake/gbenchmark.cmake @@ -1,9 +1,5 @@ include(aliasing) -if (FF_USE_EXTERNAL_GBENCHMARK) - find_package(benchmark REQUIRED) - alias_library(gbenchmark benchmark::benchmark) - alias_library(gbenchmark-main benchmark::benchmark_main) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_GBENCHMARK is required") -endif() +find_package(benchmark REQUIRED) +alias_library(gbenchmark benchmark::benchmark) +alias_library(gbenchmark-main benchmark::benchmark_main) diff --git a/cmake/json.cmake b/cmake/json.cmake index a19d2ef1d1..ea71466411 100644 --- a/cmake/json.cmake +++ b/cmake/json.cmake @@ -1,9 +1,5 @@ include(aliasing) -if (FF_USE_EXTERNAL_JSON) - find_package(nlohmann_json REQUIRED) +find_package(nlohmann_json REQUIRED) - alias_library(json nlohmann_json) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_JSON is required") -endif() +alias_library(json nlohmann_json) diff --git a/cmake/libassert.cmake b/cmake/libassert.cmake index ab50fd22d8..c0c9fd6f3c 100644 --- a/cmake/libassert.cmake +++ b/cmake/libassert.cmake @@ -1,9 +1,5 @@ include(aliasing) -if(FF_USE_EXTERNAL_LIBASSERT) - find_package(libassert REQUIRED) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_LIBASSERT is required") -endif() +find_package(libassert REQUIRED) alias_library(libassert libassert::assert) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index e092e60f5f..5fdff30f78 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,12 +1,7 @@ include(aliasing) -if (FF_USE_EXTERNAL_NCCL) - find_package(NCCL REQUIRED) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_NCCL is required") -endif() +find_package(NCCL REQUIRED) -message(STATUS "NCCL_LIBRARIES = ${NCCL_LIBRARIES}") add_library(nccl INTERFACE) target_include_directories(nccl SYSTEM INTERFACE ${NCCL_INCLUDE_DIRS}) target_link_libraries(nccl INTERFACE ${NCCL_LIBRARIES}) diff --git a/cmake/rapidcheck.cmake b/cmake/rapidcheck.cmake index 179d9d07a7..837a3e0d74 100644 --- a/cmake/rapidcheck.cmake +++ b/cmake/rapidcheck.cmake @@ -1,5 +1 @@ -if (FF_USE_EXTERNAL_RAPIDCHECK) - find_package(rapidcheck REQUIRED) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_RAPIDCHECK is required") -endif() +find_package(rapidcheck REQUIRED) diff --git a/cmake/spdlog.cmake b/cmake/spdlog.cmake index ad806cf56c..00e7385d9f 100644 --- a/cmake/spdlog.cmake +++ b/cmake/spdlog.cmake @@ -1,7 +1 @@ -include(aliasing) - -if (FF_USE_EXTERNAL_SPDLOG) - find_package(spdlog REQUIRED) -else() - message(FATAL_ERROR "FF_USE_EXTERNAL_SPDLOG is required") -endif() +find_package(spdlog REQUIRED) diff --git a/cmake/utils.cmake b/cmake/utils.cmake deleted file mode 100644 index 4e23ed2e3f..0000000000 --- a/cmake/utils.cmake +++ /dev/null @@ -1,56 +0,0 @@ -set(known_gpu_archs "") -function(remove_duplicate_args __string) - if(${__string}) - set(__list ${${__string}}) - separate_arguments(__list) - list(REMOVE_DUPLICATES __list) - foreach(__e ${__list}) - set(__str "${__str} ${__e}") - endforeach() - set(${__string} ${__str} PARENT_SCOPE) - endif() -endfunction() -function(detect_installed_gpus out_variable) - if(NOT CUDA_gpu_detect_output) - set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - file(WRITE ${__cufile} "" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - "}\n") - execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${__cufile}" - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if(__nvcc_res EQUAL 0) - message(STATUS "No result from nvcc so building for 2.0") - string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") - set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE) - endif() - endif() - if(NOT CUDA_gpu_detect_output) - message(STATUS "Automatic GPU detection failed, Architecture is not set: ${known_gpu_archs}.") - set(${out_variable} ${known_gpu_archs} PARENT_SCOPE) - else() - remove_duplicate_args(CUDA_gpu_detect_output) - #Strip leading and trailing whitespaces - string(STRIP "${CUDA_gpu_detect_output}" CUDA_gpu_detect_output) - #Replace spaces in between with commas so you go from "5.2 6.1" to "5.2,6.1" - string(REGEX REPLACE " " "," CUDA_gpu_detect_output "${CUDA_gpu_detect_output}") - # message(${CUDA_gpu_detect_output}) - string(REPLACE "." "" CUDA_gpu_detect_output "${CUDA_gpu_detect_output}") - # message(${CUDA_gpu_detect_output}) - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) - # message(STATUS "Automatic GPU ARCH detection: ${CUDA_gpu_detect_output}") - endif() -endfunction() diff --git a/deploy.sh b/deploy.sh index 0a0d6015c6..8a2831bee6 100755 --- a/deploy.sh +++ b/deploy.sh @@ -90,15 +90,6 @@ ff_cmake_flags=( -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$PWD/../install -DCMAKE_CUDA_ARCHITECTURES=60 - -DFF_USE_EXTERNAL_DOCTEST=ON - -DFF_USE_EXTERNAL_EXPECTED=ON - -DFF_USE_EXTERNAL_FMT=ON - -DFF_USE_EXTERNAL_GBENCHMARK=ON - -DFF_USE_EXTERNAL_JSON=ON - -DFF_USE_EXTERNAL_LIBASSERT=ON - -DFF_USE_EXTERNAL_NCCL=ON - -DFF_USE_EXTERNAL_RAPIDCHECK=ON - -DFF_USE_EXTERNAL_SPDLOG=ON ) proj dtgen diff --git a/flake.nix b/flake.nix index da162eba26..a3202f3fb7 100644 --- a/flake.nix +++ b/flake.nix @@ -83,15 +83,7 @@ shellHook = '' export PATH="$HOME/ff/.scripts/:$PATH" export RC_PARAMS="max_discard_ratio=100" - export CMAKE_FLAGS="-DFF_USE_EXTERNAL_NCCL=ON \ - -DFF_USE_EXTERNAL_JSON=ON \ - -DFF_USE_EXTERNAL_FMT=ON \ - -DFF_USE_EXTERNAL_SPDLOG=ON \ - -DFF_USE_EXTERNAL_DOCTEST=ON \ - -DFF_USE_EXTERNAL_RAPIDCHECK=ON \ - -DFF_USE_EXTERNAL_EXPECTED=ON \ - -DFF_USE_EXTERNAL_GBENCHMARK=ON \ - -DFF_USE_EXTERNAL_LIBASSERT=ON" + export CMAKE_FLAGS="" ''; buildInputs = builtins.concatLists [ From 9207034a0e282fe75164df2eceab4dfc10b245c0 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:24:55 -0800 Subject: [PATCH 09/51] Remove most aliases. --- cmake/expected.cmake | 3 --- cmake/flexflow-utils.cmake | 4 ++-- cmake/fmt.cmake | 3 --- cmake/gbenchmark.cmake | 4 ---- cmake/libassert.cmake | 4 ---- cmake/nccl.cmake | 2 -- lib/utils/CMakeLists.txt | 6 +++--- 7 files changed, 5 insertions(+), 21 deletions(-) diff --git a/cmake/expected.cmake b/cmake/expected.cmake index cab1c133a9..77143ce91a 100644 --- a/cmake/expected.cmake +++ b/cmake/expected.cmake @@ -1,4 +1 @@ -include(aliasing) - find_package(tl-expected REQUIRED) -alias_library(expected tl::expected) diff --git a/cmake/flexflow-utils.cmake b/cmake/flexflow-utils.cmake index 7d5d189b1c..6250531e8f 100644 --- a/cmake/flexflow-utils.cmake +++ b/cmake/flexflow-utils.cmake @@ -180,8 +180,8 @@ function(ff_add_benchmark_executable) target_link_libraries( ${FF_BENCHMARK_EXEC_NAME} ${FF_BENCHMARK_EXEC_DEPS} - gbenchmark - gbenchmark-main) + benchmark::benchmark + benchmark::benchmark_main) define_ff_vars(${FF_BENCHMARK_EXEC_NAME}) ff_set_cxx_properties(${FF_BENCHMARK_EXEC_NAME}) diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index d7e63eb75e..c6d3a73d00 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -1,4 +1 @@ -include(aliasing) - find_package(fmt REQUIRED) -alias_library(fmt fmt::fmt) diff --git a/cmake/gbenchmark.cmake b/cmake/gbenchmark.cmake index 1dd1a5413c..2282b6846d 100644 --- a/cmake/gbenchmark.cmake +++ b/cmake/gbenchmark.cmake @@ -1,5 +1 @@ -include(aliasing) - find_package(benchmark REQUIRED) -alias_library(gbenchmark benchmark::benchmark) -alias_library(gbenchmark-main benchmark::benchmark_main) diff --git a/cmake/libassert.cmake b/cmake/libassert.cmake index c0c9fd6f3c..a6d3af9b55 100644 --- a/cmake/libassert.cmake +++ b/cmake/libassert.cmake @@ -1,5 +1 @@ -include(aliasing) - find_package(libassert REQUIRED) - -alias_library(libassert libassert::assert) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 5fdff30f78..8c121cc998 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,5 +1,3 @@ -include(aliasing) - find_package(NCCL REQUIRED) add_library(nccl INTERFACE) diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index 6b51d2cf95..fad8193164 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -8,12 +8,12 @@ ff_add_library( PRIVATE_INCLUDE src/ DEPS - expected - fmt + tl::expected + fmt::fmt json CUDA::cudart rapidcheck - libassert + libassert::assert ) add_subdirectory(ffi) From 767555ee8bbabc5938d810609434e4db02cfaba8 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:34:18 -0800 Subject: [PATCH 10/51] Add deployment CI job. --- .github/workflows/deploy.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/workflows/deploy.yml diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000000..d727ff1294 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,19 @@ +name: Test Deployment +on: [push, pull_request, workflow_dispatch] +jobs: + deploy: + name: test deployment scripts + runs-on: ubuntu-latest + container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + + steps: + - name: Checkout Git Repository + uses: actions/checkout@v3 + + - name: Install Depedencies + run: | + apt-get update -qq + apt-get install -y cmake + + - name: Run Deploy Script + run: ./deploy.sh From 53cbb21da5e7ed086ff32139dd246f54c12200cf Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:41:01 -0800 Subject: [PATCH 11/51] Fixes for CI test. --- deploy.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/deploy.sh b/deploy.sh index 8a2831bee6..904bbd22a7 100755 --- a/deploy.sh +++ b/deploy.sh @@ -2,11 +2,11 @@ set -e -module load cuda cmake -export CC=gcc-10 -export CXX=g++-10 - -git submodule update --init +if [[ -z $CI ]]; then + module load cuda cmake + export CC=gcc-10 + export CXX=g++-10 +fi mkdir -p deploy pushd deploy From 35e5126215be847a050f41e139540202d023c846 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:46:13 -0800 Subject: [PATCH 12/51] Update dependencies. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d727ff1294..2cfa9aedc1 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y cmake + apt-get install -y build-essential cmake curl git - name: Run Deploy Script run: ./deploy.sh From 98cebbb0bf7d722b79f6de4ccb3da8ab650078e8 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:47:43 -0800 Subject: [PATCH 13/51] Make shellcheck happy. --- deploy.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy.sh b/deploy.sh index 904bbd22a7..931e2af9cd 100755 --- a/deploy.sh +++ b/deploy.sh @@ -74,13 +74,13 @@ build_cmake_library nlohmann_json https://github.com/nlohmann/json/archive/refs/ build_cmake_library NCCL https://github.com/NVIDIA/nccl/archive/refs/tags/v2.29.7-1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON if [[ ! -e /tmp/$USER/proj ]]; then - git clone -b python-install https://github.com/elliottslaughter/proj.git /tmp/$USER/proj - pushd /tmp/$USER/proj + git clone -b python-install https://github.com/elliottslaughter/proj.git "/tmp/$USER/proj" + pushd "/tmp/$USER/proj" uv venv uv sync popd # /tmp/$USER/proj fi -source /tmp/$USER/proj/.venv/bin/activate +source "/tmp/$USER/proj/.venv/bin/activate" export PATH="$PATH:/tmp/$USER/proj/bin" export PYTHONPATH="$PYTHONPATH:/tmp/$USER/proj" @@ -88,7 +88,7 @@ popd # deploy ff_cmake_flags=( -DCMAKE_BUILD_TYPE=RelWithDebInfo - -DCMAKE_INSTALL_PREFIX=$PWD/../install + -DCMAKE_INSTALL_PREFIX="$PWD/../install" -DCMAKE_CUDA_ARCHITECTURES=60 ) From bfed1120869b78364dd7b30658e8deaf4a24cb77 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:50:52 -0800 Subject: [PATCH 14/51] Add MPI dependency. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2cfa9aedc1..96fa11dd94 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl git + apt-get install -y build-essential cmake curl git mpich libmpich-dev - name: Run Deploy Script run: ./deploy.sh From 0672883e5976159d1d0906cfb8008a35de88121e Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 16:53:42 -0800 Subject: [PATCH 15/51] Add ibverbs. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 96fa11dd94..f0b4121fa5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl git mpich libmpich-dev + apt-get install -y build-essential cmake curl git mpich libmpich-dev libibverbs-dev - name: Run Deploy Script run: ./deploy.sh From 9b5d98965ea4a6830eb3f60bcaaa1c0ec865d96a Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 17:02:35 -0800 Subject: [PATCH 16/51] Make shellcheck happy. --- deploy.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy.sh b/deploy.sh index 931e2af9cd..e3f5df4b42 100755 --- a/deploy.sh +++ b/deploy.sh @@ -80,6 +80,7 @@ if [[ ! -e /tmp/$USER/proj ]]; then uv sync popd # /tmp/$USER/proj fi +# shellcheck disable=SC1090 # Must be out of source to avoid: https://github.com/lockshaw/proj/issues/16 source "/tmp/$USER/proj/.venv/bin/activate" export PATH="$PATH:/tmp/$USER/proj/bin" export PYTHONPATH="$PYTHONPATH:/tmp/$USER/proj" From 406f72d606e0b5965438975e81acdf9abb9541b6 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 17:03:45 -0800 Subject: [PATCH 17/51] Add zlib. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index f0b4121fa5..49d64fcc12 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl git mpich libmpich-dev libibverbs-dev + apt-get install -y build-essential cmake curl git mpich libmpich-dev libibverbs-dev zlib1g-dev - name: Run Deploy Script run: ./deploy.sh From 015529e9845c3fd5ea2bb85fe7497c0916daa5b6 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 18:15:40 -0800 Subject: [PATCH 18/51] Add Python. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 49d64fcc12..490cde152b 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl git mpich libmpich-dev libibverbs-dev zlib1g-dev + apt-get install -y build-essential cmake curl git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script run: ./deploy.sh From 5ab2bc67aaa096dc14fabfcafb8f137d44020bbd Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 20:30:46 -0800 Subject: [PATCH 19/51] Use GCC 10 everywhere. --- .github/workflows/deploy.yml | 2 +- deploy.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 490cde152b..3274627477 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev + apt-get install -y build-essential cmake curl gcc-10 g++-10 git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script run: ./deploy.sh diff --git a/deploy.sh b/deploy.sh index e3f5df4b42..40c4d3d59c 100755 --- a/deploy.sh +++ b/deploy.sh @@ -4,9 +4,9 @@ set -e if [[ -z $CI ]]; then module load cuda cmake - export CC=gcc-10 - export CXX=g++-10 fi +export CC=gcc-10 +export CXX=g++-10 mkdir -p deploy pushd deploy From ab2603a1f98878a6d824b6f1071f897947d825af Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 21:13:38 -0800 Subject: [PATCH 20/51] Set threads for CI. --- .github/workflows/deploy.yml | 2 ++ deploy.sh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 3274627477..06cff43be4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,3 +17,5 @@ jobs: - name: Run Deploy Script run: ./deploy.sh + env: + THREADS: 4 diff --git a/deploy.sh b/deploy.sh index 40c4d3d59c..dd2795e5f6 100755 --- a/deploy.sh +++ b/deploy.sh @@ -27,7 +27,7 @@ function build_cmake_library { mkdir -p "${dep_name}"_build "${dep_name}"_install pushd "${dep_name}"_build cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" - make install -j20 + make install -j${THREADS:-20} popd fi export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" @@ -98,5 +98,5 @@ proj dtgen mkdir build install pushd build cmake .. "${ff_cmake_flags[@]}" -make -j20 +make -j${THREADS:-20} popd # build From df2d2e6af6cb51291cfba5831aa05c3422982937 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 4 Mar 2026 21:19:07 -0800 Subject: [PATCH 21/51] Fix shellcheck. --- deploy.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy.sh b/deploy.sh index dd2795e5f6..92664fffd1 100755 --- a/deploy.sh +++ b/deploy.sh @@ -27,7 +27,7 @@ function build_cmake_library { mkdir -p "${dep_name}"_build "${dep_name}"_install pushd "${dep_name}"_build cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" - make install -j${THREADS:-20} + make install -j"${THREADS:-20}" popd fi export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" @@ -98,5 +98,5 @@ proj dtgen mkdir build install pushd build cmake .. "${ff_cmake_flags[@]}" -make -j${THREADS:-20} +make -j"${THREADS:-20}" popd # build From 672abd2fb89d9960cc20d9f6337f5549bcc5ec17 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 09:20:45 -0800 Subject: [PATCH 22/51] Downgrade CUDA image to get an older cuDNN. --- .github/workflows/deploy.yml | 4 ++-- deploy.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 06cff43be4..ac899b37e0 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,7 +4,7 @@ jobs: deploy: name: test deployment scripts runs-on: ubuntu-latest - container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + container: nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 steps: - name: Checkout Git Repository @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl gcc-10 g++-10 git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev + apt-get install -y build-essential cmake curl git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script run: ./deploy.sh diff --git a/deploy.sh b/deploy.sh index 92664fffd1..c7b2c702f3 100755 --- a/deploy.sh +++ b/deploy.sh @@ -4,9 +4,9 @@ set -e if [[ -z $CI ]]; then module load cuda cmake + export CC=gcc-10 + export CXX=g++-10 fi -export CC=gcc-10 -export CXX=g++-10 mkdir -p deploy pushd deploy From 2a85d223209f5b2ed0bc72b2cb35dbbda1ffc21a Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 09:22:06 -0800 Subject: [PATCH 23/51] Fix format. --- lib/pcg/ffi/src/pcg.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pcg/ffi/src/pcg.cc b/lib/pcg/ffi/src/pcg.cc index c21b60118f..c8be9b47e7 100644 --- a/lib/pcg/ffi/src/pcg.cc +++ b/lib/pcg/ffi/src/pcg.cc @@ -1,2 +1,2 @@ #include "flexflow/pcg.h" -//#include "pcg/model_compilation.h" +// #include "pcg/model_compilation.h" From f9a3714f651818e0381f24d71229db5d4d2b606f Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 11:21:40 -0800 Subject: [PATCH 24/51] Revert "Downgrade CUDA image to get an older cuDNN." This reverts commit e34b155c7c604361d3b230a7c5bd2a4400958597. --- .github/workflows/deploy.yml | 4 ++-- deploy.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index ac899b37e0..06cff43be4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,7 +4,7 @@ jobs: deploy: name: test deployment scripts runs-on: ubuntu-latest - container: nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 + container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 steps: - name: Checkout Git Repository @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev + apt-get install -y build-essential cmake curl gcc-10 g++-10 git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script run: ./deploy.sh diff --git a/deploy.sh b/deploy.sh index c7b2c702f3..92664fffd1 100755 --- a/deploy.sh +++ b/deploy.sh @@ -4,9 +4,9 @@ set -e if [[ -z $CI ]]; then module load cuda cmake - export CC=gcc-10 - export CXX=g++-10 fi +export CC=gcc-10 +export CXX=g++-10 mkdir -p deploy pushd deploy From 4abca1e73c4c8cf7a83354a558a2fcad0c782741 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 12:06:12 -0800 Subject: [PATCH 25/51] Fix modern cuDNN build, update thread count. --- .github/workflows/deploy.yml | 6 ++---- deploy.sh | 5 +++-- lib/kernels/src/cuda/ops/attention_kernels.cu | 4 ++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 06cff43be4..cb07ff9fc0 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,8 +1,8 @@ -name: Test Deployment +name: deploy on: [push, pull_request, workflow_dispatch] jobs: deploy: - name: test deployment scripts + name: Test Deployment Scripts runs-on: ubuntu-latest container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 @@ -17,5 +17,3 @@ jobs: - name: Run Deploy Script run: ./deploy.sh - env: - THREADS: 4 diff --git a/deploy.sh b/deploy.sh index 92664fffd1..1025a2fea7 100755 --- a/deploy.sh +++ b/deploy.sh @@ -7,6 +7,7 @@ if [[ -z $CI ]]; then fi export CC=gcc-10 export CXX=g++-10 +export THREADS="${THREADS:-$(nproc)}" mkdir -p deploy pushd deploy @@ -27,7 +28,7 @@ function build_cmake_library { mkdir -p "${dep_name}"_build "${dep_name}"_install pushd "${dep_name}"_build cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" - make install -j"${THREADS:-20}" + make install -j"$THREADS" popd fi export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" @@ -98,5 +99,5 @@ proj dtgen mkdir build install pushd build cmake .. "${ff_cmake_flags[@]}" -make -j"${THREADS:-20}" +make -j"$THREADS" popd # build diff --git a/lib/kernels/src/cuda/ops/attention_kernels.cu b/lib/kernels/src/cuda/ops/attention_kernels.cu index a5fd9ea648..c38f26e002 100644 --- a/lib/kernels/src/cuda/ops/attention_kernels.cu +++ b/lib/kernels/src/cuda/ops/attention_kernels.cu @@ -55,7 +55,11 @@ MHAPerDeviceState gpu_init_kernel(PerDeviceFFHandle const &handle, // Currently do not support adding bias to key/value projection assert(!add_bias_kv); +#if CUDNN_MAJOR >= 9 + unsigned attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE; +#else cudnnAttnQueryMap_t attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE; +#endif // Assume no beam search for now int maxBeamSize = 1; From 652727add2798e29e6d38767298523d70bfc2523 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 13:59:43 -0800 Subject: [PATCH 26/51] Put back the FindNCCL module required by Nix. --- cmake/Modules/FindNCCL.cmake | 175 +++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 cmake/Modules/FindNCCL.cmake diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake new file mode 100644 index 0000000000..796818c0cf --- /dev/null +++ b/cmake/Modules/FindNCCL.cmake @@ -0,0 +1,175 @@ +# from https://github.com/pytorch/pytorch/blob/818b14025a1d70872b52d28a1e83e7797f6e271a/cmake/Modules/FindNCCL.cmake + +################################################################################ +# +# From PyTorch: +# +# Copyright (c) 2016- Facebook, Inc (Adam Paszke) +# Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +# Copyright (c) 2011-2013 NYU (Clement Farabet) +# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +# Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) +# +# From Caffe2: +# +# Copyright (c) 2016-present, Facebook Inc. All rights reserved. +# +# All contributions by Facebook: +# Copyright (c) 2016 Facebook Inc. +# +# All contributions by Google: +# Copyright (c) 2015 Google Inc. +# All rights reserved. +# +# All contributions by Yangqing Jia: +# Copyright (c) 2015 Yangqing Jia +# All rights reserved. +# +# All contributions by Kakao Brain: +# Copyright 2019-2020 Kakao Brain +# +# All contributions by Cruise LLC: +# Copyright (c) 2022 Cruise LLC. +# All rights reserved. +# +# All contributions from Caffe: +# Copyright(c) 2013, 2014, 2015, the respective contributors +# All rights reserved. +# +# All other contributions: +# Copyright(c) 2015, 2016 the respective contributors +# All rights reserved. +# +# Caffe2 uses a copyright model similar to Caffe: each contributor holds +# copyright over their contributions to Caffe2. The project versioning records +# all such contribution and copyright details. If a contributor wants to further +# mark their specific copyright on a particular contribution, they should +# indicate their copyright solely in the commit message of the change when it is +# committed. +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America +# and IDIAP Research Institute nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +################################################################################ + + +# Find the nccl libraries +# +# The following variables are optionally searched for defaults +# NCCL_ROOT: Base directory where all NCCL components are found +# NCCL_INCLUDE_DIR: Directory where NCCL header is found +# NCCL_LIB_DIR: Directory where NCCL library is found +# +# The following are set after configuration is done: +# NCCL_FOUND +# NCCL_INCLUDE_DIRS +# NCCL_LIBRARIES +# +# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks +# install NCCL in the same location as the CUDA toolkit. +# See https://github.com/caffe2/caffe2/issues/1601 + +set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers") +set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries") +set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with") + +if ($ENV{NCCL_ROOT_DIR}) + message(WARNING "NCCL_ROOT_DIR is deprecated. Please set NCCL_ROOT instead.") +endif() +list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}) +# Compatible layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12. +list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT}) + +find_path(NCCL_INCLUDE_DIRS + NAMES nccl.h + HINTS ${NCCL_INCLUDE_DIR}) + +if (USE_STATIC_NCCL) + MESSAGE(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.") + SET(NCCL_LIBNAME "nccl_static") + if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified + set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +else() + SET(NCCL_LIBNAME "nccl") + if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified + set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +endif() + +find_library(NCCL_LIBRARIES + NAMES ${NCCL_LIBNAME} + HINTS ${NCCL_LIB_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES) + +if(NCCL_FOUND) # obtaining NCCL version and some sanity checks + set (NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h") + message (STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...") + set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) + list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS}) + include(CheckCXXSymbolExists) + check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED) + + if (NCCL_VERSION_DEFINED) + set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc") + file(WRITE ${file} " + #include + #include + int main() + { + std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl; + + int x; + ncclGetVersion(&x); + return x == NCCL_VERSION_CODE; + } +") + try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file} + RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}" + LINK_LIBRARIES ${NCCL_LIBRARIES}) + if (NOT NCCL_VERSION_MATCHED) + message(FATAL_ERROR "Found NCCL header version and library version do not match! \ +(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.") + endif() + message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}") + else() + message(STATUS "NCCL version < 2.3.5-5") + endif () + set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES}) + + message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})") + mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES) +endif() From 700e11c2285d479c414c2117651404afd618fa04 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 14:04:43 -0800 Subject: [PATCH 27/51] Test GCC build matrix. --- .github/workflows/deploy.yml | 8 +++++++- deploy.sh | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index cb07ff9fc0..a51cda6e29 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,13 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl gcc-10 g++-10 git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev + apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script run: ./deploy.sh + env: + GCC_VERSION=${{ matrix.gcc }} + + strategy: + matrix: + gcc: [10, 11, 12] diff --git a/deploy.sh b/deploy.sh index 1025a2fea7..5d46e36d04 100755 --- a/deploy.sh +++ b/deploy.sh @@ -5,8 +5,8 @@ set -e if [[ -z $CI ]]; then module load cuda cmake fi -export CC=gcc-10 -export CXX=g++-10 +export CC=gcc-"${GCC_VERSION:-10}" +export CXX=g++-"${GCC_VERSION:-10}" export THREADS="${THREADS:-$(nproc)}" mkdir -p deploy From 21b17a4648c5b336673485135f2fdff82f462347 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 14:05:42 -0800 Subject: [PATCH 28/51] Name tests clearly. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index a51cda6e29..caf921d4a5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -2,7 +2,7 @@ name: deploy on: [push, pull_request, workflow_dispatch] jobs: deploy: - name: Test Deployment Scripts + name: Test Deployment (GCC ${{ matrix.gcc }}) runs-on: ubuntu-latest container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 From 14b0970ada3141da59401c2db9030f527a81a15a Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 14:11:46 -0800 Subject: [PATCH 29/51] Stringify GCC version. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index caf921d4a5..2dca517f58 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -22,4 +22,4 @@ jobs: strategy: matrix: - gcc: [10, 11, 12] + gcc: ["10", "11", "12"] From f4f7c1005cd5106e27f5ca2a4457f6f192f24a1e Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 14:12:03 -0800 Subject: [PATCH 30/51] Fix code coverage for CUDA. --- cmake/Modules/CodeCoverage.cmake | 4 ++-- cmake/cuda.cmake | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmake/Modules/CodeCoverage.cmake b/cmake/Modules/CodeCoverage.cmake index 2556b19f7b..5a8fe67057 100644 --- a/cmake/Modules/CodeCoverage.cmake +++ b/cmake/Modules/CodeCoverage.cmake @@ -161,7 +161,7 @@ foreach(LANG ${LANGUAGES}) endif() elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") - message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + message(FATAL_ERROR "Compiler ${CMAKE_${LANG}_COMPILER_ID} is not GNU or Flang! Aborting...") endif() endforeach() @@ -748,4 +748,4 @@ function(append_coverage_compiler_flags_to_target name) if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") target_link_libraries(${name} PRIVATE gcov) endif() -endfunction() \ No newline at end of file +endfunction() diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index dddd07b831..0906e8f8d5 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,2 +1 @@ find_package(CUDAToolkit 11.7 REQUIRED) -enable_language(CUDA) From b115549aec9f8a022c847baade96cbf5632d80e2 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 14:14:40 -0800 Subject: [PATCH 31/51] Fix matrix. --- .github/workflows/deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2dca517f58..d4c5a84a80 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -18,8 +18,8 @@ jobs: - name: Run Deploy Script run: ./deploy.sh env: - GCC_VERSION=${{ matrix.gcc }} + GCC_VERSION: ${{ matrix.gcc }} strategy: matrix: - gcc: ["10", "11", "12"] + gcc: [10, 11, 12] From 2608e8e20e180b298324e8e2f43fc90881b808f2 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 5 Mar 2026 15:41:38 -0800 Subject: [PATCH 32/51] Shut off GCC 12. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d4c5a84a80..82b577c6e7 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -22,4 +22,4 @@ jobs: strategy: matrix: - gcc: [10, 11, 12] + gcc: [10, 11] From 916b9b204f943927a5a71f4d67e6ce1828a23e9e Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 6 Mar 2026 09:23:42 -0800 Subject: [PATCH 33/51] Remove unused FF_CUDA_ARCH flag. --- .proj.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/.proj.toml b/.proj.toml index 750f7e5955..630fbe5efd 100644 --- a/.proj.toml +++ b/.proj.toml @@ -124,5 +124,4 @@ cuda = false # ] [cmake_flags_extra] -FF_CUDA_ARCH = "60" CMAKE_CUDA_ARCHITECTURES = "60" From 08f4a33510dd0930c64bc90922e241418a254ac6 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 6 Mar 2026 09:24:46 -0800 Subject: [PATCH 34/51] Skeleton of run-model binary. --- .proj.toml | 4 ++ CMakeLists.txt | 1 + bin/CMakeLists.txt | 4 ++ bin/run-model/CMakeLists.txt | 10 ++++ bin/run-model/src/run-model/main.cc | 72 +++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+) create mode 100644 bin/run-model/CMakeLists.txt create mode 100644 bin/run-model/src/run-model/main.cc diff --git a/.proj.toml b/.proj.toml index 630fbe5efd..522e11c369 100644 --- a/.proj.toml +++ b/.proj.toml @@ -111,6 +111,10 @@ cuda = false type = "bin" cuda = false +[targets.run-model] +type = "bin" +cuda = false + # default_build_targets = [ # "utils", # ] diff --git a/CMakeLists.txt b/CMakeLists.txt index 29c8143ebd..4d62faf688 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,7 @@ option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" ON) option(FF_BUILD_SP_IZATION_BENCHMARKING "build sp-ization benchmarking" ON) option(FF_BUILD_ARG_PARSER "build command line argument parser" OFF) option(FF_BUILD_BIN_EXPORT_MODEL_ARCH "build export-model-arch utility" ON) +option(FF_BUILD_BIN_RUN_MODEL "build run-model binary" ON) include(cuda) include(cudnn) diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt index 6855537460..7bf7909320 100644 --- a/bin/CMakeLists.txt +++ b/bin/CMakeLists.txt @@ -13,3 +13,7 @@ endif() if(FF_BUILD_BIN_EXPORT_MODEL_ARCH) add_subdirectory(export-model-arch) endif() + +if(FF_BUILD_BIN_RUN_MODEL) + add_subdirectory(run-model) +endif() diff --git a/bin/run-model/CMakeLists.txt b/bin/run-model/CMakeLists.txt new file mode 100644 index 0000000000..a4e3f0ea9c --- /dev/null +++ b/bin/run-model/CMakeLists.txt @@ -0,0 +1,10 @@ +ff_add_executable( + NAME + run-model + SRC_PATTERNS + src/*.cc + PRIVATE_INCLUDE + include/ + DEPS + realm-execution +) diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc new file mode 100644 index 0000000000..b39b97f2a3 --- /dev/null +++ b/bin/run-model/src/run-model/main.cc @@ -0,0 +1,72 @@ +#include "realm-execution/realm_context.h" +#include "realm-execution/realm_manager.h" +#include "utils/cli/cli_get_help_message.h" +#include "utils/cli/cli_parse.h" +#include "utils/cli/cli_parse_result.h" +#include "utils/cli/cli_spec.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/positive_int/positive_int.h" +#include + +using namespace FlexFlow; + +static char *leak_string_contents(std::string_view str) { + // Realm command-line arguments require char* so intentionally leak the + // allocated string contents here + std::vector *content = new std::vector{str.begin(), str.end()}; + content->push_back(0); // NUL byte + return content->data(); +} + +static std::vector make_realm_args(std::string_view executable_name) { + std::vector result; + result.push_back(leak_string_contents(executable_name)); + return result; +} + +int main(int argc, char **argv) { + CLISpec cli = empty_cli_spec(); + + CLIArgumentKey arg_key_help = cli_add_help_flag(cli); + + CLIArgumentKey key_mapped_pcg_json_file = cli_add_positional_argument( + cli, + CLIPositionalArgumentSpec{ + "mapped_pcg_json", + std::nullopt, + "path to a file containing mappped PCG encoded as JSON"}); + + ASSERT(argc >= 1); + std::string prog_name = argv[0]; + + CLIParseResult parsed = ({ + tl::expected result = + cli_parse(cli, argc, argv); + if (!result.has_value()) { + std::string error_msg = result.error(); + std::cerr << cli_get_help_message(prog_name, cli); + std::cerr << std::endl; + std::cerr << "error: " << error_msg << std::endl; + return 1; + } + + result.value(); + }); + + bool help = cli_get_flag(parsed, arg_key_help); + if (help) { + std::cerr << cli_get_help_message(prog_name, cli); + return 1; + } + + std::vector realm_args = make_realm_args(prog_name); + int realm_argc = realm_args.size(); + char **realm_argv = realm_args.data(); + RealmManager manager(&realm_argc, &realm_argv); + + FlexFlow::Realm::Event event = + manager.start_controller([](RealmContext &ctx) {}); + event.wait(); + + return 0; +} From 39b5eb9288facfa5566f15300c28a49ffe4a764a Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 6 Mar 2026 09:47:20 -0800 Subject: [PATCH 35/51] JSON decoding code. DO NOT BUILD. --- bin/run-model/src/run-model/main.cc | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc index b39b97f2a3..55493a4ecd 100644 --- a/bin/run-model/src/run-model/main.cc +++ b/bin/run-model/src/run-model/main.cc @@ -1,3 +1,5 @@ +#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h" +#include "realm-execution/pcg_instance/pcg_instance.h" #include "realm-execution/realm_context.h" #include "realm-execution/realm_manager.h" #include "utils/cli/cli_get_help_message.h" @@ -6,6 +8,7 @@ #include "utils/cli/cli_spec.h" #include "utils/nonnegative_int/nonnegative_int.h" #include "utils/positive_int/positive_int.h" +#include #include using namespace FlexFlow; @@ -29,7 +32,7 @@ int main(int argc, char **argv) { CLIArgumentKey arg_key_help = cli_add_help_flag(cli); - CLIArgumentKey key_mapped_pcg_json_file = cli_add_positional_argument( + CLIArgumentKey key_mapped_pcg_json = cli_add_positional_argument( cli, CLIPositionalArgumentSpec{ "mapped_pcg_json", @@ -59,13 +62,21 @@ int main(int argc, char **argv) { return 1; } + std::string mapped_pcg_json = cli_get_argument(parsed, key_mapped_pcg_json); + std::vector realm_args = make_realm_args(prog_name); int realm_argc = realm_args.size(); char **realm_argv = realm_args.data(); RealmManager manager(&realm_argc, &realm_argv); FlexFlow::Realm::Event event = - manager.start_controller([](RealmContext &ctx) {}); + manager.start_controller([&](RealmContext &ctx) { + MappedParallelComputationGraph mpcg = [&]() { + std::ifstream f(mapped_pcg_json); + nlohmann::json mpcg_json = nlohmann::json::parse(f); + return mpcg_json.get(); + }(); + }); event.wait(); return 0; From e4bb542149d332a15eff11769fb4e7943cac62e7 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Mon, 9 Mar 2026 15:35:54 -0700 Subject: [PATCH 36/51] Version of run-model that runs a mapped PCG (without loss). --- bin/run-model/src/run-model/main.cc | 54 ++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc index 55493a4ecd..811a11933c 100644 --- a/bin/run-model/src/run-model/main.cc +++ b/bin/run-model/src/run-model/main.cc @@ -1,3 +1,4 @@ +#include "pcg/file_format/v1/v1_mapped_parallel_computation_graph.h" #include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h" #include "realm-execution/pcg_instance/pcg_instance.h" #include "realm-execution/realm_context.h" @@ -69,14 +70,51 @@ int main(int argc, char **argv) { char **realm_argv = realm_args.data(); RealmManager manager(&realm_argc, &realm_argv); - FlexFlow::Realm::Event event = - manager.start_controller([&](RealmContext &ctx) { - MappedParallelComputationGraph mpcg = [&]() { - std::ifstream f(mapped_pcg_json); - nlohmann::json mpcg_json = nlohmann::json::parse(f); - return mpcg_json.get(); - }(); - }); + FlexFlow::Realm::Event event = manager.start_controller([&](RealmContext + &ctx) { + MappedParallelComputationGraph mpcg = [&]() { + std::ifstream f(mapped_pcg_json); + nlohmann::json mpcg_json = nlohmann::json::parse(f); + return from_v1(mpcg_json.get()); + }(); + + // instantiate computation graph + OptimizerAttrs optimizer_attrs = + OptimizerAttrs{SGDOptimizerAttrs{/*lr=*/0.001, + /*momentum=*/0.9, + /*nesterov=*/false, + /*weight_decay=*/0.001}}; + + std::unordered_map input_tensors; + + DistributedDeviceHandle device_handle = + create_distributed_device_handle(ctx, + /*workSpaceSize=*/1024 * 1024, + /*allowTensorOpMathConversion=*/true); + + PCGInstance pcg_instance = create_pcg_instance( + /*ctx=*/ctx, + /*mpcg=*/mpcg, + /*optimizer=*/optimizer_attrs, + /*loss=*/std::nullopt, + /*label_tensor=*/std::nullopt, + /*logit_tensor=*/std::nullopt, + /*loss_mapping=*/std::nullopt, + /*input_tensors=*/input_tensors, + /*profiling_settings=*/ProfilingSettings{0, 0}, + /*device_handle=*/device_handle, + /*iteration_config=*/FFIterationConfig{1_p}); + + // begin training loop + int num_epochs = 5; + for (int i = 0; i < num_epochs; i++) { + perform_all_passes_for_pcg_instance( + /*instance=*/pcg_instance, + /*profiling_settings=*/ProfilingSettings{0, 0}, + /*device_handle=*/device_handle, + /*iteration_config=*/FFIterationConfig{1_p}); + } + }); event.wait(); return 0; From 00ceae1924edabaf04ba14084c14cf0a8973b240 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 25 Mar 2026 10:41:52 -0700 Subject: [PATCH 37/51] Updates for recent Realm backend changes. --- bin/run-model/src/run-model/main.cc | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc index 811a11933c..3f12e0c2e0 100644 --- a/bin/run-model/src/run-model/main.cc +++ b/bin/run-model/src/run-model/main.cc @@ -1,6 +1,7 @@ #include "pcg/file_format/v1/v1_mapped_parallel_computation_graph.h" #include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h" -#include "realm-execution/pcg_instance/pcg_instance.h" +#include "realm-execution/distributed_ff_handle.h" +#include "realm-execution/pcg_instance.h" #include "realm-execution/realm_context.h" #include "realm-execution/realm_manager.h" #include "utils/cli/cli_get_help_message.h" @@ -70,8 +71,8 @@ int main(int argc, char **argv) { char **realm_argv = realm_args.data(); RealmManager manager(&realm_argc, &realm_argv); - FlexFlow::Realm::Event event = manager.start_controller([&](RealmContext - &ctx) { + ControllerTaskResult result = manager.start_controller([&](RealmContext + &ctx) { MappedParallelComputationGraph mpcg = [&]() { std::ifstream f(mapped_pcg_json); nlohmann::json mpcg_json = nlohmann::json::parse(f); @@ -87,19 +88,16 @@ int main(int argc, char **argv) { std::unordered_map input_tensors; - DistributedDeviceHandle device_handle = - create_distributed_device_handle(ctx, - /*workSpaceSize=*/1024 * 1024, - /*allowTensorOpMathConversion=*/true); + DistributedFfHandle device_handle = + create_distributed_ff_handle(ctx, + /*workSpaceSize=*/1024 * 1024, + /*allowTensorOpMathConversion=*/true); PCGInstance pcg_instance = create_pcg_instance( /*ctx=*/ctx, /*mpcg=*/mpcg, /*optimizer=*/optimizer_attrs, /*loss=*/std::nullopt, - /*label_tensor=*/std::nullopt, - /*logit_tensor=*/std::nullopt, - /*loss_mapping=*/std::nullopt, /*input_tensors=*/input_tensors, /*profiling_settings=*/ProfilingSettings{0, 0}, /*device_handle=*/device_handle, @@ -115,7 +113,7 @@ int main(int argc, char **argv) { /*iteration_config=*/FFIterationConfig{1_p}); } }); - event.wait(); + result.wait(); return 0; } From 2c970263e7d9b01eeb6751238b7a6bdd5e88d769 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Wed, 25 Mar 2026 16:57:43 -0700 Subject: [PATCH 38/51] Fixes for bin/sp-ization-benchmarking. --- bin/sp-ization-benchmarking/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/sp-ization-benchmarking/CMakeLists.txt b/bin/sp-ization-benchmarking/CMakeLists.txt index c211ec067a..b2f2408d30 100644 --- a/bin/sp-ization-benchmarking/CMakeLists.txt +++ b/bin/sp-ization-benchmarking/CMakeLists.txt @@ -5,6 +5,8 @@ ff_add_executable( include/ SRC_PATTERNS *.cc + PRIVATE_INCLUDE + include/ DEPS utils rapidcheck From 2d8f3e8b6cca5d950af260631508c72ef751825e Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 4 Jun 2026 23:12:11 -0700 Subject: [PATCH 39/51] Use pip instead of uv in deploy script, add cli arg parsing to deploy script --- .github/workflows/deploy.yml | 4 +- deploy.sh | 88 +++++++++++++++++++++++++++--------- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 82b577c6e7..39115c4696 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -16,9 +16,7 @@ jobs: apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script - run: ./deploy.sh - env: - GCC_VERSION: ${{ matrix.gcc }} + run: ./deploy.sh --gcc-version ${{ matrix.gcc }} strategy: matrix: diff --git a/deploy.sh b/deploy.sh index 5d46e36d04..5b3af09601 100755 --- a/deploy.sh +++ b/deploy.sh @@ -1,17 +1,66 @@ -#!/bin/bash +#!/usr/bin/env bash -set -e +set -euo pipefail + +err_out() { + >&2 echo "$@" + exit 1 +} + +while [[ $# -gt 0 ]]; do + case $1 in + --ci) + CI="yes" + shift + ;; + --gcc-version) + GCC_VERSION="$2" + shift + shift + ;; + --jobs|-j) + THREADS="$2" + shift + shift + ;; + -*|--*) + err_out "Unknown option: $1" + ;; + *) + err_out "Unknown argument: $1" + exit 1 + ;; + esac +done if [[ -z $CI ]]; then module load cuda cmake fi -export CC=gcc-"${GCC_VERSION:-10}" -export CXX=g++-"${GCC_VERSION:-10}" -export THREADS="${THREADS:-$(nproc)}" + +: ${GCC_VERSION:=10} +: ${THREADS:=$(nproc)} + +export CC=gcc-"$GCC_VERSION" +export CXX=g++-"$$GCC_VERSION" +export THREADS="$THREADS" mkdir -p deploy pushd deploy +require_cmd() { + local cmd="$1" + if ! command -v "$cmd" >/dev/null 2>&1; then + err_out "Could not find command: $cmd" + fi +} + +require_cmd git +require_cmd curl +require_cmd tar +require_cmd cmake +require_cmd make +require_cmd python3 + function build_cmake_library { dep_name="$1" dep_url="$2" @@ -21,7 +70,8 @@ function build_cmake_library { git clone "${dep_url}" "${dep_name}" else mkdir "${dep_name}" - tar xfz <(curl -LsSf "${dep_url}") -C "${dep_name}" --strip-components=1 + curl -LsSf -o "${dep_name}.tar.gz" "${dep_url}" + tar xfz "${dep_name}.tar.gz" -C "${dep_name}" --strip-components=1 fi fi if [[ ! -e "${dep_name}"_install/include ]]; then @@ -34,12 +84,6 @@ function build_cmake_library { export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" } -if [[ ! -e uv ]]; then - mkdir uv - XDG_BIN_HOME="$PWD"/uv sh <(curl -LsSf https://astral.sh/uv/install.sh) --no-modify-path -fi -export PATH="$PATH:$PWD/uv" - if [[ ! -e gasnet ]]; then git clone https://github.com/StanfordLegion/gasnet.git fi @@ -74,17 +118,17 @@ build_cmake_library nlohmann_json https://github.com/nlohmann/json/archive/refs/ build_cmake_library NCCL https://github.com/NVIDIA/nccl/archive/refs/tags/v2.29.7-1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -if [[ ! -e /tmp/$USER/proj ]]; then - git clone -b python-install https://github.com/elliottslaughter/proj.git "/tmp/$USER/proj" - pushd "/tmp/$USER/proj" - uv venv - uv sync - popd # /tmp/$USER/proj +mkdir -p proj/ +if [[ ! -e proj/venv ]]; then + python3 -m venv proj/venv +fi + +source proj/venv/bin/activate + +if ! command -v proj >/dev/null 2>&1 +then + pip install --require-virtualenv 'git+https://git.sr.ht/~lockshaw/proj' fi -# shellcheck disable=SC1090 # Must be out of source to avoid: https://github.com/lockshaw/proj/issues/16 -source "/tmp/$USER/proj/.venv/bin/activate" -export PATH="$PATH:/tmp/$USER/proj/bin" -export PYTHONPATH="$PYTHONPATH:/tmp/$USER/proj" popd # deploy From e9d5adf8ac4aedd4070fbc541e29cfeb6a806ea0 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 4 Jun 2026 23:17:45 -0700 Subject: [PATCH 40/51] Fix shellcheck errors --- deploy.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deploy.sh b/deploy.sh index 5b3af09601..6d2a97bbe9 100755 --- a/deploy.sh +++ b/deploy.sh @@ -23,12 +23,11 @@ while [[ $# -gt 0 ]]; do shift shift ;; - -*|--*) + -*) err_out "Unknown option: $1" ;; *) err_out "Unknown argument: $1" - exit 1 ;; esac done @@ -37,8 +36,8 @@ if [[ -z $CI ]]; then module load cuda cmake fi -: ${GCC_VERSION:=10} -: ${THREADS:=$(nproc)} +: "${GCC_VERSION:=10}" +: "${THREADS:=$(nproc)}" export CC=gcc-"$GCC_VERSION" export CXX=g++-"$$GCC_VERSION" @@ -123,6 +122,7 @@ if [[ ! -e proj/venv ]]; then python3 -m venv proj/venv fi +# shellcheck disable=SC1091 source proj/venv/bin/activate if ! command -v proj >/dev/null 2>&1 From f915d9250ceeb84b6e57c8aa66e38b6fb596bec5 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Thu, 4 Jun 2026 23:42:44 -0700 Subject: [PATCH 41/51] Speed up clones, pull from CC and CXX rather than GCC_VERSION --- .github/workflows/deploy.yml | 5 ++++- deploy.sh | 24 +++++------------------- 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 39115c4696..273de08d38 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -16,7 +16,10 @@ jobs: apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev - name: Run Deploy Script - run: ./deploy.sh --gcc-version ${{ matrix.gcc }} + run: ./deploy.sh + env: + CC: gcc-${{ matrix.gcc }} + CXX: g++-${{ matrix.gcc }} strategy: matrix: diff --git a/deploy.sh b/deploy.sh index 6d2a97bbe9..2b58f8abb7 100755 --- a/deploy.sh +++ b/deploy.sh @@ -9,15 +9,6 @@ err_out() { while [[ $# -gt 0 ]]; do case $1 in - --ci) - CI="yes" - shift - ;; - --gcc-version) - GCC_VERSION="$2" - shift - shift - ;; --jobs|-j) THREADS="$2" shift @@ -32,16 +23,10 @@ while [[ $# -gt 0 ]]; do esac done -if [[ -z $CI ]]; then - module load cuda cmake -fi - -: "${GCC_VERSION:=10}" : "${THREADS:=$(nproc)}" +: "${CMAKE_PREFIX_PATH:=}" -export CC=gcc-"$GCC_VERSION" -export CXX=g++-"$$GCC_VERSION" -export THREADS="$THREADS" +export THREADS mkdir -p deploy pushd deploy @@ -59,6 +44,7 @@ require_cmd tar require_cmd cmake require_cmd make require_cmd python3 +require_cmd nvcc function build_cmake_library { dep_name="$1" @@ -66,7 +52,7 @@ function build_cmake_library { dep_args=("${@:3}") if [[ ! -e ${dep_name} ]]; then if [[ ${dep_url} == *.git ]]; then - git clone "${dep_url}" "${dep_name}" + git clone --depth 1 --single-branch "${dep_url}" "${dep_name}" else mkdir "${dep_name}" curl -LsSf -o "${dep_name}.tar.gz" "${dep_url}" @@ -84,7 +70,7 @@ function build_cmake_library { } if [[ ! -e gasnet ]]; then - git clone https://github.com/StanfordLegion/gasnet.git + git clone --depth 1 --single-branch https://github.com/StanfordLegion/gasnet.git fi if [[ ! -e gasnet/release ]]; then make -C gasnet CONDUIT=ibv From c87892b07b5d328368b3ba52d367fbae66a9c07f Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Fri, 5 Jun 2026 00:35:46 -0700 Subject: [PATCH 42/51] Use deps:: for third-party cmake dependencies --- .github/workflows/deploy.yml | 2 +- bin/run-model/src/run-model/main.cc | 6 ++---- cmake/cuda.cmake | 4 ++++ cmake/cudnn.cmake | 3 +++ cmake/doctestlib.cmake | 2 +- cmake/expected.cmake | 2 ++ cmake/flexflow-utils.cmake | 8 ++++---- cmake/fmt.cmake | 2 ++ cmake/gbenchmark.cmake | 3 +++ cmake/json.cmake | 2 +- cmake/libassert.cmake | 4 ++++ cmake/nccl.cmake | 3 +++ cmake/rapidcheck.cmake | 4 ++++ cmake/realm.cmake | 2 +- cmake/spdlog.cmake | 4 ++++ lib/compiler/CMakeLists.txt | 4 ++-- lib/compiler/test/CMakeLists.txt | 2 +- lib/kernels/CMakeLists.txt | 6 +++--- lib/kernels/test/CMakeLists.txt | 2 +- lib/local-execution/CMakeLists.txt | 2 +- lib/local-execution/test/CMakeLists.txt | 4 ++-- lib/models/CMakeLists.txt | 2 +- lib/models/test/CMakeLists.txt | 2 +- lib/op-attrs/test/CMakeLists.txt | 2 +- lib/pcg/CMakeLists.txt | 2 +- lib/pcg/test/CMakeLists.txt | 2 +- lib/realm-execution/CMakeLists.txt | 4 ++-- lib/realm-execution/test/CMakeLists.txt | 2 +- lib/substitution-generator/test/CMakeLists.txt | 2 +- lib/substitutions/test/CMakeLists.txt | 2 +- lib/task-spec/CMakeLists.txt | 2 +- lib/task-spec/test/CMakeLists.txt | 3 +-- lib/utils/CMakeLists.txt | 10 +++++----- lib/utils/test/CMakeLists.txt | 2 +- lib/utils/test/common/CMakeLists.txt | 4 ++-- 35 files changed, 69 insertions(+), 43 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 273de08d38..58b4765d00 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev + apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev python3-venv - name: Run Deploy Script run: ./deploy.sh diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc index 3f12e0c2e0..c996d6e99c 100644 --- a/bin/run-model/src/run-model/main.cc +++ b/bin/run-model/src/run-model/main.cc @@ -100,8 +100,7 @@ int main(int argc, char **argv) { /*loss=*/std::nullopt, /*input_tensors=*/input_tensors, /*profiling_settings=*/ProfilingSettings{0, 0}, - /*device_handle=*/device_handle, - /*iteration_config=*/FFIterationConfig{1_p}); + /*device_handle=*/device_handle); // begin training loop int num_epochs = 5; @@ -109,8 +108,7 @@ int main(int argc, char **argv) { perform_all_passes_for_pcg_instance( /*instance=*/pcg_instance, /*profiling_settings=*/ProfilingSettings{0, 0}, - /*device_handle=*/device_handle, - /*iteration_config=*/FFIterationConfig{1_p}); + /*device_handle=*/device_handle); } }); result.wait(); diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 0906e8f8d5..e12ac52589 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1 +1,5 @@ +include(aliasing) + find_package(CUDAToolkit 11.7 REQUIRED) + +alias_library(deps::cublas CUDA::cublas) diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index 50b4093198..326fce5cc1 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -1,3 +1,5 @@ +include(aliasing) + find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h HINTS ${CUDAToolkit_INCLUDE_DIRS}) find_library(CUDNN_LIBRARY NAMES cudnn HINTS ${CUDAToolkit_LIBRARY_DIR}) @@ -10,4 +12,5 @@ if(CUDNN_FOUND) IMPORTED_LOCATION "${CUDNN_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" ) + alias_library(deps::cudnn cudnn) endif() diff --git a/cmake/doctestlib.cmake b/cmake/doctestlib.cmake index c203605ee6..d6b5953cd4 100644 --- a/cmake/doctestlib.cmake +++ b/cmake/doctestlib.cmake @@ -8,4 +8,4 @@ target_compile_definitions( INTERFACE DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES ) -alias_library(doctest doctest::doctest) +alias_library(deps::doctest doctest::doctest) diff --git a/cmake/expected.cmake b/cmake/expected.cmake index 77143ce91a..86f6a69fcd 100644 --- a/cmake/expected.cmake +++ b/cmake/expected.cmake @@ -1 +1,3 @@ +include(aliasing) find_package(tl-expected REQUIRED) +alias_library(deps::expected tl::expected) diff --git a/cmake/flexflow-utils.cmake b/cmake/flexflow-utils.cmake index 6250531e8f..5dccd5272d 100644 --- a/cmake/flexflow-utils.cmake +++ b/cmake/flexflow-utils.cmake @@ -113,8 +113,8 @@ function(ff_add_test_executable) DEPS PARSE ${ARGN} - rapidcheck - doctest + deps::rapidcheck + deps::doctest ) project(${FF_TEST_EXEC_NAME}) @@ -180,8 +180,8 @@ function(ff_add_benchmark_executable) target_link_libraries( ${FF_BENCHMARK_EXEC_NAME} ${FF_BENCHMARK_EXEC_DEPS} - benchmark::benchmark - benchmark::benchmark_main) + deps::gbenchmark + deps::gbenchmark-main) define_ff_vars(${FF_BENCHMARK_EXEC_NAME}) ff_set_cxx_properties(${FF_BENCHMARK_EXEC_NAME}) diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index c6d3a73d00..59df33a52f 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -1 +1,3 @@ +include(aliasing) find_package(fmt REQUIRED) +alias_library(deps::fmt fmt::fmt) diff --git a/cmake/gbenchmark.cmake b/cmake/gbenchmark.cmake index 2282b6846d..30b820855f 100644 --- a/cmake/gbenchmark.cmake +++ b/cmake/gbenchmark.cmake @@ -1 +1,4 @@ +include(aliasing) find_package(benchmark REQUIRED) +alias_library(deps::gbenchmark benchmark::benchmark) +alias_library(deps::gbenchmark-main benchmark::benchmark_main) diff --git a/cmake/json.cmake b/cmake/json.cmake index ea71466411..658afcb82e 100644 --- a/cmake/json.cmake +++ b/cmake/json.cmake @@ -2,4 +2,4 @@ include(aliasing) find_package(nlohmann_json REQUIRED) -alias_library(json nlohmann_json) +alias_library(deps::json nlohmann_json) diff --git a/cmake/libassert.cmake b/cmake/libassert.cmake index a6d3af9b55..f9290ee919 100644 --- a/cmake/libassert.cmake +++ b/cmake/libassert.cmake @@ -1 +1,5 @@ +include(aliasing) + find_package(libassert REQUIRED) + +alias_library(deps::libassert libassert::assert) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 8c121cc998..d831dde1db 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,5 +1,8 @@ +include(aliasing) + find_package(NCCL REQUIRED) add_library(nccl INTERFACE) target_include_directories(nccl SYSTEM INTERFACE ${NCCL_INCLUDE_DIRS}) target_link_libraries(nccl INTERFACE ${NCCL_LIBRARIES}) +alias_library(deps::nccl nccl) diff --git a/cmake/rapidcheck.cmake b/cmake/rapidcheck.cmake index 837a3e0d74..8180cc7436 100644 --- a/cmake/rapidcheck.cmake +++ b/cmake/rapidcheck.cmake @@ -1 +1,5 @@ +include(aliasing) + find_package(rapidcheck REQUIRED) + +alias_library(deps::rapidcheck rapidcheck) diff --git a/cmake/realm.cmake b/cmake/realm.cmake index 91d8f1345c..ace8f13f1c 100644 --- a/cmake/realm.cmake +++ b/cmake/realm.cmake @@ -2,4 +2,4 @@ include(aliasing) find_package(Realm REQUIRED) -alias_library(realm Realm::Realm) +alias_library(deps::realm Realm::Realm) diff --git a/cmake/spdlog.cmake b/cmake/spdlog.cmake index 00e7385d9f..3dbe0c8e95 100644 --- a/cmake/spdlog.cmake +++ b/cmake/spdlog.cmake @@ -1 +1,5 @@ +include(aliasing) + find_package(spdlog REQUIRED) + +alias_library(deps::spdlog spdlog::spdlog) diff --git a/lib/compiler/CMakeLists.txt b/lib/compiler/CMakeLists.txt index df4a9d27ae..61a8a16d79 100644 --- a/lib/compiler/CMakeLists.txt +++ b/lib/compiler/CMakeLists.txt @@ -10,9 +10,9 @@ ff_add_library( DEPS op-attrs utils - json + deps::json pcg - spdlog::spdlog + deps::spdlog substitutions ) diff --git a/lib/compiler/test/CMakeLists.txt b/lib/compiler/test/CMakeLists.txt index 3399a45f0f..9e2b532806 100644 --- a/lib/compiler/test/CMakeLists.txt +++ b/lib/compiler/test/CMakeLists.txt @@ -8,7 +8,7 @@ ff_add_test_executable( DEPS utils compiler - doctest + deps::doctest utils-test-common models ) diff --git a/lib/kernels/CMakeLists.txt b/lib/kernels/CMakeLists.txt index 92711afa69..d28cfbb94e 100644 --- a/lib/kernels/CMakeLists.txt +++ b/lib/kernels/CMakeLists.txt @@ -25,9 +25,9 @@ target_include_directories( target_link_libraries( ${project_target} op-attrs - CUDA::cublas - cudnn - nccl + deps::cublas + deps::cudnn + deps::nccl utils pcg ) diff --git a/lib/kernels/test/CMakeLists.txt b/lib/kernels/test/CMakeLists.txt index f4ae6ba56e..c3ba1c34b8 100644 --- a/lib/kernels/test/CMakeLists.txt +++ b/lib/kernels/test/CMakeLists.txt @@ -6,7 +6,7 @@ ff_add_test_executable( PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common kernels op-attrs diff --git a/lib/local-execution/CMakeLists.txt b/lib/local-execution/CMakeLists.txt index a3cfd4e8d3..1f63838d76 100644 --- a/lib/local-execution/CMakeLists.txt +++ b/lib/local-execution/CMakeLists.txt @@ -13,7 +13,7 @@ ff_add_library( kernels task-spec pcg - spdlog::spdlog + deps::spdlog compiler ) diff --git a/lib/local-execution/test/CMakeLists.txt b/lib/local-execution/test/CMakeLists.txt index 0e79376575..e364ed3aee 100644 --- a/lib/local-execution/test/CMakeLists.txt +++ b/lib/local-execution/test/CMakeLists.txt @@ -3,10 +3,10 @@ ff_add_test_executable( local-execution-tests SRC_PATTERNS src/*.cc - PRIVATE_INCLUDE + PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common local-execution kernels diff --git a/lib/models/CMakeLists.txt b/lib/models/CMakeLists.txt index 4f4b22ed47..16e076886f 100644 --- a/lib/models/CMakeLists.txt +++ b/lib/models/CMakeLists.txt @@ -11,7 +11,7 @@ ff_add_library( op-attrs utils pcg - rapidcheck + deps::rapidcheck ) add_subdirectory(test) diff --git a/lib/models/test/CMakeLists.txt b/lib/models/test/CMakeLists.txt index 9c2ae64d4e..349be009c8 100644 --- a/lib/models/test/CMakeLists.txt +++ b/lib/models/test/CMakeLists.txt @@ -7,6 +7,6 @@ ff_add_test_executable( src/ DEPS models - doctest + deps::doctest utils-test-common ) diff --git a/lib/op-attrs/test/CMakeLists.txt b/lib/op-attrs/test/CMakeLists.txt index b6ff72fc00..e79fdb45c8 100644 --- a/lib/op-attrs/test/CMakeLists.txt +++ b/lib/op-attrs/test/CMakeLists.txt @@ -8,6 +8,6 @@ ff_add_test_executable( DEPS utils op-attrs - doctest + deps::doctest utils-test-common ) diff --git a/lib/pcg/CMakeLists.txt b/lib/pcg/CMakeLists.txt index e6eb182740..7ad518bc79 100644 --- a/lib/pcg/CMakeLists.txt +++ b/lib/pcg/CMakeLists.txt @@ -10,7 +10,7 @@ ff_add_library( DEPS op-attrs utils - rapidcheck + deps::rapidcheck ) add_subdirectory(ffi) diff --git a/lib/pcg/test/CMakeLists.txt b/lib/pcg/test/CMakeLists.txt index 685d1d8b88..3aae2ed6f6 100644 --- a/lib/pcg/test/CMakeLists.txt +++ b/lib/pcg/test/CMakeLists.txt @@ -8,6 +8,6 @@ ff_add_test_executable( DEPS utils pcg - doctest + deps::doctest utils-test-common ) diff --git a/lib/realm-execution/CMakeLists.txt b/lib/realm-execution/CMakeLists.txt index 1d244febe8..49fbcfa4e0 100644 --- a/lib/realm-execution/CMakeLists.txt +++ b/lib/realm-execution/CMakeLists.txt @@ -13,10 +13,10 @@ ff_add_library( local-execution op-attrs pcg - spdlog::spdlog + deps::spdlog task-spec utils - realm + deps::realm ) add_subdirectory(test) diff --git a/lib/realm-execution/test/CMakeLists.txt b/lib/realm-execution/test/CMakeLists.txt index b3beff42c0..5cdd6c8282 100644 --- a/lib/realm-execution/test/CMakeLists.txt +++ b/lib/realm-execution/test/CMakeLists.txt @@ -6,7 +6,7 @@ ff_add_test_executable( PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common realm-execution kernels diff --git a/lib/substitution-generator/test/CMakeLists.txt b/lib/substitution-generator/test/CMakeLists.txt index 166c7ab51f..0a61d2e049 100644 --- a/lib/substitution-generator/test/CMakeLists.txt +++ b/lib/substitution-generator/test/CMakeLists.txt @@ -13,7 +13,7 @@ ff_add_test_executable( src/ DEPS utils - doctest + deps::doctest substitution-generator utils-test-common ) diff --git a/lib/substitutions/test/CMakeLists.txt b/lib/substitutions/test/CMakeLists.txt index cfd6383e95..d11c2eab1a 100644 --- a/lib/substitutions/test/CMakeLists.txt +++ b/lib/substitutions/test/CMakeLists.txt @@ -7,7 +7,7 @@ ff_add_test_executable( src/ DEPS utils - doctest + deps::doctest substitutions utils-test-common ) diff --git a/lib/task-spec/CMakeLists.txt b/lib/task-spec/CMakeLists.txt index 4c7fa158bf..af8f7f32e0 100644 --- a/lib/task-spec/CMakeLists.txt +++ b/lib/task-spec/CMakeLists.txt @@ -12,7 +12,7 @@ ff_add_library( utils kernels pcg - spdlog::spdlog + deps::spdlog compiler ) diff --git a/lib/task-spec/test/CMakeLists.txt b/lib/task-spec/test/CMakeLists.txt index 9665dba88e..36de69734e 100644 --- a/lib/task-spec/test/CMakeLists.txt +++ b/lib/task-spec/test/CMakeLists.txt @@ -6,9 +6,8 @@ ff_add_test_executable( PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common - # local-execution kernels task-spec op-attrs diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index fad8193164..73ac005ef2 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -8,12 +8,12 @@ ff_add_library( PRIVATE_INCLUDE src/ DEPS - tl::expected - fmt::fmt - json + deps::expected + deps::fmt + deps::json CUDA::cudart - rapidcheck - libassert::assert + deps::rapidcheck + deps::libassert ) add_subdirectory(ffi) diff --git a/lib/utils/test/CMakeLists.txt b/lib/utils/test/CMakeLists.txt index 660ef99e87..fb2c7e0e07 100644 --- a/lib/utils/test/CMakeLists.txt +++ b/lib/utils/test/CMakeLists.txt @@ -7,7 +7,7 @@ ff_add_test_executable( src/ DEPS utils - doctest + deps::doctest utils-test-common ) diff --git a/lib/utils/test/common/CMakeLists.txt b/lib/utils/test/common/CMakeLists.txt index 18681af9c6..1e7abcaba3 100644 --- a/lib/utils/test/common/CMakeLists.txt +++ b/lib/utils/test/common/CMakeLists.txt @@ -9,6 +9,6 @@ ff_add_library( src/ DEPS utils - rapidcheck - doctest + deps::rapidcheck + deps::doctest ) From f280fdd811ca156706cf661e2048dc353772d704 Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Fri, 5 Jun 2026 00:50:00 -0700 Subject: [PATCH 43/51] Remove unnecessary shell variables from flake.nix --- flake.nix | 2 -- 1 file changed, 2 deletions(-) diff --git a/flake.nix b/flake.nix index a3202f3fb7..3c06347214 100644 --- a/flake.nix +++ b/flake.nix @@ -81,9 +81,7 @@ devShells = rec { ci = mkShell { shellHook = '' - export PATH="$HOME/ff/.scripts/:$PATH" export RC_PARAMS="max_discard_ratio=100" - export CMAKE_FLAGS="" ''; buildInputs = builtins.concatLists [ From c735d877ff2e7d54fff9839b6f89527b097dece2 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:31:53 -0700 Subject: [PATCH 44/51] Restore Sapling defaults. --- deploy.sh | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/deploy.sh b/deploy.sh index 2b58f8abb7..49d190d1ec 100755 --- a/deploy.sh +++ b/deploy.sh @@ -2,6 +2,12 @@ set -euo pipefail +# These modules are specific to Sapling, if deploying to another machine +# customize as necessary. +if [[ -z $CI ]]; then + module load cuda cmake +fi + err_out() { >&2 echo "$@" exit 1 @@ -23,9 +29,15 @@ while [[ $# -gt 0 ]]; do esac done +: "${CC:=gcc-10}" +: "${CXX:=g++-10}" +: "${CUDAARCHS:=60}" : "${THREADS:=$(nproc)}" : "${CMAKE_PREFIX_PATH:=}" +export CC +export CXX +export CUDAARCHS export THREADS mkdir -p deploy @@ -38,13 +50,15 @@ require_cmd() { fi } -require_cmd git -require_cmd curl -require_cmd tar +require_cmd "$CC" +require_cmd "$CXX" require_cmd cmake +require_cmd curl +require_cmd git require_cmd make -require_cmd python3 require_cmd nvcc +require_cmd python3 +require_cmd tar function build_cmake_library { dep_name="$1" @@ -65,6 +79,7 @@ function build_cmake_library { cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" make install -j"$THREADS" popd + rm -rf "${dep_name}"_build fi export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" } @@ -121,7 +136,6 @@ popd # deploy ff_cmake_flags=( -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX="$PWD/../install" - -DCMAKE_CUDA_ARCHITECTURES=60 ) proj dtgen From 75d4d35c89bd4ceaec2fe387736ce52d123b233d Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:34:12 -0700 Subject: [PATCH 45/51] Resort apt package list. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 58b4765d00..2755ec71b4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,7 +13,7 @@ jobs: - name: Install Depedencies run: | apt-get update -qq - apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev mpich libmpich-dev python3 zlib1g-dev python3-venv + apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev libmpich-dev mpich python3 python3-venv zlib1g-dev - name: Run Deploy Script run: ./deploy.sh From 5df4784aee0bda59f7c80995a8cd00a3f3352aec Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:35:55 -0700 Subject: [PATCH 46/51] Actually run iterations during training. --- bin/run-model/src/run-model/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc index c996d6e99c..46e1186b6b 100644 --- a/bin/run-model/src/run-model/main.cc +++ b/bin/run-model/src/run-model/main.cc @@ -107,7 +107,7 @@ int main(int argc, char **argv) { for (int i = 0; i < num_epochs; i++) { perform_all_passes_for_pcg_instance( /*instance=*/pcg_instance, - /*profiling_settings=*/ProfilingSettings{0, 0}, + /*profiling_settings=*/ProfilingSettings{0, 1}, /*device_handle=*/device_handle); } }); From 06fc79706bb6bc040313ec79e7e7d3fefdd88366 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:37:10 -0700 Subject: [PATCH 47/51] Rename deployment script. --- deploy.sh => deploy/sapling.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename deploy.sh => deploy/sapling.sh (100%) diff --git a/deploy.sh b/deploy/sapling.sh similarity index 100% rename from deploy.sh rename to deploy/sapling.sh From 00b247fe24358596c73609244a6a2c1bb45fca8c Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:38:58 -0700 Subject: [PATCH 48/51] Fix check for CI. --- deploy/sapling.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100755 => 100644 deploy/sapling.sh diff --git a/deploy/sapling.sh b/deploy/sapling.sh old mode 100755 new mode 100644 index 49d190d1ec..f74bc1f990 --- a/deploy/sapling.sh +++ b/deploy/sapling.sh @@ -4,7 +4,7 @@ set -euo pipefail # These modules are specific to Sapling, if deploying to another machine # customize as necessary. -if [[ -z $CI ]]; then +if [[ ! -v CI ]]; then module load cuda cmake fi From fdc701ee5da1ab37566041ddbfbc59565c61d5a6 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:40:27 -0700 Subject: [PATCH 49/51] Fix permissions. --- deploy/sapling.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 deploy/sapling.sh diff --git a/deploy/sapling.sh b/deploy/sapling.sh old mode 100644 new mode 100755 From 7a3a862b72029c5e0f88cf6a480f57ad8caf0b41 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 15:43:06 -0700 Subject: [PATCH 50/51] Fix script path. --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2755ec71b4..aad4b036b4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -16,7 +16,7 @@ jobs: apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev libmpich-dev mpich python3 python3-venv zlib1g-dev - name: Run Deploy Script - run: ./deploy.sh + run: ./deploy/sapling.sh env: CC: gcc-${{ matrix.gcc }} CXX: g++-${{ matrix.gcc }} From c653af3fd29c8922594cdd1da86df0c5bb489def Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 5 Jun 2026 18:53:44 -0700 Subject: [PATCH 51/51] Build deps in a subdirectory. --- deploy/sapling.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/sapling.sh b/deploy/sapling.sh index f74bc1f990..04d43e252d 100755 --- a/deploy/sapling.sh +++ b/deploy/sapling.sh @@ -40,8 +40,8 @@ export CXX export CUDAARCHS export THREADS -mkdir -p deploy -pushd deploy +mkdir -p deploy/deps +pushd deploy/deps require_cmd() { local cmd="$1" @@ -131,7 +131,7 @@ then pip install --require-virtualenv 'git+https://git.sr.ht/~lockshaw/proj' fi -popd # deploy +popd # deploy/deps ff_cmake_flags=( -DCMAKE_BUILD_TYPE=RelWithDebInfo