diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000000..aad4b036b4 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,26 @@ +name: deploy +on: [push, pull_request, workflow_dispatch] +jobs: + deploy: + name: Test Deployment (GCC ${{ matrix.gcc }}) + runs-on: ubuntu-latest + container: nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + + steps: + - name: Checkout Git Repository + uses: actions/checkout@v4 + + - name: Install Dependencies + run: | + apt-get update -qq + apt-get install -y build-essential cmake curl gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} git libibverbs-dev libmpich-dev mpich python3 python3-venv zlib1g-dev + + - name: Run Deploy Script + run: ./deploy/sapling.sh + env: + CC: gcc-${{ matrix.gcc }} + CXX: g++-${{ matrix.gcc }} + + strategy: + matrix: + gcc: [10, 11] diff --git a/.gitmodules b/.gitmodules index 24bf52a686..e69de29bb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,24 +0,0 @@ -[submodule "deps/nccl"] - path = deps/nccl - url = https://github.com/NVIDIA/nccl.git -[submodule "deps/json"] - path = deps/json - url = https://github.com/nlohmann/json.git -[submodule "deps/spdlog"] - path = deps/spdlog - url = https://github.com/gabime/spdlog.git -[submodule "deps/rapidcheck"] - path = deps/rapidcheck - url = https://github.com/emil-e/rapidcheck.git -[submodule "deps/doctest"] - path = deps/doctest - url = https://github.com/doctest/doctest.git -[submodule "deps/visit_struct"] - path = deps/visit_struct - url = https://github.com/cbeck88/visit_struct.git -[submodule "deps/expected"] - path = deps/expected - url = https://github.com/TartanLlama/expected.git -[submodule "deps/fmt"] - path = deps/fmt - url = https://github.com/fmtlib/fmt.git diff --git a/.proj.toml b/.proj.toml index 750f7e5955..522e11c369 100644 --- a/.proj.toml +++ b/.proj.toml @@ -111,6 +111,10 @@ cuda = false type = "bin" cuda = false +[targets.run-model] +type = "bin" +cuda = false + # default_build_targets = [ # 
"utils", # ] @@ -124,5 +128,4 @@ cuda = false # ] [cmake_flags_extra] -FF_CUDA_ARCH = "60" CMAKE_CUDA_ARCHITECTURES = "60" diff --git a/CMakeLists.txt b/CMakeLists.txt index 4723a3168d..4d62faf688 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,25 +1,13 @@ -cmake_minimum_required(VERSION 3.10) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(FlexFlow) -set( - CMAKE_MODULE_PATH - ${CMAKE_MODULE_PATH} - ${CMAKE_CURRENT_LIST_DIR}/cmake +list( + APPEND + CMAKE_MODULE_PATH + ${CMAKE_CURRENT_LIST_DIR}/cmake ${CMAKE_CURRENT_LIST_DIR}/cmake/Modules ) -# Detect OS type and Linux version (if it applies) -set(LINUX_VERSION "") -if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - find_program(LSB_RELEASE_EXEC lsb_release) - if(LSB_RELEASE_EXEC) - execute_process(COMMAND ${LSB_RELEASE_EXEC} -r --short - OUTPUT_VARIABLE LINUX_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - message(STATUS "Linux Version: ${LINUX_VERSION}") - endif() -endif() - set(FF_MAX_DIM "5" CACHE STRING "Maximum tensor order") set(FF_MAX_OPNAME "128" CACHE STRING "Maximum op name length") set(FF_MAX_NUM_OUTPUTS "256" CACHE STRING "Maximum number of outputs (per operator)") @@ -28,13 +16,12 @@ set(FF_MAX_NUM_WEIGHTS "64" CACHE STRING "Maximum number of weights (per operato set(FF_MAX_NUM_FUSED_OPERATORS "64" CACHE STRING "Maximum number of fused tensors") set(FF_MAX_NUM_FUSED_TENSORS "64" CACHE STRING "Maximum number of input and output tensors per fused op") set(FF_MAX_NUM_WORKERS "1024" CACHE STRING "Maximum number of GPUs") -set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING +set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING "Maximum number of regions that can be passed to a task through the TaskSpec interface") set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING "Maximum number of arguments that can be declared in a TaskSignature") option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF) option(FF_USE_PREALM "Build with PRealm profiling interface" ON) -option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled 
libraries, if available" OFF) option(FF_USE_PYTHON "Enable Python" ON) option(FF_BUILD_FROM_PYPI "Build from pypi" OFF) option(FF_USE_CODE_COVERAGE "Enable code coverage" OFF) @@ -43,16 +30,6 @@ set(FF_GPU_BACKENDS cuda hip_cuda hip_rocm intel) set(FF_GPU_BACKEND "cuda" CACHE STRING "Select GPU Backend ${FF_GPU_BACKENDS}") set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS}) -option(FF_USE_EXTERNAL_NCCL "Use pre-installed NCCL" OFF) -option(FF_USE_EXTERNAL_JSON "Use pre-installed nlohmann::json" OFF) -option(FF_USE_EXTERNAL_FMT "Use pre-installed fmt" OFF) -option(FF_USE_EXTERNAL_SPDLOG "Use pre-installed spdlog" OFF) -option(FF_USE_EXTERNAL_DOCTEST "Use pre-installed doctest" OFF) -option(FF_USE_EXTERNAL_RAPIDCHECK "Use pre-installed rapidcheck" OFF) -option(FF_USE_EXTERNAL_EXPECTED "Use pre-installed tl::expected" OFF) -option(FF_USE_EXTERNAL_GBENCHMARK "Use pre-installed google benchmark" OFF) -option(FF_USE_EXTERNAL_LIBASSERT "Use pre-installed libassert" OFF) - option(FF_BUILD_RESNET "build resnet example" OFF) option(FF_BUILD_RESNEXT "build resnext example" OFF) option(FF_BUILD_ALEXNET "build alexnet example" OFF) @@ -72,15 +49,7 @@ option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" ON) option(FF_BUILD_SP_IZATION_BENCHMARKING "build sp-ization benchmarking" ON) option(FF_BUILD_ARG_PARSER "build command line argument parser" OFF) option(FF_BUILD_BIN_EXPORT_MODEL_ARCH "build export-model-arch utility" ON) - -set(FF_CUDA_ARCH "autodetect" CACHE STRING "Target CUDA Arch") -if (FF_CUDA_ARCH STREQUAL "") - message(FATAL_ERROR "FF_CUDA_ARCH cannot be an empty string. 
Set it to `autodetect`, `all`, or pass one or multiple valid CUDA archs.") -endif() - -if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(LIBEXT ".so") -endif() +option(FF_BUILD_BIN_RUN_MODEL "build run-model binary" ON) include(cuda) include(cudnn) diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt index 6855537460..7bf7909320 100644 --- a/bin/CMakeLists.txt +++ b/bin/CMakeLists.txt @@ -13,3 +13,7 @@ endif() if(FF_BUILD_BIN_EXPORT_MODEL_ARCH) add_subdirectory(export-model-arch) endif() + +if(FF_BUILD_BIN_RUN_MODEL) + add_subdirectory(run-model) +endif() diff --git a/bin/run-model/CMakeLists.txt b/bin/run-model/CMakeLists.txt new file mode 100644 index 0000000000..a4e3f0ea9c --- /dev/null +++ b/bin/run-model/CMakeLists.txt @@ -0,0 +1,10 @@ +ff_add_executable( + NAME + run-model + SRC_PATTERNS + src/*.cc + PRIVATE_INCLUDE + include/ + DEPS + realm-execution +) diff --git a/bin/run-model/src/run-model/main.cc b/bin/run-model/src/run-model/main.cc new file mode 100644 index 0000000000..46e1186b6b --- /dev/null +++ b/bin/run-model/src/run-model/main.cc @@ -0,0 +1,117 @@ +#include "pcg/file_format/v1/v1_mapped_parallel_computation_graph.h" +#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h" +#include "realm-execution/distributed_ff_handle.h" +#include "realm-execution/pcg_instance.h" +#include "realm-execution/realm_context.h" +#include "realm-execution/realm_manager.h" +#include "utils/cli/cli_get_help_message.h" +#include "utils/cli/cli_parse.h" +#include "utils/cli/cli_parse_result.h" +#include "utils/cli/cli_spec.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/positive_int/positive_int.h" +#include +#include + +using namespace FlexFlow; + +static char *leak_string_contents(std::string_view str) { + // Realm command-line arguments require char* so intentionally leak the + // allocated string contents here + std::vector *content = new std::vector{str.begin(), str.end()}; + content->push_back(0); // NUL byte + 
return content->data(); +} + +static std::vector make_realm_args(std::string_view executable_name) { + std::vector result; + result.push_back(leak_string_contents(executable_name)); + return result; +} + +int main(int argc, char **argv) { + CLISpec cli = empty_cli_spec(); + + CLIArgumentKey arg_key_help = cli_add_help_flag(cli); + + CLIArgumentKey key_mapped_pcg_json = cli_add_positional_argument( + cli, + CLIPositionalArgumentSpec{ + "mapped_pcg_json", + std::nullopt, + "path to a file containing mapped PCG encoded as JSON"}); + + ASSERT(argc >= 1); + std::string prog_name = argv[0]; + + CLIParseResult parsed = ({ + tl::expected result = + cli_parse(cli, argc, argv); + if (!result.has_value()) { + std::string error_msg = result.error(); + std::cerr << cli_get_help_message(prog_name, cli); + std::cerr << std::endl; + std::cerr << "error: " << error_msg << std::endl; + return 1; + } + + result.value(); + }); + + bool help = cli_get_flag(parsed, arg_key_help); + if (help) { + std::cerr << cli_get_help_message(prog_name, cli); + return 1; + } + + std::string mapped_pcg_json = cli_get_argument(parsed, key_mapped_pcg_json); + + std::vector realm_args = make_realm_args(prog_name); + int realm_argc = realm_args.size(); + char **realm_argv = realm_args.data(); + RealmManager manager(&realm_argc, &realm_argv); + + ControllerTaskResult result = manager.start_controller([&](RealmContext + &ctx) { + MappedParallelComputationGraph mpcg = [&]() { + std::ifstream f(mapped_pcg_json); + nlohmann::json mpcg_json = nlohmann::json::parse(f); + return from_v1(mpcg_json.get()); + }(); + + // instantiate computation graph + OptimizerAttrs optimizer_attrs = + OptimizerAttrs{SGDOptimizerAttrs{/*lr=*/0.001, + /*momentum=*/0.9, + /*nesterov=*/false, + /*weight_decay=*/0.001}}; + + std::unordered_map input_tensors; + + DistributedFfHandle device_handle = + create_distributed_ff_handle(ctx, + /*workSpaceSize=*/1024 * 1024, + /*allowTensorOpMathConversion=*/true); + + PCGInstance pcg_instance 
= create_pcg_instance( + /*ctx=*/ctx, + /*mpcg=*/mpcg, + /*optimizer=*/optimizer_attrs, + /*loss=*/std::nullopt, + /*input_tensors=*/input_tensors, + /*profiling_settings=*/ProfilingSettings{0, 0}, + /*device_handle=*/device_handle); + + // begin training loop + int num_epochs = 5; + for (int i = 0; i < num_epochs; i++) { + perform_all_passes_for_pcg_instance( + /*instance=*/pcg_instance, + /*profiling_settings=*/ProfilingSettings{0, 1}, + /*device_handle=*/device_handle); + } + }); + result.wait(); + + return 0; +} diff --git a/bin/sp-ization-benchmarking/CMakeLists.txt b/bin/sp-ization-benchmarking/CMakeLists.txt index c211ec067a..b2f2408d30 100644 --- a/bin/sp-ization-benchmarking/CMakeLists.txt +++ b/bin/sp-ization-benchmarking/CMakeLists.txt @@ -5,6 +5,8 @@ ff_add_executable( include/ SRC_PATTERNS *.cc + PRIVATE_INCLUDE + include/ DEPS utils rapidcheck diff --git a/cmake/Modules/CodeCoverage.cmake b/cmake/Modules/CodeCoverage.cmake index 2556b19f7b..5a8fe67057 100644 --- a/cmake/Modules/CodeCoverage.cmake +++ b/cmake/Modules/CodeCoverage.cmake @@ -161,7 +161,7 @@ foreach(LANG ${LANGUAGES}) endif() elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") - message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + message(FATAL_ERROR "Compiler ${CMAKE_${LANG}_COMPILER_ID} is not GNU or Flang! 
Aborting...") endif() endforeach() @@ -748,4 +748,4 @@ function(append_coverage_compiler_flags_to_target name) if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") target_link_libraries(${name} PRIVATE gcov) endif() -endfunction() \ No newline at end of file +endfunction() diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index da358e31dd..e12ac52589 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -1,89 +1,5 @@ -set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) +include(aliasing) -set(CUDA_ROOT ${CUDA_PATH}) -set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_PATH}) -list(APPEND CMAKE_PREFIX_PATH ${CUDA_ROOT}) -find_package(CUDA REQUIRED) -message("cuda-libraries ${CUDA_LIBRARIES}") +find_package(CUDAToolkit 11.7 REQUIRED) -if(CUDA_FOUND) - # strip the cudart lib - string(REGEX REPLACE "[^\;]*cudart[^\;]*(\;?)" "" CUDA_LIBRARIES "${CUDA_LIBRARIES}") - set(CUDA_LIBRARIES ${CUDA_LIBRARIES}) - - # set cuda runtime and driver lib - # override cublas and curand because the FindCUDA module may not find the correct libs - set(CUDADRV_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libcuda${LIBEXT}) - set(CUDA_CUBLAS_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas${LIBEXT}) - set(CUDA_curand_LIBRARY ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand${LIBEXT}) - list(APPEND FLEXFLOW_EXT_LIBRARIES - ${CUDADRV_LIBRARIES} - ${CUDA_CUBLAS_LIBRARIES} - ${CUDA_curand_LIBRARY}) - - # Snippet below from legion/cmake/newcmake/FindCUDA.cmake - # Find the `nvcc` executable - find_program(CUDA_NVCC_EXECUTABLE - NAMES nvcc - PATHS "${CUDA_TOOLKIT_ROOT_DIR}" - ENV CUDA_PATH - ENV CUDA_BIN_PATH - PATH_SUFFIXES bin bin64 - NO_DEFAULT_PATH - ) - # Search default search paths, after we search our own set of paths. - find_program(CUDA_NVCC_EXECUTABLE nvcc) - mark_as_advanced(CUDA_NVCC_EXECUTABLE) - # Compute the CUDA version. 
- if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION) - execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT}) - string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT}) - set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.") - mark_as_advanced(CUDA_VERSION) - else() - # Need to set these based off of the cached value - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}") - string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}") - endif() - - # Set FF_CUDA_ARCH to the list of GPU architectures found on the machine. - if("${FF_CUDA_ARCH}" STREQUAL "autodetect") - include(utils) - detect_installed_gpus(DETECTED_CUDA_ARCH) - message( STATUS "CUDA Detected CUDA_ARCH : ${DETECTED_CUDA_ARCH}" ) - set(FF_CUDA_ARCH ${DETECTED_CUDA_ARCH}) - # Set FF_CUDA_ARCH to the list of all GPU architectures compatible with FlexFlow - elseif("${FF_CUDA_ARCH}" STREQUAL "all") - set(FF_CUDA_ARCH 60,61,62,70,72,75,80,86) - endif() - - # create CUDA_GENCODE list based on FF_CUDA_ARCH - string(REPLACE "," ";" CUDA_GENCODE "${FF_CUDA_ARCH}") - foreach(CODE ${CUDA_GENCODE}) - if(CODE LESS 60) - message( FATAL_ERROR "CUDA architecture <60 not supported") - endif() - endforeach() - string(REGEX REPLACE "([0-9]+)" "-gencode arch=compute_\\1,code=sm_\\1" CUDA_GENCODE "${CUDA_GENCODE}") - - #output - message( STATUS "CUDA_VERSION: ${CUDA_VERSION}") - message( STATUS "CUDA root path : ${CUDA_TOOLKIT_ROOT_DIR}" ) - message( STATUS "CUDA include path : ${CUDA_INCLUDE_DIRS}" ) - message( STATUS "CUDA runtime libraries : ${CUDA_LIBRARIES}" ) - message( STATUS "CUDA driver libraries : ${CUDADRV_LIBRARIES}" ) - message( STATUS "CUBLAS libraries : ${CUDA_CUBLAS_LIBRARIES}" ) - message( STATUS "CURAND libraries : 
${CUDA_curand_LIBRARY}" ) - message( STATUS "CUDA Arch : ${FF_CUDA_ARCH}" ) - message( STATUS "CUDA_GENCODE: ${CUDA_GENCODE}") - - list(APPEND FLEXFLOW_INCLUDE_DIRS - ${CUDA_INCLUDE_DIRS}) - - add_library(cuda INTERFACE) - target_include_directories(cuda SYSTEM INTERFACE "${CUDA_INCLUDE_DIRS}") - -else() - message( FATAL_ERROR "CUDA package not found -> specify search path via CUDA_ROOT variable") -endif() +alias_library(deps::cublas CUDA::cublas) diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index 1055931cef..326fce5cc1 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -1,46 +1,16 @@ -# find cudnn in CUDNN_ROOT and CUDA_ROOT -if(CUDNN_PATH) - set(CUDNN_ROOT ${CUDNN_PATH}) -else() - # if CUDNN_PATH is not set, let's try to find it in the CUDA root - set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR}) - message( "CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}" ) -endif() -find_library(CUDNN_LIBRARY - NAMES cudnn - PATHS ${CUDNN_ROOT} ${CUDA_ROOT} - PATH_SUFFIXES lib lib64 - DOC "CUDNN library." ) - -find_path(CUDNN_INCLUDE_DIR - NAMES cudnn.h - HINTS ${CUDNN_ROOT} ${CUDA_ROOT} - PATH_SUFFIXES include - DOC "CUDNN include directory." 
) - -# find cudnn, set cudnn lib and include -if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) - set(CUDNN_FOUND ON) - set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) - set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) -endif() +include(aliasing) -# find cuda and cudnn -if(CUDNN_FOUND) - list(APPEND FLEXFLOW_EXT_LIBRARIES - ${CUDNN_LIBRARIES}) +find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h HINTS ${CUDAToolkit_INCLUDE_DIRS}) +find_library(CUDNN_LIBRARY NAMES cudnn HINTS ${CUDAToolkit_LIBRARY_DIR}) - list(APPEND FLEXFLOW_INCLUDE_DIRS - ${CUDNN_INCLUDE_DIR}) -endif() +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY CUDNN_INCLUDE_DIR) if(CUDNN_FOUND) -message( STATUS "CUDNN include : ${CUDNN_INCLUDE_DIR}" ) - message( STATUS "CUDNN libraries : ${CUDNN_LIBRARIES}" ) -else() - message( FATAL_ERROR "CUDNN package not found -> specify search path via CUDNN_DIR variable") + add_library(cudnn UNKNOWN IMPORTED) + set_target_properties(cudnn PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + ) + alias_library(deps::cudnn cudnn) endif() - -add_library(cudnn INTERFACE) -target_include_directories(cudnn SYSTEM INTERFACE ${CUDNN_INCLUDE_DIR}) -target_link_libraries(cudnn INTERFACE ${CUDNN_LIBRARY}) diff --git a/cmake/doctestlib.cmake b/cmake/doctestlib.cmake index 6a41a0d5f0..d6b5953cd4 100644 --- a/cmake/doctestlib.cmake +++ b/cmake/doctestlib.cmake @@ -1,16 +1,11 @@ include(aliasing) - -if (FF_USE_EXTERNAL_DOCTEST) - find_package(doctest REQUIRED) - include(doctest) # import doctest_discover_tests - target_compile_definitions( - doctest::doctest - INTERFACE - DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES - ) - alias_library(doctest doctest::doctest) -else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/doctest) - include(${CMAKE_CURRENT_SOURCE_DIR}/deps/doctest/scripts/cmake/doctest.cmake) -endif() +find_package(doctest REQUIRED) +include(doctest) # import 
doctest_discover_tests + +target_compile_definitions( + doctest::doctest + INTERFACE + DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES +) +alias_library(deps::doctest doctest::doctest) diff --git a/cmake/expected.cmake b/cmake/expected.cmake index 17c73532fe..86f6a69fcd 100644 --- a/cmake/expected.cmake +++ b/cmake/expected.cmake @@ -1,9 +1,3 @@ include(aliasing) -if (FF_USE_EXTERNAL_EXPECTED) - find_package(tl-expected REQUIRED) - alias_library(expected tl::expected) -else() - set(EXPECTED_BUILD_TESTS OFF) - set(EXPECTED_BUILD_PACKAGE OFF) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/expected) -endif() +find_package(tl-expected REQUIRED) +alias_library(deps::expected tl::expected) diff --git a/cmake/flexflow-utils.cmake b/cmake/flexflow-utils.cmake index 7d5d189b1c..5dccd5272d 100644 --- a/cmake/flexflow-utils.cmake +++ b/cmake/flexflow-utils.cmake @@ -113,8 +113,8 @@ function(ff_add_test_executable) DEPS PARSE ${ARGN} - rapidcheck - doctest + deps::rapidcheck + deps::doctest ) project(${FF_TEST_EXEC_NAME}) @@ -180,8 +180,8 @@ function(ff_add_benchmark_executable) target_link_libraries( ${FF_BENCHMARK_EXEC_NAME} ${FF_BENCHMARK_EXEC_DEPS} - gbenchmark - gbenchmark-main) + deps::gbenchmark + deps::gbenchmark-main) define_ff_vars(${FF_BENCHMARK_EXEC_NAME}) ff_set_cxx_properties(${FF_BENCHMARK_EXEC_NAME}) diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index 470de6a847..59df33a52f 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -1,8 +1,3 @@ include(aliasing) - -if (FF_USE_EXTERNAL_FMT) - find_package(fmt REQUIRED) -else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/fmt) -endif() -alias_library(fmt fmt::fmt) +find_package(fmt REQUIRED) +alias_library(deps::fmt fmt::fmt) diff --git a/cmake/gbenchmark.cmake b/cmake/gbenchmark.cmake index d5bab19a1f..30b820855f 100644 --- a/cmake/gbenchmark.cmake +++ b/cmake/gbenchmark.cmake @@ -1,10 +1,4 @@ include(aliasing) - -if (FF_USE_EXTERNAL_GBENCHMARK) - find_package(benchmark REQUIRED) - 
alias_library(gbenchmark benchmark::benchmark) - alias_library(gbenchmark-main benchmark::benchmark_main) -else() - message(FATAL_ERROR "Currently FF_USE_EXTERNAL_GBENCHMARK is required") -endif() - +find_package(benchmark REQUIRED) +alias_library(deps::gbenchmark benchmark::benchmark) +alias_library(deps::gbenchmark-main benchmark::benchmark_main) diff --git a/cmake/json.cmake b/cmake/json.cmake index 093ec51cdc..658afcb82e 100644 --- a/cmake/json.cmake +++ b/cmake/json.cmake @@ -1,12 +1,5 @@ include(aliasing) -if (FF_USE_EXTERNAL_JSON) - find_package(nlohmann_json REQUIRED) +find_package(nlohmann_json REQUIRED) - alias_library(json nlohmann_json) -else() - set(JSON_BuildTests OFF CACHE INTERNAL "") - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/json) - - alias_library(json nlohmann_json::nlohmann_json) -endif() +alias_library(deps::json nlohmann_json) diff --git a/cmake/libassert.cmake b/cmake/libassert.cmake index 3abe03b014..f9290ee919 100644 --- a/cmake/libassert.cmake +++ b/cmake/libassert.cmake @@ -1,9 +1,5 @@ include(aliasing) -if(FF_USE_EXTERNAL_LIBASSERT) - find_package(libassert REQUIRED) -else() - message(FATAL_ERROR "Currently FF_USE_EXTERNAL_LIBASSERT is required") -endif() +find_package(libassert REQUIRED) -alias_library(libassert libassert::assert) +alias_library(deps::libassert libassert::assert) diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 755fe00f1b..d831dde1db 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,37 +1,8 @@ include(aliasing) -add_library(nccl INTERFACE) - -if (FF_USE_EXTERNAL_NCCL) - find_package(NCCL REQUIRED) -else() - message(STATUS "Building NCCL from source") - list(TRANSFORM CUDA_GENCODE PREPEND "NVCC_GENCODE=" OUTPUT_VARIABLE NCCL_BUILD_NVCC_GENCODE) - - include(ExternalProject) - ExternalProject_Add(nccl_source_build - SOURCE_DIR ${PROJECT_SOURCE_DIR}/deps/${NCCL_NAME} - PREFIX ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME} - INSTALL_DIR ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME} - BUILD_BYPRODUCTS 
${CMAKE_BINARY_DIR}/deps/${NCCL_NAME}/lib/libnccl${LIBEXT} - INSTALL_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND make src.build "${NCCL_BUILD_NVCC_GENCODE}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "BUILDDIR=${CMAKE_BINARY_DIR}/deps/${NCCL_NAME}" - BUILD_IN_SOURCE 1 - ) - - ExternalProject_Get_Property(nccl_source_build INSTALL_DIR) - set_directory_properties(PROPERTIES ADDITIONAL_CLEAN_FILES "${CMAKE_BINARY_DIR}/deps/nccl_source_build/lib/") - - install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/nccl_source_build/include/ DESTINATION include) - install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/nccl_source_build/lib/ DESTINATION lib PATTERN "pkgconfig" EXCLUDE) +find_package(NCCL REQUIRED) - set(NCCL_INCLUDE_DIR "${INSTALL_DIR}/include") - set(NCCL_LIBRARIES "${INSTALL_DIR}/lib/libnccl${LIBEXT}") - - add_dependencies(nccl nccl_source_build) -endif() - -message(STATUS "NCCL_LIBRARIES = ${NCCL_LIBRARIES}") +add_library(nccl INTERFACE) target_include_directories(nccl SYSTEM INTERFACE ${NCCL_INCLUDE_DIRS}) target_link_libraries(nccl INTERFACE ${NCCL_LIBRARIES}) +alias_library(deps::nccl nccl) diff --git a/cmake/rapidcheck.cmake b/cmake/rapidcheck.cmake index bf8f058e63..8180cc7436 100644 --- a/cmake/rapidcheck.cmake +++ b/cmake/rapidcheck.cmake @@ -1,5 +1,5 @@ -if (FF_USE_EXTERNAL_RAPIDCHECK) - find_package(rapidcheck REQUIRED) -else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/rapidcheck) -endif() +include(aliasing) + +find_package(rapidcheck REQUIRED) + +alias_library(deps::rapidcheck rapidcheck) diff --git a/cmake/realm.cmake b/cmake/realm.cmake index 91d8f1345c..ace8f13f1c 100644 --- a/cmake/realm.cmake +++ b/cmake/realm.cmake @@ -2,4 +2,4 @@ include(aliasing) find_package(Realm REQUIRED) -alias_library(realm Realm::Realm) +alias_library(deps::realm Realm::Realm) diff --git a/cmake/spdlog.cmake b/cmake/spdlog.cmake index 5ba1d6cc15..3dbe0c8e95 100644 --- a/cmake/spdlog.cmake +++ b/cmake/spdlog.cmake @@ -1,12 +1,5 @@ include(aliasing) -if (FF_USE_EXTERNAL_SPDLOG) - 
find_package(spdlog REQUIRED) -else() - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/spdlog) -endif() +find_package(spdlog REQUIRED) -add_library(ff_spdlog INTERFACE) -target_link_libraries(ff_spdlog INTERFACE spdlog::spdlog) -target_compile_definitions(ff_spdlog INTERFACE SPDLOG_FMT_EXTERNAL) -alias_library(spdlog ff_spdlog) +alias_library(deps::spdlog spdlog::spdlog) diff --git a/cmake/utils.cmake b/cmake/utils.cmake deleted file mode 100644 index 4e23ed2e3f..0000000000 --- a/cmake/utils.cmake +++ /dev/null @@ -1,56 +0,0 @@ -set(known_gpu_archs "") -function(remove_duplicate_args __string) - if(${__string}) - set(__list ${${__string}}) - separate_arguments(__list) - list(REMOVE_DUPLICATES __list) - foreach(__e ${__list}) - set(__str "${__str} ${__e}") - endforeach() - set(${__string} ${__str} PARENT_SCOPE) - endif() -endfunction() -function(detect_installed_gpus out_variable) - if(NOT CUDA_gpu_detect_output) - set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) - file(WRITE ${__cufile} "" - "#include \n" - "int main()\n" - "{\n" - " int count = 0;\n" - " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" - " if (count == 0) return -1;\n" - " for (int device = 0; device < count; ++device)\n" - " {\n" - " cudaDeviceProp prop;\n" - " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" - " std::printf(\"%d.%d \", prop.major, prop.minor);\n" - " }\n" - " return 0;\n" - "}\n") - execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${__cufile}" - WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" - RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - if(__nvcc_res EQUAL 0) - message(STATUS "No result from nvcc so building for 2.0") - string(REPLACE "2.1" "2.1(2.0)" __nvcc_out "${__nvcc_out}") - set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_gpus tool" FORCE) - endif() - endif() - if(NOT CUDA_gpu_detect_output) - message(STATUS 
"Automatic GPU detection failed, Architecture is not set: ${known_gpu_archs}.") - set(${out_variable} ${known_gpu_archs} PARENT_SCOPE) - else() - remove_duplicate_args(CUDA_gpu_detect_output) - #Strip leading and trailing whitespaces - string(STRIP "${CUDA_gpu_detect_output}" CUDA_gpu_detect_output) - #Replace spaces in between with commas so you go from "5.2 6.1" to "5.2,6.1" - string(REGEX REPLACE " " "," CUDA_gpu_detect_output "${CUDA_gpu_detect_output}") - # message(${CUDA_gpu_detect_output}) - string(REPLACE "." "" CUDA_gpu_detect_output "${CUDA_gpu_detect_output}") - # message(${CUDA_gpu_detect_output}) - set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) - # message(STATUS "Automatic GPU ARCH detection: ${CUDA_gpu_detect_output}") - endif() -endfunction() diff --git a/cmake/zlib.cmake b/cmake/zlib.cmake deleted file mode 100644 index 0281e02b88..0000000000 --- a/cmake/zlib.cmake +++ /dev/null @@ -1,8 +0,0 @@ -find_package(ZLIB REQUIRED) -if(ZLIB_FOUND) - list(APPEND FLEXFLOW_EXT_LIBRARIES - ${ZLIB_LIBRARIES}) - message( STATUS "ZLIB libraries : ${ZLIB_LIBRARIES}" ) -else() - message( FATAL_ERROR "ZLIB package not found") -endif() \ No newline at end of file diff --git a/deploy/sapling.sh b/deploy/sapling.sh new file mode 100755 index 0000000000..04d43e252d --- /dev/null +++ b/deploy/sapling.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# These modules are specific to Sapling, if deploying to another machine +# customize as necessary. +if [[ ! 
-v CI ]]; then + module load cuda cmake +fi + +err_out() { + >&2 echo "$@" + exit 1 +} + +while [[ $# -gt 0 ]]; do + case $1 in + --jobs|-j) + THREADS="$2" + shift + shift + ;; + -*) + err_out "Unknown option: $1" + ;; + *) + err_out "Unknown argument: $1" + ;; + esac +done + +: "${CC:=gcc-10}" +: "${CXX:=g++-10}" +: "${CUDAARCHS:=60}" +: "${THREADS:=$(nproc)}" +: "${CMAKE_PREFIX_PATH:=}" + +export CC +export CXX +export CUDAARCHS +export THREADS + +mkdir -p deploy/deps +pushd deploy/deps + +require_cmd() { + local cmd="$1" + if ! command -v "$cmd" >/dev/null 2>&1; then + err_out "Could not find command: $cmd" + fi +} + +require_cmd "$CC" +require_cmd "$CXX" +require_cmd cmake +require_cmd curl +require_cmd git +require_cmd make +require_cmd nvcc +require_cmd python3 +require_cmd tar + +function build_cmake_library { + dep_name="$1" + dep_url="$2" + dep_args=("${@:3}") + if [[ ! -e ${dep_name} ]]; then + if [[ ${dep_url} == *.git ]]; then + git clone --depth 1 --single-branch "${dep_url}" "${dep_name}" + else + mkdir "${dep_name}" + curl -LsSf -o "${dep_name}.tar.gz" "${dep_url}" + tar xfz "${dep_name}.tar.gz" -C "${dep_name}" --strip-components=1 + fi + fi + if [[ ! -e "${dep_name}"_install/include ]]; then + mkdir -p "${dep_name}"_build "${dep_name}"_install + pushd "${dep_name}"_build + cmake ../"${dep_name}" -DCMAKE_INSTALL_PREFIX="$PWD"/../"${dep_name}"_install "${dep_args[@]}" + make install -j"$THREADS" + popd + rm -rf "${dep_name}"_build + fi + export CMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH:$PWD/${dep_name}_install" +} + +if [[ ! -e gasnet ]]; then + git clone --depth 1 --single-branch https://github.com/StanfordLegion/gasnet.git +fi +if [[ ! 
-e gasnet/release ]]; then + make -C gasnet CONDUIT=ibv +fi +export GASNet_ROOT="$PWD"/gasnet/release + +set -x + +build_cmake_library zstd https://github.com/facebook/zstd.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library fmt https://github.com/fmtlib/fmt/archive/refs/tags/10.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library cpptrace https://github.com/jeremy-rifkin/cpptrace/archive/refs/tags/v1.0.4.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCPPTRACE_USE_EXTERNAL_ZSTD=ON + +build_cmake_library libassert https://github.com/jeremy-rifkin/libassert/archive/refs/tags/v2.2.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DLIBASSERT_USE_EXTERNAL_CPPTRACE=ON + +build_cmake_library Realm https://github.com/StanfordLegion/realm.git -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_SHARED_LIBS=ON -DREALM_ENABLE_CUDA=ON -DREALM_ENABLE_PREALM=ON -DREALM_ENABLE_CPPTRACE=ON -DREALM_ENABLE_HDF5=OFF -DREALM_MAX_DIM=5 + +build_cmake_library benchmark https://github.com/google/benchmark/archive/refs/tags/v1.9.5.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON + +build_cmake_library rapidcheck https://github.com/emil-e/rapidcheck.git -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library tl-expected https://github.com/TartanLlama/expected/archive/refs/tags/v1.3.1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library doctest https://github.com/doctest/doctest/archive/refs/tags/v2.4.12.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library spdlog https://github.com/gabime/spdlog/archive/refs/tags/v1.17.0.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DSPDLOG_FMT_EXTERNAL=ON + +build_cmake_library nlohmann_json https://github.com/nlohmann/json/archive/refs/tags/v3.12.0.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +build_cmake_library NCCL 
https://github.com/NVIDIA/nccl/archive/refs/tags/v2.29.7-1.tar.gz -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON + +mkdir -p proj/ +if [[ ! -e proj/venv ]]; then + python3 -m venv proj/venv +fi + +# shellcheck disable=SC1091 +source proj/venv/bin/activate + +if ! command -v proj >/dev/null 2>&1 +then + pip install --require-virtualenv 'git+https://git.sr.ht/~lockshaw/proj' +fi + +popd # deploy/deps + +ff_cmake_flags=( + -DCMAKE_BUILD_TYPE=RelWithDebInfo + -DCMAKE_INSTALL_PREFIX="$PWD/install" +) + +proj dtgen + +mkdir -p build install +pushd build +cmake .. "${ff_cmake_flags[@]}" +make -j"$THREADS" +popd # build diff --git a/deps/doctest b/deps/doctest deleted file mode 160000 index b7c21ec5ce..0000000000 --- a/deps/doctest +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b7c21ec5ceeadb4951b00396fc1e4642dd347e5f diff --git a/deps/expected b/deps/expected deleted file mode 160000 index 292eff8bd8..0000000000 --- a/deps/expected +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 292eff8bd8ee230a7df1d6a1c00c4ea0eb2f0362 diff --git a/deps/fmt b/deps/fmt deleted file mode 160000 index f5e54359df..0000000000 --- a/deps/fmt +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f5e54359df4c26b6230fc61d38aa294581393084 diff --git a/deps/json b/deps/json deleted file mode 160000 index 4f8fba1406..0000000000 --- a/deps/json +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4f8fba14066156b73f1189a2b8bd568bde5284c5 diff --git a/deps/nccl b/deps/nccl deleted file mode 160000 index 3996562690..0000000000 --- a/deps/nccl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 399656269027c1818fc999ccf8ec4dd838cec50d diff --git a/deps/rapidcheck b/deps/rapidcheck deleted file mode 160000 index 1505cbbce7..0000000000 --- a/deps/rapidcheck +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 1505cbbce733bde3b78042cf2e9309c0b7f227a2 diff --git a/deps/spdlog b/deps/spdlog deleted file mode 160000 index ad0e89cbfb..0000000000 --- a/deps/spdlog +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 
ad0e89cbfb4d0c1ce4d097e134eb7be67baebb36 diff --git a/deps/visit_struct b/deps/visit_struct deleted file mode 160000 index add1752b7a..0000000000 --- a/deps/visit_struct +++ /dev/null @@ -1 +0,0 @@ -Subproject commit add1752b7a1d806da4cee6aa135518003afd5bf7 diff --git a/flake.nix b/flake.nix index da162eba26..3c06347214 100644 --- a/flake.nix +++ b/flake.nix @@ -81,17 +81,7 @@ devShells = rec { ci = mkShell { shellHook = '' - export PATH="$HOME/ff/.scripts/:$PATH" export RC_PARAMS="max_discard_ratio=100" - export CMAKE_FLAGS="-DFF_USE_EXTERNAL_NCCL=ON \ - -DFF_USE_EXTERNAL_JSON=ON \ - -DFF_USE_EXTERNAL_FMT=ON \ - -DFF_USE_EXTERNAL_SPDLOG=ON \ - -DFF_USE_EXTERNAL_DOCTEST=ON \ - -DFF_USE_EXTERNAL_RAPIDCHECK=ON \ - -DFF_USE_EXTERNAL_EXPECTED=ON \ - -DFF_USE_EXTERNAL_GBENCHMARK=ON \ - -DFF_USE_EXTERNAL_LIBASSERT=ON" ''; buildInputs = builtins.concatLists [ diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 501de709c1..c50e35620a 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -6,7 +6,7 @@ add_subdirectory(local-execution) add_subdirectory(realm-execution) add_subdirectory(task-spec) add_subdirectory(utils) -add_subdirectory(ffi) +#add_subdirectory(ffi) add_subdirectory(substitutions) add_subdirectory(substitution-generator) add_subdirectory(models) diff --git a/lib/compiler/CMakeLists.txt b/lib/compiler/CMakeLists.txt index 26fdc8a38a..61a8a16d79 100644 --- a/lib/compiler/CMakeLists.txt +++ b/lib/compiler/CMakeLists.txt @@ -10,9 +10,9 @@ ff_add_library( DEPS op-attrs utils - json + deps::json pcg - spdlog + deps::spdlog substitutions ) diff --git a/lib/compiler/test/CMakeLists.txt b/lib/compiler/test/CMakeLists.txt index 3399a45f0f..9e2b532806 100644 --- a/lib/compiler/test/CMakeLists.txt +++ b/lib/compiler/test/CMakeLists.txt @@ -8,7 +8,7 @@ ff_add_test_executable( DEPS utils compiler - doctest + deps::doctest utils-test-common models ) diff --git a/lib/kernels/CMakeLists.txt b/lib/kernels/CMakeLists.txt index f5d88f102f..d28cfbb94e 100644 --- 
a/lib/kernels/CMakeLists.txt +++ b/lib/kernels/CMakeLists.txt @@ -1,6 +1,6 @@ set(project_target kernels) -project(${project_target} +project(${project_target} LANGUAGES CXX CUDA) file(GLOB_RECURSE SRC @@ -25,9 +25,9 @@ target_include_directories( target_link_libraries( ${project_target} op-attrs - cuda - cudnn - nccl + deps::cublas + deps::cudnn + deps::nccl utils pcg ) @@ -35,8 +35,8 @@ target_link_libraries( define_ff_vars(${project_target}) set_target_properties( - ${project_target} - PROPERTIES + ${project_target} + PROPERTIES CUDA_STANDARD 17 ) diff --git a/lib/kernels/src/cuda/ops/attention_kernels.cu b/lib/kernels/src/cuda/ops/attention_kernels.cu index a5fd9ea648..c38f26e002 100644 --- a/lib/kernels/src/cuda/ops/attention_kernels.cu +++ b/lib/kernels/src/cuda/ops/attention_kernels.cu @@ -55,7 +55,11 @@ MHAPerDeviceState gpu_init_kernel(PerDeviceFFHandle const &handle, // Currently do not support adding bias to key/value projection assert(!add_bias_kv); +#if CUDNN_MAJOR >= 9 + unsigned attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE; +#else cudnnAttnQueryMap_t attnMode = CUDNN_ATTN_QUERYMAP_ALL_TO_ONE; +#endif // Assume no beam search for now int maxBeamSize = 1; diff --git a/lib/kernels/test/CMakeLists.txt b/lib/kernels/test/CMakeLists.txt index 981f87b3d8..c3ba1c34b8 100644 --- a/lib/kernels/test/CMakeLists.txt +++ b/lib/kernels/test/CMakeLists.txt @@ -3,16 +3,12 @@ ff_add_test_executable( kernels-tests SRC_PATTERNS src/*.cc - PRIVATE_INCLUDE + PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common kernels op-attrs - cuda - cudnn - cudart - cublas pcg ) diff --git a/lib/local-execution/CMakeLists.txt b/lib/local-execution/CMakeLists.txt index b75f81fb3e..1f63838d76 100644 --- a/lib/local-execution/CMakeLists.txt +++ b/lib/local-execution/CMakeLists.txt @@ -13,7 +13,7 @@ ff_add_library( kernels task-spec pcg - spdlog + deps::spdlog compiler ) diff --git a/lib/local-execution/test/CMakeLists.txt b/lib/local-execution/test/CMakeLists.txt index 
0e79376575..e364ed3aee 100644 --- a/lib/local-execution/test/CMakeLists.txt +++ b/lib/local-execution/test/CMakeLists.txt @@ -3,10 +3,10 @@ ff_add_test_executable( local-execution-tests SRC_PATTERNS src/*.cc - PRIVATE_INCLUDE + PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common local-execution kernels diff --git a/lib/models/CMakeLists.txt b/lib/models/CMakeLists.txt index 4f4b22ed47..16e076886f 100644 --- a/lib/models/CMakeLists.txt +++ b/lib/models/CMakeLists.txt @@ -11,7 +11,7 @@ ff_add_library( op-attrs utils pcg - rapidcheck + deps::rapidcheck ) add_subdirectory(test) diff --git a/lib/models/test/CMakeLists.txt b/lib/models/test/CMakeLists.txt index 9c2ae64d4e..349be009c8 100644 --- a/lib/models/test/CMakeLists.txt +++ b/lib/models/test/CMakeLists.txt @@ -7,6 +7,6 @@ ff_add_test_executable( src/ DEPS models - doctest + deps::doctest utils-test-common ) diff --git a/lib/op-attrs/test/CMakeLists.txt b/lib/op-attrs/test/CMakeLists.txt index b6ff72fc00..e79fdb45c8 100644 --- a/lib/op-attrs/test/CMakeLists.txt +++ b/lib/op-attrs/test/CMakeLists.txt @@ -8,6 +8,6 @@ ff_add_test_executable( DEPS utils op-attrs - doctest + deps::doctest utils-test-common ) diff --git a/lib/pcg/CMakeLists.txt b/lib/pcg/CMakeLists.txt index e6eb182740..7ad518bc79 100644 --- a/lib/pcg/CMakeLists.txt +++ b/lib/pcg/CMakeLists.txt @@ -10,7 +10,7 @@ ff_add_library( DEPS op-attrs utils - rapidcheck + deps::rapidcheck ) add_subdirectory(ffi) diff --git a/lib/pcg/ffi/src/pcg.cc b/lib/pcg/ffi/src/pcg.cc index 4e1bd55bc3..c8be9b47e7 100644 --- a/lib/pcg/ffi/src/pcg.cc +++ b/lib/pcg/ffi/src/pcg.cc @@ -1,2 +1,2 @@ #include "flexflow/pcg.h" -#include "pcg/model_compilation.h" +// #include "pcg/model_compilation.h" diff --git a/lib/pcg/test/CMakeLists.txt b/lib/pcg/test/CMakeLists.txt index 685d1d8b88..3aae2ed6f6 100644 --- a/lib/pcg/test/CMakeLists.txt +++ b/lib/pcg/test/CMakeLists.txt @@ -8,6 +8,6 @@ ff_add_test_executable( DEPS utils pcg - doctest + deps::doctest 
utils-test-common ) diff --git a/lib/realm-execution/CMakeLists.txt b/lib/realm-execution/CMakeLists.txt index 25a51ada54..49fbcfa4e0 100644 --- a/lib/realm-execution/CMakeLists.txt +++ b/lib/realm-execution/CMakeLists.txt @@ -13,10 +13,10 @@ ff_add_library( local-execution op-attrs pcg - spdlog + deps::spdlog task-spec utils - realm + deps::realm ) add_subdirectory(test) diff --git a/lib/realm-execution/test/CMakeLists.txt b/lib/realm-execution/test/CMakeLists.txt index b3beff42c0..5cdd6c8282 100644 --- a/lib/realm-execution/test/CMakeLists.txt +++ b/lib/realm-execution/test/CMakeLists.txt @@ -6,7 +6,7 @@ ff_add_test_executable( PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest utils-test-common realm-execution kernels diff --git a/lib/substitution-generator/test/CMakeLists.txt b/lib/substitution-generator/test/CMakeLists.txt index 166c7ab51f..0a61d2e049 100644 --- a/lib/substitution-generator/test/CMakeLists.txt +++ b/lib/substitution-generator/test/CMakeLists.txt @@ -13,7 +13,7 @@ ff_add_test_executable( src/ DEPS utils - doctest + deps::doctest substitution-generator utils-test-common ) diff --git a/lib/substitutions/test/CMakeLists.txt b/lib/substitutions/test/CMakeLists.txt index cfd6383e95..d11c2eab1a 100644 --- a/lib/substitutions/test/CMakeLists.txt +++ b/lib/substitutions/test/CMakeLists.txt @@ -7,7 +7,7 @@ ff_add_test_executable( src/ DEPS utils - doctest + deps::doctest substitutions utils-test-common ) diff --git a/lib/task-spec/CMakeLists.txt b/lib/task-spec/CMakeLists.txt index 3c7c91af67..af8f7f32e0 100644 --- a/lib/task-spec/CMakeLists.txt +++ b/lib/task-spec/CMakeLists.txt @@ -12,7 +12,7 @@ ff_add_library( utils kernels pcg - spdlog + deps::spdlog compiler ) diff --git a/lib/task-spec/test/CMakeLists.txt b/lib/task-spec/test/CMakeLists.txt index 9665dba88e..36de69734e 100644 --- a/lib/task-spec/test/CMakeLists.txt +++ b/lib/task-spec/test/CMakeLists.txt @@ -6,9 +6,8 @@ ff_add_test_executable( PRIVATE_INCLUDE src/ DEPS - doctest + deps::doctest 
utils-test-common - # local-execution kernels task-spec op-attrs diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index e2f7c433d6..73ac005ef2 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -8,13 +8,13 @@ ff_add_library( PRIVATE_INCLUDE src/ DEPS - expected - fmt - json - cuda - rapidcheck - libassert -) + deps::expected + deps::fmt + deps::json + CUDA::cudart + deps::rapidcheck + deps::libassert +) add_subdirectory(ffi) add_subdirectory(test) diff --git a/lib/utils/test/CMakeLists.txt b/lib/utils/test/CMakeLists.txt index 660ef99e87..fb2c7e0e07 100644 --- a/lib/utils/test/CMakeLists.txt +++ b/lib/utils/test/CMakeLists.txt @@ -7,7 +7,7 @@ ff_add_test_executable( src/ DEPS utils - doctest + deps::doctest utils-test-common ) diff --git a/lib/utils/test/common/CMakeLists.txt b/lib/utils/test/common/CMakeLists.txt index 18681af9c6..1e7abcaba3 100644 --- a/lib/utils/test/common/CMakeLists.txt +++ b/lib/utils/test/common/CMakeLists.txt @@ -9,6 +9,6 @@ ff_add_library( src/ DEPS utils - rapidcheck - doctest + deps::rapidcheck + deps::doctest )