Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b6d00f8
Add support for replicate op in distributed training
Apr 9, 2026
3405621
Add support for replicate op in distributed training
Apr 9, 2026
d033e22
remove ReplicateAttr
Apr 15, 2026
6cd7060
Add comments to realm reductions, Use existing graph methods
Apr 15, 2026
adff3ca
Merge branch 'master' into realm-parallel-operators-replicate
lockshaw May 15, 2026
c50f384
Minor PR fixes
lockshaw May 16, 2026
ac4fffc
Clean up pass expansion code
lockshaw May 16, 2026
fdf4fe5
Remove unnecessary is_replicate_attrs function
lockshaw May 16, 2026
dce15e2
Format.
elliottslaughter May 21, 2026
f2b0754
Format Realm.
elliottslaughter May 21, 2026
9d03c97
Refactor redop infrastructure and switch to Legion's redops.
elliottslaughter May 21, 2026
0d589f2
Fix build for reductions.
elliottslaughter May 21, 2026
5ef0b07
Split reduction from copy and put back device op state init code.
elliottslaughter May 21, 2026
b737517
Replicate is not a task, don't represent it as one.
elliottslaughter May 21, 2026
c536cc9
Put back the per device op state return code path.
elliottslaughter May 21, 2026
48673bc
Changes for breaking up the replicate PR testing code
lockshaw May 28, 2026
164004d
Pass replicate copy insertion test case
lockshaw May 28, 2026
032f6ba
Pass shard expansion test for fwd replicate
lockshaw May 29, 2026
9bea615
Pass shard expansion test for bwd replicate
lockshaw May 29, 2026
d7cce60
Fix other test suites
lockshaw May 29, 2026
30d0320
Change OneToMany DynamicValueAttrs mapping back to bidict
lockshaw Jun 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@ features = [

includes = [
"utils/graph/digraph/digraph_view.h",
"utils/bidict/bidict.h",
"compiler/task_graph_simulator/pcg_task.dtg.h",
"pcg/device_id_t.dtg.h",
"utils/many_to_one/many_to_one.h",
"pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h",
"<unordered_set>",
"<unordered_map>"
"<set>",
"<map>"
]

src_includes = [
"utils/fmt/unordered_set.h",
"utils/hash/unordered_set.h",
"utils/fmt/unordered_map.h",
"utils/hash/unordered_map.h"
"utils/fmt/set.h",
"utils/hash/set.h",
"utils/fmt/map.h",
"utils/hash/map.h"
]

[[fields]]
Expand All @@ -28,8 +28,8 @@ type = "::FlexFlow::DiGraphView"

[[fields]]
name = "node_to_task"
type = "::FlexFlow::bidict<::FlexFlow::Node, ::FlexFlow::PCGTask>"
type = "::FlexFlow::ManyToOne<::FlexFlow::Node, ::FlexFlow::PCGTask>"

[[fields]]
name = "node_to_devices"
type = "std::unordered_map<::FlexFlow::Node, std::unordered_set<::FlexFlow::device_id_t>>"
type = "std::map<::FlexFlow::Node, std::set<::FlexFlow::device_id_t>>"
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h"
#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_communication_edge.h"
#include "utils/containers/filtermap_keys.h"
#include "utils/containers/map_from_pairs.h"
#include "utils/containers/map_keys_with_value_merging.h"
#include "utils/containers/merge_maps_with.h"
#include "utils/containers/require_all_same1.h"
#include "utils/containers/require_same.h"
#include "utils/containers/transform.h"
#include "utils/containers/values.h"
#include "utils/containers/merge_unordered_maps_with.h"
#include "utils/containers/unordered_map_from_pairs.h"

namespace FlexFlow {

std::unordered_set<BinaryTreePath>
abstracted_single_tensor_movement_get_dst_layers(
AbstractedSingleTensorMovement const &m) {
return transform(
keys(m.edge_to_size),
unordered_keys(m.edge_to_size),
[](AbstractedSingleTensorCommunicationEdge const &e) -> BinaryTreePath {
return e.dst.operator_tree_path;
});
Expand All @@ -34,7 +34,7 @@ AbstractedSingleTensorMovement merge_abstracted_single_tensor_movements(
return AbstractedSingleTensorMovement{
/*src_op_tree_path=*/require_all_same1(src_paths),
/*edge_to_size=*/
merge_maps_with(transform(vector_of(movements),
merge_unordered_maps_with(transform(vector_of(movements),
[](AbstractedSingleTensorMovement const &m) {
return m.edge_to_size;
}),
Expand All @@ -51,7 +51,7 @@ AbstractedSingleTensorMovement
return AbstractedSingleTensorMovement{
/*src_op_tree_path=*/src_op_tree_path,
/*edge_to_size=*/
map_from_pairs(
unordered_map_from_pairs(
transform(communications,
[](AbstractedSingleTensorCommunication const &c) {
return std::pair{c.edge, c.size};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.dtg.h"
#include "compiler/machine_mapping/abstracted_tensor_set_movement/abstracted_single_tensor_movement.h"
#include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h"
#include "utils/containers/binary_merge_maps_with.h"
#include "utils/containers/flatmap.h"
#include "utils/containers/map_keys_with_value_merging.h"
#include "utils/containers/merge_maps_with.h"
#include "utils/containers/transform.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/hash/unordered_map.h"
#include "utils/containers/binary_merge_unordered_maps_with.h"

namespace FlexFlow {

Expand Down Expand Up @@ -63,7 +63,7 @@ TensorSetMovement concretize_abstracted_tensor_set_movement(
[](TensorSetMovement const &lhs,
TensorSetMovement const &rhs) -> TensorSetMovement {
return TensorSetMovement{
binary_merge_maps_with(
binary_merge_unordered_maps_with(
lhs.edge_to_size,
rhs.edge_to_size,
[](num_bytes_t l, num_bytes_t r) { return l + r; }),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h"
#include "utils/bidict/algorithms/unordered_set_of.h"
#include "utils/bidict/algorithms/bidict_unordered_set_of.h"
#include "utils/containers/binary_cartesian_product.h"
#include "utils/containers/flatmap.h"
#include "utils/containers/generate_map.h"
#include "utils/containers/get_only.h"
#include "utils/containers/group_by.h"
#include "utils/containers/map_from_pairs.h"
Expand Down Expand Up @@ -45,8 +44,8 @@ AbstractedSingleTensorMovement get_abstracted_single_tensor_movement_along_edge(
op_to_op_get_coord_mapping(mapping);

std::unordered_map<AbstractedSingleTensorCommunicationEdge, num_bytes_t>
single_comms = map_from_pairs(transform(
unordered_set_of(coord_mapping),
single_comms = unordered_map_from_pairs(transform(
bidict_unordered_set_of(coord_mapping),
[&](std::pair<TaskSpaceCoordinate, TaskSpaceCoordinate> const &
src_dst) -> std::pair<AbstractedSingleTensorCommunicationEdge,
num_bytes_t> {
Expand Down Expand Up @@ -101,9 +100,9 @@ AbstractedTensorSetMovement get_abstracted_tensor_set_movement_across_split(
};

return AbstractedTensorSetMovement{
transform(edges_by_tensor.right_groups(),
[&](nonempty_unordered_set<ParallelComputationGraphEdge> const
&edges) {
transform(unordered_set_of(edges_by_tensor.right_groups()),
[&](nonempty_set<ParallelComputationGraphEdge> const &edges)
{
return merge_abstracted_single_tensor_movements(transform(
unordered_multiset_of(edges.unwrap_as_unordered_set()),
to_abstracted_single_tensor_movement));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ SearchResult apply_substitution_and_update_machine_mapping(
select_random(substituted_machine_views));
}

ASSERT(is_subseteq_of(keys(post_node_data), keys(machine_views)));
ASSERT(is_subseteq_of(unordered_keys(post_node_data), unordered_keys(machine_views)));

std::unordered_map<parallel_layer_guid_t, MachineView>
post_node_machine_views =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include "utils/containers/contains.h"
#include "utils/containers/contains_key.h"
#include "utils/containers/flatmap.h"
#include "utils/containers/generate_map.h"
#include "utils/containers/generate_unordered_map.h"
#include "utils/containers/get_all_assignments.h"
#include "utils/containers/keys.h"
#include "utils/containers/set_minus.h"
Expand Down Expand Up @@ -103,7 +103,7 @@ MachineMappingResult
set_minus(boundary_layers, get_constrained_layers(sub_constraints));

std::unordered_map<BinaryTreePath, std::unordered_set<MachineView>>
allowed = generate_map(
allowed = generate_unordered_map(
unconstrained_boundary_layers,
[&](BinaryTreePath const &l) -> std::unordered_set<MachineView> {
UnmappedRuntimeOnlyOpCostEstimateKey leaf =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.dtg.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_edge.h"
#include "utils/containers/generate_map.h"
#include "utils/containers/keys.h"
#include "utils/containers/map_values.h"
#include "utils/containers/sum.h"
Expand Down
12 changes: 6 additions & 6 deletions lib/compiler/src/compiler/machine_mapping/machine_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.h"
#include "utils/bidict/algorithms/bidict_from_map.h"
#include "utils/containers/are_disjoint.h"
#include "utils/containers/binary_merge_disjoint_maps.h"
#include "utils/containers/keys.h"
#include "utils/containers/unordered_keys.h"
#include "utils/containers/binary_merge_disjoint_unordered_maps.h"

namespace FlexFlow {

Expand All @@ -20,7 +20,7 @@ MappedParallelComputationGraph
get_parallel_layers(pcg);

std::unordered_set<parallel_layer_guid_t> mapped_layers =
keys(mapping.machine_views);
unordered_keys(mapping.machine_views);

ASSERT(mapped_layers == pcg_layers);

Expand All @@ -40,7 +40,7 @@ MappedParallelComputationGraph
};

std::unordered_map<parallel_layer_guid_t, MappedOperatorTaskGroup>
mapped_op_task_groups = generate_map(mapped_layers, mapping_for_layer);
mapped_op_task_groups = generate_unordered_map(mapped_layers, mapping_for_layer);

return mapped_pcg_from_pcg_and_mapped_op_task_groups(pcg,
mapped_op_task_groups);
Expand All @@ -49,12 +49,12 @@ MappedParallelComputationGraph
MachineMapping combine_disjoint_mappings(MachineMapping const &m1,
MachineMapping const &m2) {
return MachineMapping{
binary_merge_disjoint_maps(m1.machine_views, m2.machine_views),
binary_merge_disjoint_unordered_maps(m1.machine_views, m2.machine_views),
};
}

bool nodes_are_disjoint(MachineMapping const &m1, MachineMapping const &m2) {
return are_disjoint(keys(m1.machine_views), keys(m2.machine_views));
return are_disjoint(unordered_keys(m1.machine_views), unordered_keys(m2.machine_views));
}

std::optional<MachineMapping> get_machine_mapping_from_machine_mapping_result(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include "utils/containers/filter_values.h"
#include "utils/containers/filtermap_keys.h"
#include "utils/containers/flatmap.h"
#include "utils/containers/generate_map.h"
#include "utils/containers/keys.h"
#include "utils/containers/generate_unordered_map.h"
#include "utils/containers/unordered_keys.h"
#include "utils/containers/map_values.h"
#include "utils/containers/restrict_keys.h"
#include "utils/full_binary_tree/binary_tree_path.h"
Expand All @@ -14,7 +14,7 @@ namespace FlexFlow {
MachineMappingConstraints get_unconstrained_solution_for_layers(
std::unordered_set<BinaryTreePath> const &layers) {
return MachineMappingConstraints{
generate_map(layers,
generate_unordered_map(layers,
[](BinaryTreePath const &) -> std::optional<MachineView> {
return std::nullopt;
}),
Expand All @@ -24,22 +24,22 @@ MachineMappingConstraints get_unconstrained_solution_for_layers(
std::unordered_set<BinaryTreePath>
get_unconstrained_layers(MachineMappingConstraints const &constraints) {

return keys(filter_values(
return unordered_keys(filter_values(
constraints.machine_views,
[](std::optional<MachineView> const &mv) { return !mv.has_value(); }));
}

std::unordered_set<BinaryTreePath>
get_constrained_layers(MachineMappingConstraints const &constraints) {

return keys(filter_values(
return unordered_keys(filter_values(
constraints.machine_views,
[](std::optional<MachineView> const &mv) { return mv.has_value(); }));
}

std::unordered_set<BinaryTreePath>
get_all_layers(MachineMappingConstraints const &partial_solution) {
return keys(partial_solution.machine_views);
return unordered_keys(partial_solution.machine_views);
}

std::optional<MachineView> get_machine_view_for_layer(
Expand Down Expand Up @@ -103,7 +103,7 @@ MachineMappingConstraints with_additional_constraints(

std::optional<MachineView>
require_only_root(MachineMappingConstraints const &constraints) {
ASSERT(keys(constraints.machine_views) ==
ASSERT(unordered_keys(constraints.machine_views) ==
std::unordered_set{binary_tree_root_path()},
fmt::format("require_only_root expected constraints to have only a "
"single key (the root path), but received {}",
Expand Down
4 changes: 2 additions & 2 deletions lib/compiler/src/compiler/machine_mapping/machine_view.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,8 @@ static OperatorAtomicTaskShardBinding
mappings = get_operator_to_ptensor_mappings(op_attrs, inputs_dim_degrees);

std::unordered_map<TensorSlotName, ParallelTensorSpaceCoordinate>
ptensor_coords = generate_map(
keys(inputs_dim_degrees),
ptensor_coords = generate_unordered_map(
unordered_keys(inputs_dim_degrees),
[&](TensorSlotName const &slot_name)
-> ParallelTensorSpaceCoordinate {
num_ptensor_shard_dims_t num_shard_dims =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "utils/containers/contains.h"
#include "utils/containers/flatmap.h"
#include "utils/containers/generate_map.h"
#include "utils/containers/generate_unordered_map.h"
#include "utils/containers/get_all_assignments.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/exception.h"
Expand Down Expand Up @@ -84,7 +84,7 @@ MachineMappingWithMemoryResult get_optimal_machine_mapping_with_memory(
std::unordered_set<BinaryTreePath> const &boundary_layers)
-> std::unordered_set<ParallelLayerGuidObliviousMachineMapping> {
std::unordered_map<BinaryTreePath, std::unordered_set<MachineView>>
allowed = generate_map(
allowed = generate_unordered_map(
boundary_layers,
[&](BinaryTreePath const &l) -> std::unordered_set<MachineView> {
UnmappedRuntimeOnlyOpCostEstimateKey leaf =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
#include "op-attrs/get_operator_task_space.h"
#include "op-attrs/parallel_tensor_shape.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "utils/containers/binary_merge_disjoint_maps.h"
#include "utils/containers/map_keys.h"
#include "utils/containers/require_same.h"
#include "utils/containers/try_at.h"
#include "utils/full_binary_tree/binary_tree_path.h"
#include "utils/containers/binary_merge_disjoint_unordered_maps.h"

namespace FlexFlow {

ParallelLayerGuidObliviousMachineMapping binary_combine_mappings(
ParallelLayerGuidObliviousMachineMapping const &lhs,
ParallelLayerGuidObliviousMachineMapping const &rhs) {
return ParallelLayerGuidObliviousMachineMapping{
binary_merge_disjoint_maps(
binary_merge_disjoint_unordered_maps(
map_keys(lhs.raw_mapping, nest_inside_left_child),
map_keys(rhs.raw_mapping, nest_inside_right_child)),
};
Expand Down Expand Up @@ -45,7 +45,7 @@ std::unordered_map<BinaryTreePath, MachineSpaceStencil>
PCGBinarySPDecomposition const &decomposition,
ParallelLayerGuidObliviousMachineMapping const &mapping) {
std::unordered_set<BinaryTreePath> leaf_paths = require_same(
pcg_sp_tree_get_all_leaf_paths(decomposition), keys(mapping.raw_mapping));
pcg_sp_tree_get_all_leaf_paths(decomposition), unordered_keys(mapping.raw_mapping));

std::unordered_map<BinaryTreePath, OperatorTaskSpace>
path_to_op_task_space_map =
Expand All @@ -54,7 +54,7 @@ std::unordered_map<BinaryTreePath, MachineSpaceStencil>
return get_operator_task_space(pcg, l);
});

return generate_map(
return generate_unordered_map(
leaf_paths, [&](BinaryTreePath const &p) -> MachineSpaceStencil {
return MachineSpaceStencil{
/*operator_task_space=*/path_to_op_task_space_map.at(p),
Expand All @@ -71,12 +71,12 @@ std::unordered_map<BinaryTreePath, std::optional<MachineSpaceStencil>>
std::unordered_map<BinaryTreePath, UnmappedRuntimeOnlyOpCostEstimateKey>
tree_leaf_map = mm_problem_tree_get_path_to_leaf_map(tree);

std::unordered_set<BinaryTreePath> mapping_paths = keys(mapping.raw_mapping);
std::unordered_set<BinaryTreePath> tree_paths = keys(tree_leaf_map);
std::unordered_set<BinaryTreePath> mapping_paths = unordered_keys(mapping.raw_mapping);
std::unordered_set<BinaryTreePath> tree_paths = unordered_keys(tree_leaf_map);

ASSERT(is_subseteq_of(mapping_paths, tree_paths));

return generate_map(
return generate_unordered_map(
tree_paths,
[&](BinaryTreePath const &p) -> std::optional<MachineSpaceStencil> {
if (!contains_key(mapping.raw_mapping, p)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ SPDecompositionTreeNodeType

std::unordered_set<BinaryTreePath>
pcg_sp_tree_get_all_leaf_paths(PCGBinarySPDecomposition const &tree) {
return keys(pcg_sp_tree_get_path_to_leaf_map(tree));
return unordered_keys(pcg_sp_tree_get_path_to_leaf_map(tree));
}

std::unordered_set<BinaryTreePath>
Expand Down
Loading