Skip to content

Commit 3e6d6e2

Browse files
committed
Add simplest memory reuse algo linear scanner
1 parent f27ad3f commit 3e6d6e2

File tree

4 files changed

+153
-11
lines changed

4 files changed

+153
-11
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#pragma once
2+
#include "mlir/IR/Value.h"
3+
4+
/// A region of the memory pool that the planner hands out to buffers.
///
/// Stays an aggregate, so `Slot{id, sizeBytes, availableAfter}` keeps working;
/// the default member initializers only make a default-constructed Slot
/// well-defined instead of indeterminate.
struct Slot {
  /// Identifier of the slot; -1 until a real id is assigned.
  int id = -1;
  /// Capacity of the slot in bytes.
  int64_t sizeBytes = 0;
  /// Operation index from which the slot may be given to another buffer
  /// (the planner stores "last use index + 1" here).
  int64_t availableAfter = 0;
};
9+
10+
/// Where a single buffer was placed by the memory planner.
struct MemorySlotAssignment {
  /// Id of the slot the buffer was assigned to; defaults to -1
  /// (presumably "not assigned" -- confirm against consumers).
  int64_t slotId = -1;
  /// Byte offset recorded for that slot within the pool.
  int64_t offset = 0;
};
14+
15+
struct MemoryPlanResult {
16+
llvm::DenseMap<mlir::Value, MemorySlotAssignment> assignments;
17+
int64_t totalPoolBytes = 0;
18+
};
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#pragma once
2+
#include "llvm/ADT/DenseMap.h"
3+
#include "llvm/ADT/SmallVector.h"
4+
5+
struct ScheduleCandidate {
6+
llvm::SmallVector<int64_t> tileSizes;
7+
bool promote = false;
8+
bool fuse = false;
9+
bool doubleBuffer = false;
10+
int pipelineStages = 1;
11+
12+
int64_t estimatedTrafficBytes = 0;
13+
int64_t estimatedPeakMemoryBytes = 0;
14+
double estimatedCost = 0.0;
15+
};

mlir/optimization/scheduler/lib/BufferAnalysisPass.cpp

Lines changed: 91 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
#include "lab/MemoryPlan.h"
12
#include "mlir/Analysis/AliasAnalysis.h"
23
#include "mlir/Analysis/Liveness.h"
3-
#include "mlir/IR/AsmState.h"
44
#include "mlir/Dialect/Func/IR/FuncOps.h"
55
#include "mlir/Dialect/Linalg/IR/Linalg.h"
66
#include "mlir/Dialect/MemRef/IR/MemRef.h"
7+
#include "mlir/IR/AsmState.h"
78
#include "mlir/IR/BuiltinTypes.h"
89
#include "mlir/Interfaces/DataLayoutInterfaces.h"
9-
#include "llvm/Support/DebugLog.h"
1010
#include "mlir/Pass/Pass.h"
11+
#include "llvm/ADT/STLExtras.h"
12+
#include "llvm/ADT/SmallVector.h"
13+
#include "llvm/Support/DebugLog.h"
1114
#include "llvm/Support/raw_ostream.h"
1215
#include <cstdint>
1316
#include <sys/types.h>
@@ -126,7 +129,6 @@ class LabBufferStatsAnalysis {
126129
return; // 没有语义使用点?先跳过
127130
int lastUse = opIndex[indexOp];
128131

129-
130132
buffers.push_back(BufferRecord{
131133
result,
132134
*sizeBytes,
@@ -179,8 +181,8 @@ class LabBufferStatsAnalysis {
179181
}
180182

181183
static Operation *findLastSemanticUser(func::FuncOp funcOp,
182-
mlir::Liveness &liveness,
183-
Value value) {
184+
mlir::Liveness &liveness,
185+
Value value) {
184186
Operation *lastUser = nullptr;
185187

186188
funcOp.walk([&](Operation *op) {
@@ -216,6 +218,80 @@ class LabBufferStatsAnalysis {
216218
SmallVector<BufferRecord> buffers;
217219
};
218220

221+
class MemoryPlaner {
222+
public:
223+
explicit MemoryPlaner(
224+
const SmallVector<LabBufferStatsAnalysis::BufferRecord> &buffers)
225+
: buffers_(buffers.begin(), buffers.end()) {}
226+
227+
void plan() {
228+
llvm::sort(buffers_, [](const LabBufferStatsAnalysis::BufferRecord &a,
229+
const LabBufferStatsAnalysis::BufferRecord &b) {
230+
return a.defIndex < b.defIndex;
231+
});
232+
233+
int nextSlotId = 0;
234+
int globalSlotOffset = 0;
235+
for (const auto &buf : buffers_) {
236+
// Try to find a slot for this buffer
237+
int64_t offset = 0;
238+
bool assigned = false;
239+
for (auto &slot : slots_) {
240+
if (slot.availableAfter <= buf.defIndex &&
241+
slot.sizeBytes >= buf.sizeBytes) {
242+
// This slot is available and large enough, assign it to the buffer
243+
result_.assignments[buf.value] =
244+
MemorySlotAssignment{slot.id, offset};
245+
slot.availableAfter = buf.lastUseIndex + 1; // Update availability
246+
assigned = true;
247+
break;
248+
}
249+
offset += slot.sizeBytes; // Next buffer in the same slot will be placed
250+
// after this one
251+
}
252+
if (!assigned) {
253+
// No existing slot can accommodate this buffer, create a new slot
254+
Slot newSlot{nextSlotId++, buf.sizeBytes, buf.lastUseIndex + 1};
255+
result_.assignments[buf.value] =
256+
MemorySlotAssignment{newSlot.id, globalSlotOffset};
257+
globalSlotOffset +=
258+
newSlot.sizeBytes; // Update offset for the next buffer
259+
slots_.push_back(newSlot);
260+
}
261+
}
262+
263+
for (const auto &slot : slots_) {
264+
result_.totalPoolBytes += slot.sizeBytes;
265+
}
266+
};
267+
const MemoryPlanResult &getPlan() const { return result_; }
268+
269+
void printPlan(func::FuncOp func) const {
270+
llvm::outs() << "Memory Plan:\n";
271+
llvm::outs() << " num_slots=" << slots_.size() << "\n";
272+
for (const auto &slot : slots_) {
273+
llvm::outs() << " slot_id=" << slot.id << " size=" << slot.sizeBytes
274+
<< "B available_after_op_index=" << slot.availableAfter
275+
<< "\n";
276+
}
277+
for (const auto &entry : result_.assignments) {
278+
std::string valueStr;
279+
llvm::raw_string_ostream rso(valueStr);
280+
AsmState asmState(func);
281+
entry.first.printAsOperand(rso, asmState);
282+
llvm::outs() << " value=" << rso.str()
283+
<< " -> slot=" << entry.second.slotId
284+
<< " offset=" << entry.second.offset << "\n";
285+
}
286+
llvm::outs() << " total_pool_bytes=" << result_.totalPoolBytes << "B\n";
287+
}
288+
289+
private:
290+
SmallVector<LabBufferStatsAnalysis::BufferRecord> buffers_;
291+
MemoryPlanResult result_;
292+
llvm::SmallVector<Slot> slots_;
293+
};
294+
219295
struct LabBufferStats
220296
: public PassWrapper<LabBufferStats, OperationPass<func::FuncOp>> {
221297
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(LabBufferStats)
@@ -241,14 +317,18 @@ struct LabBufferStats
241317
llvm::raw_string_ostream rso(valueStr);
242318
AsmState asmState(func);
243319
buf.value.printAsOperand(rso, asmState);
244-
llvm::outs() << " value=" << rso.str() << " size=" << buf.sizeBytes << "B"
245-
<< " def=#" << buf.defIndex
246-
<< " last_use=#" << buf.lastUseIndex
247-
<< " lifetime=[" << buf.defIndex
248-
<< "," << buf.lastUseIndex << "]\n";
320+
llvm::outs() << " value=" << rso.str() << " size=" << buf.sizeBytes
321+
<< "B"
322+
<< " def=#" << buf.defIndex << " last_use=#"
323+
<< buf.lastUseIndex << " lifetime=[" << buf.defIndex << ","
324+
<< buf.lastUseIndex << "]\n";
249325
}
250-
llvm::outs() << " peak_live_memory=" << analysis.getStates().peak_memory << "B\n";
326+
llvm::outs() << " peak_live_memory=" << analysis.getStates().peak_memory
327+
<< "B\n";
251328

329+
MemoryPlaner planer(buffers);
330+
planer.plan();
331+
planer.printPlan(func);
252332
}
253333
};
254334
} // namespace
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: ../build/scheduler/lab-scheduler %s --pass-pipeline='builtin.module(func.func(lab-buffer-stats))' --mlir-disable-threading

// Exercises slot reuse in the memory planner with four buffers whose
// lifetimes partly overlap.
module {
  func.func @memory_plan_reuse_demo() {
    %c0 = arith.constant 0.0 : f32

    // Buffer A lives first.
    %a = memref.alloc() : memref<64x64xf32>
    linalg.fill ins(%c0 : f32) outs(%a : memref<64x64xf32>)

    // Buffer B overlaps with A during the copy, so it cannot reuse A's slot.
    %b = memref.alloc() : memref<64x64xf32>
    memref.copy %a, %b : memref<64x64xf32> to memref<64x64xf32>
    memref.dealloc %a : memref<64x64xf32>

    // Buffer C starts after A is dead and is smaller, so it is a good reuse candidate.
    %c = memref.alloc() : memref<32x32xf32>
    linalg.fill ins(%c0 : f32) outs(%c : memref<32x32xf32>)
    memref.dealloc %c : memref<32x32xf32>

    // Buffer D overlaps with B during the copy, so it cannot share B's slot.
    // A and C are both dead by this point, so the slot they used is expected
    // to be free (and large enough) for D.
    %d = memref.alloc() : memref<64x64xf32>
    memref.copy %b, %d : memref<64x64xf32> to memref<64x64xf32>

    memref.dealloc %b : memref<64x64xf32>
    memref.dealloc %d : memref<64x64xf32>
    return
  }
}

0 commit comments

Comments
 (0)