Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions .idea/TinyInfiniTensor.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion include/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace infini {
// TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
// HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
// =================================== 作业 ===================================

// Free list of reusable blocks inside [0, peak): key is the block's start
// offset, value is its size in bytes. Ordered by address so neighbouring
// blocks can be located and coalesced on free().
std::map<size_t, size_t> freeByStart;
public:
Allocator(Runtime runtime);

Expand Down
1 change: 1 addition & 0 deletions include/core/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace infini
* so the topological sorting fails.
*/
bool topo_sort();
// Rewire op3 to consume op1's first input directly, bypassing op1 (and,
// when non-null, op2). Helper used by optimize() when deleting ops.
void reconstruct(Operator &op1, Operator &op2, Operator &op3);

void optimize();

Expand Down
50 changes: 49 additions & 1 deletion src/core/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,32 @@ namespace infini
// =================================== 作业 ===================================
// TODO: 设计一个算法来分配内存,返回起始地址偏移量
// =================================== 作业 ===================================
//std::cout<<" ******************* map size "<<freeByStart.size()<<std::endl;
used += size;
if(freeByStart.empty()){
peak += size;
return peak - size;
}

bool freeBlkOk = false; //is there a free block large enough to alloc?
auto it = freeByStart.begin();
for(; it != freeByStart.end(); it++){
if(it->second >= size){
freeBlkOk = true;
if(it->second > size)
freeByStart.insert({it->first + size, it->second - size});
break;
}
}

if(!freeBlkOk){
it--;
size_t moreMem = size - it->second;
peak += moreMem;
}
freeByStart.erase(it);
return it->first;

return 0;
}

void Allocator::free(size_t addr, size_t size)
Expand All @@ -44,6 +68,30 @@ namespace infini
// =================================== 作业 ===================================
// TODO: 设计一个算法来回收内存
// =================================== 作业 ===================================
auto it = freeByStart.begin();
int flag = 0;
for(; it != freeByStart.end(); it++){
if(it->first + it->second == addr){
flag = -1;
break;
}
else if(it->first == addr + size){
flag = 1;
break;
}
}
if(flag == -1){
freeByStart.insert({it->first, it->second + size});
freeByStart.erase(it);
}
else if(flag == 0){
freeByStart.insert({addr, size});
}
else if(flag == 1){
freeByStart.insert({addr, size + it->second});
freeByStart.erase(it);
}
used -= size;
}

void *Allocator::getPtr()
Expand Down
165 changes: 165 additions & 0 deletions src/core/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
#include <algorithm>
#include <numeric>
#include <queue>
#include "operators/transpose.h"
#include "operators/matmul.h"
#include <unordered_set>
#include <unordered_map>

namespace infini
{
Expand Down Expand Up @@ -98,6 +102,49 @@ namespace infini
return this->sorted = true;
}

// Returns true iff permutation `b` is the inverse of permutation `a`,
// i.e. applying transpose `a` followed by transpose `b` restores the
// original axis order (b[a[i]] == i for every axis i).
// NOTE(review): parameters are now taken by const reference — the previous
// by-value signature copied both vectors on every call.
bool transposeOpsCancel(const vector<int> &a, const vector<int> &b){
    if (a.size() != b.size())
        return false;
    for (int i = 0; i < (int)a.size(); i++) {
        if (b[a[i]] != i)
            return false;
    }
    return true;
}

// Returns true iff `perm` swaps only the last two axes and leaves every
// leading axis in place — exactly the transposes that can be folded into a
// matmul's transA/transB attribute.
// NOTE(review): now guards against perm.size() < 2, which previously
// evaluated perm[size - 2] with a negative index (undefined behaviour);
// also takes the permutation by const reference instead of by value.
bool transOpCanIntegrateToMatmul(const vector<int> &perm){
    int size = (int)perm.size();
    if (size < 2)
        return false;
    for (int i = 0; i < size - 2; i++) {
        if (perm[i] != i)
            return false;
    }
    return perm[size - 2] == size - 1 && perm[size - 1] == size - 2;
}

// Rewire `op3` so that it reads op1's first input directly, bypassing op1
// (when op2 == nullptr) or the two-op chain op1 -> op2 (when op2 is given).
// Used by optimize() while deleting redundant operators; the caller remains
// responsible for erasing op1/op2 from `ops` and freeing their tensors.
// @param op1 head of the chain being bypassed
// @param op2 optional middle operator; pass a null Ref when only op1 is skipped
// @param op3 surviving downstream consumer
void GraphObj::reconstruct(Operator &op1, Operator &op2, Operator &op3){
// The surviving consumer now reads op1's input tensor directly.
Tensor input = op1->getInputs(0);
if(input) {input->addTarget(op3);}
if(op2 == nullptr){
// op3 consumed op1's output: replace that edge with op1's input.
op3->replaceInput(op1->getOutput(), input);
op1->removeSuccessors(op3);
op3->removePredecessors(op1);
op1->getOutput()->removeTarget(op3);
}
else{
// op3 consumed op2's output: detach op3 from op2 and op2 from op1.
op3->replaceInput(op2->getOutput(), input);
op3->removePredecessors(op2);
op1->removeSuccessors(op2);
op1->getOutput()->removeTarget(op2);
}
// op1's producers now feed op3 directly.
for(auto &pred: op1->getPredecessors()){
op3->addPredecessors(pred);
pred->addSuccessors(op3);
}
}

// Graph-level peephole optimisation. Two rewrites are applied:
//   1. adjacent Transpose pairs whose permutations are inverses cancel and
//      are removed (plus trivial single-input Concat ops);
//   2. a Transpose that only swaps the last two axes is folded into the
//      transA/transB attribute of a consuming MatMul.
// Operators are first unlinked via reconstruct() and collected in
// `toDelete`, then erased from `ops` in a final sweep.
void GraphObj::optimize()
{
// =================================== Assignment ===================================
Expand All @@ -106,6 +153,93 @@
// 1. Remove redundant operators (e.g. two adjacent Transpose ops performing
//    inverse permutations can both be deleted)
// 2. Fuse operators (e.g. MatMul has transA/transB attributes; a Transpose
//    input that swaps the last two dims can be folded into them)
// =================================== Assignment ===================================
std::unordered_set<OperatorObj *> toDelete; // raw pointers used only as identity keys
std::shared_ptr<OperatorObj> emptyPtr; // "no middle op" placeholder for reconstruct()

for(auto &op: ops){
// Skip operators already scheduled for deletion by an earlier rewrite.
if(toDelete.find(op.get()) != toDelete.end()){
continue;
}
OpType opType = op->getOpType();
//std::cout<<"..................... .....................OP start................ ...................\n";
//op->print();
//std::cout<<"...................... ....................Op end................. ....................\n";
if(opType == OpType::Concat){ //case1:Concat
// A single-input Concat is the identity: bypass it for every consumer.
if(op->numInputs() == 1){
toDelete.insert(op.get());
for(auto &succ: op->getSuccessors()){
reconstruct(op, emptyPtr, succ);
}
}
}
else if(opType == OpType::Transpose){ //case2:Transpose
TransposeObj* transOp = dynamic_cast<TransposeObj*>(op.get());
//std::cout<<"......2....OP is Transpose........"<<std::endl;
Tensor input = op->getInputs(0);
// NOTE(review): reconstruct() mutates successor lists while this loop
// runs — assumes getSuccessors() returns a copy, not a live reference;
// confirm against OperatorObj.
for(auto &succ: op->getSuccessors()){
if(succ->getOpType() == OpType::Transpose){ //case2.1:Transpose-->Transpose
TransposeObj* transSucc = dynamic_cast<TransposeObj*>(succ.get());
//std::cout<<".......2.1...successor is Transpose op.......\n";
//transSucc->print();
bool cancelOut = false;
if(transOp && transSucc){
cancelOut = transposeOpsCancel(transOp->getPermute(), transSucc->getPermute());
}
//std::cout<<"......2.1...successor is Transpose :they cancel? "<<cancelOut<<std::endl;
if(cancelOut){
// Delete the successor transpose and connect its consumers to op's input.
toDelete.insert(succ.get());
removeTensor(succ->getOutput());
for(auto &succ_succ: succ->getSuccessors()){
reconstruct(op, succ, succ_succ);
}
// If op now feeds nothing, delete it and unlink it from the graph too.
if(op->getSuccessors().size() == 0){
toDelete.insert(op.get());
removeTensor(op->getOutput());
if(input) {input->removeTarget(op);}
for(auto &pred: op->getPredecessors())
pred->removeSuccessors(op);
}
}
}
else if(succ->getOpType() == OpType::MatMul){ //case2.2:Transpose-->MatMul
bool ok = transOpCanIntegrateToMatmul(transOp->getPermute());
//std::cout<<"------2.2---successor is Matmul op, can integrate? "<<ok<<std::endl;
if(ok){
MatmulObj* matmulSucc = dynamic_cast<MatmulObj*>(succ.get());
if (matmulSucc){
// Toggle transA if the transpose fed the A operand, else transB.
if(succ->getInputs(0)->getGuid() == op->getOutput()->getGuid())
matmulSucc->setTransA(!(matmulSucc->getTransA()));
else
matmulSucc->setTransB(!(matmulSucc->getTransB()));
}
reconstruct(op, emptyPtr, succ);
// Delete the transpose once no consumer remains.
if(op->getSuccessors().size() == 0){
toDelete.insert(op.get());
removeTensor(op->getOutput());
if(input) {input->removeTarget(op);}
for(auto &pred: op->getPredecessors())
pred->removeSuccessors(op);
}
}
}
}
}
}
//delete redundant operators
//std::cout<<" --------ooo--------- before optimize "<<ops.size()<<std::endl;
// Final sweep: erase every operator collected above. Index is only advanced
// when nothing was erased, since erase() shifts later elements left.
for(int i = 0; i < (int)ops.size();){
if(toDelete.find(ops[i].get()) != toDelete.end())
ops.erase(ops.begin() + i);
else
i++;
}
/*
std::cout<<" --------ooo--------- after optimize "<<ops.size()<<std::endl;
sorted = false;
topo_sort();
for(auto &op: ops){
op->print();
}**/
}

Tensor GraphObj::getTensor(int fuid) const
Expand Down Expand Up @@ -152,7 +286,38 @@ namespace infini
// TODO:利用 allocator 给计算图分配内存
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
// =================================== 作业 ===================================
// Offsets (relative to the allocator's base pointer) of every tensor we
// place, keyed by tensor fuid.
std::unordered_map<int, size_t> tensorOffset;
// 1) Tensors with no producing op (graph inputs / weights) live for the
//    whole run: allocate them first.
for (auto &tensor : tensors) {
    if (!tensor->getSource())
        tensorOffset.insert({tensor->getFuid(), allocator.alloc(tensor->getBytes())});
}

// Graph outputs must never be recycled, so remember their fuids.
std::unordered_set<int> graphOutputsFuid;
for (auto &output : this->getOutputs()) {
    graphOutputsFuid.insert(output->getFuid());
}
// 2) Walk ops in (assumed topological) order: allocate each output, then
//    release inputs with no remaining consumers so their space is reused.
for (auto &op : ops) {
    for (auto &output : op->getOutputs()) {
        tensorOffset.insert({output->getFuid(), allocator.alloc(output->getBytes())});
    }
    // const ref: the previous by-value loop copied a Ref (shared_ptr) —
    // an atomic refcount bump per input per op — for no benefit.
    for (const auto &input : op->getInputs()) {
        // NOTE(review): getTargets().size() == 0 reads as "no remaining
        // consumers"; confirm targets are pruned as ops are walked,
        // otherwise this branch can never fire.
        if (input->getTargets().size() == 0) {
            int fuid = input->getFuid();
            if (graphOutputsFuid.find(fuid) == graphOutputsFuid.end()) {
                allocator.free(tensorOffset[fuid], input->getBytes());
            }
        }
    }
}
// 3) Bind every placed tensor to its slice of the single backing buffer.
// Arithmetic on void* is a GNU extension, not ISO C++ — offset via char*.
void *pointer = allocator.getPtr();
for (auto &tensor : tensors) {
    int fuid = tensor->getFuid();
    if (tensorOffset.count(fuid)) {
        size_t offset = tensorOffset[fuid];
        auto blob = make_ref<BlobObj>(runtime, static_cast<char *>(pointer) + offset);
        tensor->setDataBlob(blob);
    }
}
allocator.info();
}

Expand Down
5 changes: 4 additions & 1 deletion src/operators/concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
// TODO:修改 dims,返回正确的 concat 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
// =================================== 作业 ===================================

// Concat keeps every axis of the first input except the concat axis, which
// accumulates the extents of all inputs along that axis (ONNX Concat-13).
// NOTE(review): assumes `dims` was initialised from inputs[0]->getDims()
// and `dim` is the already-normalised concat axis — both set in the code
// elided above; confirm against the full function.
int n = inputs.size();
for(int i = 1; i < n; i++){
dims[dim] += inputs[i]->getDims()[dim];
}
return {{dims}};
}

Expand Down
22 changes: 21 additions & 1 deletion src/operators/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,27 @@ namespace infini
// TODO:返回经过 matmul 操作后的 shape
// REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
// =================================== 作业 ===================================
return std::nullopt;
// Batched matmul output shape: broadcast the leading (batch) axes of A and
// B, then take M from A and N from B, honouring transA/transB.
// NOTE(review): assumes both inputs have the same rank >= 2 and that the
// batch dims are broadcast-compatible (equal or 1) — neither is validated
// here; K (the contracted dim) is not checked either. TODO confirm callers
// guarantee this.
const auto A = inputs[0];
auto a_dim = A->getDims();
const auto B = inputs[1];
auto b_dim = B->getDims();
int size = a_dim.size();
Shape res = a_dim;
// Batch broadcasting: elementwise max of the leading dims (taking b's dim
// whenever it is larger; a's dim is already in `res`).
//multi-dimensional broadcasting except for the last two dimensions
for(int i = 0; i < size - 2; i++){
if(a_dim[i] < b_dim[i])
res[i] = b_dim[i];
}

// Row count M: a_dim[size-2], or a_dim[size-1] when A is transposed
// (res already holds a_dim, so only the transposed case needs a write).
if(transA)
res[size - 2] = a_dim[size - 1];
// Column count N: b_dim[size-1], or b_dim[size-2] when B is transposed.
if(transB)
res[size - 1] = b_dim[size - 2];
else
res[size - 1] = b_dim[size - 1];

std::vector<Shape> vec {res};
return vec;
}

} // namespace infini
7 changes: 6 additions & 1 deletion src/operators/transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ namespace infini
// TODO:修改 output_dim,返回正确的 transpose 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
// =================================== 作业 ===================================
// ONNX Transpose: output axis i takes the extent of input axis
// transposePermute[i].
// NOTE(review): removed the stale commented-out `return std::nullopt;`
// left over from the exercise skeleton — the function now always returns
// the permuted shape.
for (int i = 0; i < rank; i++){
output_dim[i] = input_dim[transposePermute[i]];
}
std::vector<Shape> res {output_dim};
return res;
}

std::string TransposeObj::toString() const
Expand Down
Loading