diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..ab1f416
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,10 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Ignored default folder with query files
+/queries/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/TinyInfiniTensor.iml b/.idea/TinyInfiniTensor.iml
new file mode 100644
index 0000000..d6ebd48
--- /dev/null
+++ b/.idea/TinyInfiniTensor.iml
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..07115cd
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..62de32f
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..67a1dcf
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/include/core/allocator.h b/include/core/allocator.h
index 002601d..f3ff75a 100644
--- a/include/core/allocator.h
+++ b/include/core/allocator.h
@@ -27,7 +27,7 @@ namespace infini {
// TODO:可能需要设计一个数据结构来存储free block,以便于管理和合并
// HINT: 可以使用一个 map 来存储 free block,key 为 block 的起始/结尾地址,value 为 block 的大小
// =================================== 作业 ===================================
-
+        std::map<size_t, size_t> freeByStart;
public:
Allocator(Runtime runtime);
diff --git a/include/core/graph.h b/include/core/graph.h
index c45580c..4c534c8 100644
--- a/include/core/graph.h
+++ b/include/core/graph.h
@@ -50,6 +50,7 @@ namespace infini
* so the topological sorting fails.
*/
bool topo_sort();
+ void reconstruct(Operator &op1, Operator &op2, Operator &op3);
void optimize();
diff --git a/src/core/allocator.cc b/src/core/allocator.cc
index ff593ae..47dd942 100644
--- a/src/core/allocator.cc
+++ b/src/core/allocator.cc
@@ -32,8 +32,32 @@ namespace infini
// =================================== 作业 ===================================
// TODO: 设计一个算法来分配内存,返回起始地址偏移量
// =================================== 作业 ===================================
+        //std::cout<<" ******************* map size "<<freeByStart.size()<<std::endl;
+        used += size;
+        bool freeBlkOk = false;
+        auto it = freeByStart.begin();
+        for(; it != freeByStart.end(); it++){
+            if(it->second >= size){
+ freeBlkOk = true;
+ if(it->second > size)
+ freeByStart.insert({it->first + size, it->second - size});
+ break;
+ }
+ }
+
+ if(!freeBlkOk){
+ it--;
+ size_t moreMem = size - it->second;
+ peak += moreMem;
+ }
+        // NOTE(review): std::map::erase invalidates `it` — copy the address first.
+        // Also note: if freeByStart is empty here, `it` is end() and the erase/deref is UB; verify callers.
+        size_t blockAddr = it->first;
+        freeByStart.erase(it);
+        return blockAddr;
- return 0;
}
void Allocator::free(size_t addr, size_t size)
@@ -44,6 +68,30 @@ namespace infini
// =================================== 作业 ===================================
// TODO: 设计一个算法来回收内存
// =================================== 作业 ===================================
+ auto it = freeByStart.begin();
+ int flag = 0;
+ for(; it != freeByStart.end(); it++){
+ if(it->first + it->second == addr){
+ flag = -1;
+ break;
+ }
+ else if(it->first == addr + size){
+ flag = 1;
+ break;
+ }
+ }
+ if(flag == -1){
+ freeByStart.insert({it->first, it->second + size});
+ freeByStart.erase(it);
+ }
+ else if(flag == 0){
+ freeByStart.insert({addr, size});
+ }
+ else if(flag == 1){
+ freeByStart.insert({addr, size + it->second});
+ freeByStart.erase(it);
+ }
+ used -= size;
}
void *Allocator::getPtr()
diff --git a/src/core/graph.cc b/src/core/graph.cc
index 3a90637..dca2eef 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -2,6 +2,10 @@
 #include <algorithm>
 #include <numeric>
 #include <queue>
+#include "operators/transpose.h"
+#include "operators/matmul.h"
+#include <unordered_set>
+#include <unordered_map>
namespace infini
{
@@ -98,6 +102,49 @@ namespace infini
return this->sorted = true;
}
+    bool transposeOpsCancel(vector<int> a, vector<int> b){
+ if(a.size() != b.size())
+ return false;
+ for(int i = 0; i < (int)a.size(); i++){
+ if(b[a[i]] != i)
+ return false;
+ }
+ return true;
+ }
+
+    bool transOpCanIntegrateToMatmul(vector<int> perm){
+ int size = perm.size();
+ for(int i = 0; i < size - 2; i++){
+ if(perm[i] != i)
+ return false;
+ }
+ if(perm[size - 2] != size - 1 || perm[size - 1] != size - 2)
+ return false;
+ else
+ return true;
+ }
+
+ void GraphObj::reconstruct(Operator &op1, Operator &op2, Operator &op3){
+ Tensor input = op1->getInputs(0);
+ if(input) {input->addTarget(op3);}
+ if(op2 == nullptr){
+ op3->replaceInput(op1->getOutput(), input);
+ op1->removeSuccessors(op3);
+ op3->removePredecessors(op1);
+ op1->getOutput()->removeTarget(op3);
+ }
+ else{
+ op3->replaceInput(op2->getOutput(), input);
+ op3->removePredecessors(op2);
+ op1->removeSuccessors(op2);
+ op1->getOutput()->removeTarget(op2);
+ }
+ for(auto &pred: op1->getPredecessors()){
+ op3->addPredecessors(pred);
+ pred->addSuccessors(op3);
+ }
+ }
+
void GraphObj::optimize()
{
// =================================== 作业 ===================================
@@ -106,6 +153,93 @@ namespace infini
// 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
// 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
// =================================== 作业 ===================================
+        std::unordered_set<OperatorObj *> toDelete;
+        std::shared_ptr<OperatorObj> emptyPtr;
+
+ for(auto &op: ops){
+ if(toDelete.find(op.get()) != toDelete.end()){
+ continue;
+ }
+ OpType opType = op->getOpType();
+ //std::cout<<"..................... .....................OP start................ ...................\n";
+ //op->print();
+ //std::cout<<"...................... ....................Op end................. ....................\n";
+ if(opType == OpType::Concat){ //case1:Concat
+ if(op->numInputs() == 1){
+ toDelete.insert(op.get());
+ for(auto &succ: op->getSuccessors()){
+ reconstruct(op, emptyPtr, succ);
+ }
+ }
+ }
+ else if(opType == OpType::Transpose){ //case2:Transpose
+                TransposeObj* transOp = dynamic_cast<TransposeObj *>(op.get());
+                //std::cout<<"......2....OP is Transpose........"<<std::endl;
+                Tensor input = op->getInputs(0);
+ for(auto &succ: op->getSuccessors()){
+ if(succ->getOpType() == OpType::Transpose){ //case2.1:Transpose-->Transpose
+                        TransposeObj* transSucc = dynamic_cast<TransposeObj *>(succ.get());
+ //std::cout<<".......2.1...successor is Transpose op.......\n";
+ //transSucc->print();
+ bool cancelOut = false;
+ if(transOp && transSucc){
+ cancelOut = transposeOpsCancel(transOp->getPermute(), transSucc->getPermute());
+ }
+                        //std::cout<<"......2.1...successor is Transpose :they cancel? "<<cancelOut<<std::endl;
+                        if(cancelOut){
+                            toDelete.insert(succ.get());
+                            removeTensor(succ->getOutput());
+ for(auto &succ_succ: succ->getSuccessors()){
+ reconstruct(op, succ, succ_succ);
+ }
+ if(op->getSuccessors().size() == 0){
+ toDelete.insert(op.get());
+ removeTensor(op->getOutput());
+ if(input) {input->removeTarget(op);}
+ for(auto &pred: op->getPredecessors())
+ pred->removeSuccessors(op);
+ }
+ }
+ }
+ else if(succ->getOpType() == OpType::MatMul){ //case2.2:Transpose-->MatMul
+ bool ok = transOpCanIntegrateToMatmul(transOp->getPermute());
+                        //std::cout<<"------2.2---successor is Matmul op, can integrate? "<<ok<<std::endl;
+                        if(ok){
+                            MatmulObj* matmulSucc = dynamic_cast<MatmulObj *>(succ.get());
+ if (matmulSucc){
+ if(succ->getInputs(0)->getGuid() == op->getOutput()->getGuid())
+ matmulSucc->setTransA(!(matmulSucc->getTransA()));
+ else
+ matmulSucc->setTransB(!(matmulSucc->getTransB()));
+ }
+ reconstruct(op, emptyPtr, succ);
+ if(op->getSuccessors().size() == 0){
+ toDelete.insert(op.get());
+ removeTensor(op->getOutput());
+ if(input) {input->removeTarget(op);}
+ for(auto &pred: op->getPredecessors())
+ pred->removeSuccessors(op);
+ }
+ }
+ }
+ }
+ }
+ }
+ //delete redundant operators
+        //std::cout<<" --------ooo--------- before optimize "<<ops.size()<<std::endl;
+        // NOTE(review): reconstructed — the erase loop consuming `toDelete` was lost in transit; verify against the original patch.
+        for(auto it = ops.begin(); it != ops.end();){
+            if(toDelete.count(it->get()))
+                it = ops.erase(it);
+            else
+                it++;
+        }
+        /**for(auto &op: ops){
+            op->print();
+        }**/
}
Tensor GraphObj::getTensor(int fuid) const
@@ -152,7 +286,38 @@ namespace infini
// TODO:利用 allocator 给计算图分配内存
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
// =================================== 作业 ===================================
+        std::unordered_map<int, size_t> tensorOffset;
+ for(auto &tensor: tensors){
+ if(!tensor->getSource())
+ tensorOffset.insert({tensor->getFuid(), allocator.alloc(tensor->getBytes())});
+ }
+        std::unordered_set<int> graphOutputsFuid;
+ for(auto &output: this->getOutputs()){
+ graphOutputsFuid.insert(output->getFuid());
+ }
+ for(auto &op: ops){
+ for(auto &output: op->getOutputs()){
+ tensorOffset.insert({output->getFuid(), allocator.alloc(output->getBytes())});
+ }
+ for(auto input: op->getInputs()){
+ if(input->getTargets().size() == 0){
+ int fuid = input->getFuid();
+ if(graphOutputsFuid.find(fuid) == graphOutputsFuid.end()){
+ allocator.free(tensorOffset[fuid], input->getBytes());
+ }
+ }
+ }
+ }
+ void *pointer = allocator.getPtr();
+ for(auto &tensor: tensors){
+ int fuid = tensor->getFuid();
+ if(tensorOffset.count(fuid)){
+ size_t offset = tensorOffset[fuid];
+                auto blob = make_ref<BlobObj>(runtime, static_cast<char *>(pointer) + offset);
+ tensor->setDataBlob(blob);
+ }
+ }
allocator.info();
}
diff --git a/src/operators/concat.cc b/src/operators/concat.cc
index d196330..7408414 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -17,7 +17,10 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
// TODO:修改 dims,返回正确的 concat 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
// =================================== 作业 ===================================
-
+ int n = inputs.size();
+ for(int i = 1; i < n; i++){
+ dims[dim] += inputs[i]->getDims()[dim];
+ }
return {{dims}};
}
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
index 7a16ca2..b57063a 100644
--- a/src/operators/matmul.cc
+++ b/src/operators/matmul.cc
@@ -27,7 +27,27 @@ namespace infini
// TODO:返回经过 matmul 操作后的 shape
// REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
// =================================== 作业 ===================================
- return std::nullopt;
+ const auto A = inputs[0];
+ auto a_dim = A->getDims();
+ const auto B = inputs[1];
+ auto b_dim = B->getDims();
+ int size = a_dim.size();
+ Shape res = a_dim;
+ //multi-dimensional broadcasting except for the last two dimensions
+ for(int i = 0; i < size - 2; i++){
+ if(a_dim[i] < b_dim[i])
+ res[i] = b_dim[i];
+ }
+
+ if(transA)
+ res[size - 2] = a_dim[size - 1];
+ if(transB)
+ res[size - 1] = b_dim[size - 2];
+ else
+ res[size - 1] = b_dim[size - 1];
+
+        std::vector<Shape> vec {res};
+ return vec;
}
} // namespace infini
\ No newline at end of file
diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc
index faab2b6..8379a9c 100644
--- a/src/operators/transpose.cc
+++ b/src/operators/transpose.cc
@@ -33,8 +33,13 @@ namespace infini
// TODO:修改 output_dim,返回正确的 transpose 后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
// =================================== 作业 ===================================
+ for (int i = 0; i < rank; i++){
+ output_dim[i] = input_dim[transposePermute[i]];
+ }
+        std::vector<Shape> res {output_dim};
+ return res;
- return std::nullopt;
+ //return std::nullopt;
}
std::string TransposeObj::toString() const
diff --git a/src/operators/unary.cc b/src/operators/unary.cc
index 3daad36..ceb50bc 100644
--- a/src/operators/unary.cc
+++ b/src/operators/unary.cc
@@ -39,7 +39,9 @@ namespace infini
// TODO:返回经过 clip 操作后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13
// =================================== 作业 ===================================
- return std::nullopt;
+ const auto A = inputs[0];
+ return {{A->getDims()}};
+ //return std::nullopt;
}
std::string ClipObj::toString() const
@@ -66,7 +68,11 @@ namespace infini
// REF_FILE: src/core/operator.cc
// REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
// =================================== 作业 ===================================
- return {};
+ DataType outputType = getOutputDataType();
+ int num = inputs.size();
+        std::vector<DataType> res(num, outputType);
+ return res;
+ //return {};
}
optional> CastObj::inferShape(const TensorVec &inputs)
@@ -75,7 +81,9 @@ namespace infini
// TODO:返回经过 cast 操作后的 shape
// REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
// =================================== 作业 ===================================
- return std::nullopt;
+ const auto A = inputs[0];
+ return {{A->getDims()}};
+ //return std::nullopt;
}
std::string CastObj::toString() const
diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc
index edbd2c8..37eab0d 100644
--- a/src/utils/operator_utils.cc
+++ b/src/utils/operator_utils.cc
@@ -9,8 +9,32 @@ Shape infer_broadcast(const Shape &A, const Shape &B) {
// TODO:对 A 和 B 进行双向广播,返回广播后的形状。
// REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md
// =================================== 作业 ===================================
-
- return {};
+ Shape res;
+ long unsigned int i = 0;
+ if(A.size() > B.size()){
+ long unsigned int diff = A.size() - B.size();
+ for(; i < diff; i++)
+ res.push_back(A[i]);
+ for(; i < A.size(); i++){
+ if(A[i] == 1)
+ res.push_back(B[i-diff]);
+ else
+ res.push_back(A[i]);
+ }
+ }
+ else{
+ long unsigned int diff = B.size() - A.size();
+ for(; i < diff; i++)
+ res.push_back(B[i]);
+ for(; i < B.size(); i++){
+ if(B[i] == 1)
+ res.push_back(A[i-diff]);
+ else
+ res.push_back(B[i]);
+ }
+ }
+
+ return res;
}
int get_real_axis(const int &axis, const int &rank) {
diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc
index fc87fb1..8e23bc9 100644
--- a/test/kernels/nativecpu/test_nativecpu_concat.cc
+++ b/test/kernels/nativecpu/test_nativecpu_concat.cc
@@ -18,8 +18,9 @@ TEST(Concat, NativeCpu) {
t1->setData(IncrementalGenerator());
t2->setData(OneGenerator());
t3->setData(OneGenerator());
-
runtime->run(g);
+ //std::cout<<" --------------###--------------- concat output\n";
+ op->getOutput()->printData();
EXPECT_TRUE(op->getOutput()->equalData(
vector{0, 1, 2, 1, 1, 1, 3, 4, 5, 1, 1, 1,
6, 7, 8, 1, 1, 1, 9, 10, 11, 1, 1, 1}));