Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
- **Analytics Performance**:
- **Columnar Storage**: Binary-per-column persistence for efficient analytical scanning.
- **Vectorized Execution**: Batch-at-a-time processing model for high-throughput query execution.
- **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC).
- **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC) and connection-aware execution state supporting `BEGIN`, `COMMIT`, and `ROLLBACK`.
- **Advanced Execution Engine**:
- **Full Outer Join Support**: Specialized `HashJoinOperator` implementing `LEFT`, `RIGHT`, and `FULL` outer join semantics with automatic null-padding.
- **B+ Tree Indexing**: Persistent indexing for high-speed point lookups and optimized query planning.
- **Type-Safe Value System**: Robust handling of SQL data types using `std::variant`.
- **Volcano & Vectorized Engine**: Flexible execution models supporting traditional row-based and high-performance columnar processing.
- **PostgreSQL Wire Protocol**: Handshake and simple query protocol implementation for tool compatibility.
Expand Down Expand Up @@ -46,17 +49,18 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
mkdir build
cd build
cmake ..
make -j$(nproc)
make -j$(nproc) # Or ../tests/run_test.sh for automated multi-OS build
```

### Running Tests

```bash
# Run all tests
# Run the integrated test suite (Unit + E2E + Logic)
./tests/run_test.sh

# Or run individual binaries
./build/sqlEngine_tests
# Run distributed-specific tests
./build/distributed_tests
./build/distributed_txn_tests
```

### Starting the Cluster
Expand Down
8 changes: 4 additions & 4 deletions docs/phases/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ This directory contains the technical documentation for the lifecycle of the clo

### Phase 9 — Stability & Testing Refinement
**Focus**: Engine Robustness & E2E Validation.
- Slotted-page layout fixes for large table support.
- Buffer Pool Manager lifecycle management (destructor flushing).
- Robust Python E2E client with partial-read handling and numeric validation.
- Standardized test orchestration via `run_test.sh`.
- **Advanced Execution**: Full support for `LEFT`, `RIGHT`, and `FULL` outer joins.
- **Transactional Integrity**: Persistent connection-based execution state and comprehensive `ROLLBACK` support for all DML operations.
- **Logic Validation**: Integration of the SqlLogicTest (SLT) suite with 80+ logic test cases covering Joins, Transactions, Aggregates, and Indexes.
- **Automation**: Standardized cross-platform test orchestration via `run_test.sh` with automatic CPU detection.

---

Expand Down
6 changes: 3 additions & 3 deletions include/executor/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,12 +326,12 @@ class HashJoinOperator : public Operator {
class LimitOperator : public Operator {
private:
std::unique_ptr<Operator> child_;
uint64_t limit_;
uint64_t offset_;
int64_t limit_;
int64_t offset_;
uint64_t current_count_ = 0;

public:
LimitOperator(std::unique_ptr<Operator> child, uint64_t limit, uint64_t offset = 0);
LimitOperator(std::unique_ptr<Operator> child, int64_t limit, int64_t offset = 0);

bool init() override;
bool open() override;
Expand Down
3 changes: 2 additions & 1 deletion include/executor/query_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class QueryExecutor {
transaction::TransactionManager& transaction_manager,
recovery::LogManager* log_manager = nullptr,
cluster::ClusterManager* cluster_manager = nullptr);
~QueryExecutor() = default;
~QueryExecutor();

// Disable copy/move for executor
QueryExecutor(const QueryExecutor&) = delete;
Expand Down Expand Up @@ -74,6 +74,7 @@ class QueryExecutor {

QueryResult execute_select(const parser::SelectStatement& stmt, transaction::Transaction* txn);
QueryResult execute_create_table(const parser::CreateTableStatement& stmt);
QueryResult execute_create_index(const parser::CreateIndexStatement& stmt);
QueryResult execute_drop_table(const parser::DropTableStatement& stmt);
QueryResult execute_drop_index(const parser::DropIndexStatement& stmt);
QueryResult execute_insert(const parser::InsertStatement& stmt, transaction::Transaction* txn);
Expand Down
6 changes: 3 additions & 3 deletions include/parser/statement.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ class SelectStatement : public Statement {
std::vector<std::unique_ptr<Expression>> group_by_;
std::unique_ptr<Expression> having_;
std::vector<std::unique_ptr<Expression>> order_by_;
int64_t limit_ = 0;
int64_t offset_ = 0;
int64_t limit_ = -1;
int64_t offset_ = -1;
bool distinct_ = false;

public:
Expand Down Expand Up @@ -112,7 +112,7 @@ class SelectStatement : public Statement {
[[nodiscard]] int64_t limit() const { return limit_; }
[[nodiscard]] int64_t offset() const { return offset_; }
[[nodiscard]] bool distinct() const { return distinct_; }
[[nodiscard]] bool has_limit() const { return limit_ > 0; }
[[nodiscard]] bool has_limit() const { return limit_ >= 0; }
[[nodiscard]] bool has_offset() const { return offset_ > 0; }

[[nodiscard]] std::string to_string() const override;
Expand Down
1 change: 1 addition & 0 deletions include/parser/token.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ enum class TokenType : uint8_t {
Join,
Left,
Right,
Full,
Inner,
Outer,
Order,
Expand Down
6 changes: 6 additions & 0 deletions include/storage/heap_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ class HeapTable {
*/
bool physical_remove(const TupleId& tuple_id);

/**
* @brief Resets xmax to 0 (used for rollback of a DELETE)
* @return true on success
*/
bool undo_remove(const TupleId& tuple_id);

/**
* @brief Replaces an existing record with new data
* @param tuple_id The record to update
Expand Down
15 changes: 14 additions & 1 deletion include/transaction/transaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
#define CLOUDSQL_TRANSACTION_TRANSACTION_HPP

#include <atomic>
#include <cassert>
#include <mutex>
#include <optional>
Comment on lines +10 to +12
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify whether release builds define NDEBUG and inspect current add_undo_log call sites.

set -euo pipefail

echo "== Build definitions related to assert/NDEBUG =="
fd 'CMakeLists\.txt$|\.cmake$' . -X rg -n 'NDEBUG|CMAKE_(C|CXX)_FLAGS_RELEASE|target_compile_definitions|add_compile_definitions' || true

echo
echo "== Current add_undo_log call sites =="
rg -n -C2 '\badd_undo_log\s*\(' include src tests || true

Repository: poyrazK/cloudSQL

Length of output: 2228


🏁 Script executed:

sed -n '120,140p' include/transaction/transaction.hpp

Repository: poyrazK/cloudSQL

Length of output: 1034


🏁 Script executed:

cat CMakeLists.txt | head -50

Repository: poyrazK/cloudSQL

Length of output: 1521


🏁 Script executed:

grep -i "ndebug\|release\|debug\|CMAKE_BUILD_TYPE" CMakeLists.txt

Repository: poyrazK/cloudSQL

Length of output: 42


🏁 Script executed:

# Check if there are any compile-time constraints (templates, concepts, etc.) on add_undo_log
sed -n '60,140p' include/transaction/transaction.hpp | grep -A 5 -B 5 "add_undo_log\|concept\|enable_if"

Repository: poyrazK/cloudSQL

Length of output: 1010


Don't use debug-only asserts to enforce undo-log invariants.

The asserts at lines 127 and 135 compile out under NDEBUG, so these overloads are unprotected in release builds. If a caller passes the wrong UndoLog::Type, a malformed undo log is still recorded and can break rollback/index cleanup later. Prefer dedicated APIs or a real runtime check here.

Suggested hardening
 #include <atomic>
-#include <cassert>
 #include <mutex>
 #include <optional>
+#include <stdexcept>
 #include <unordered_set>
 #include <vector>
@@
     void add_undo_log(UndoLog::Type type, const std::string& table_name,
                       const storage::HeapTable::TupleId& rid) {
-        /* Enforce invariant: non-UPDATE types should not provide old_rid through this overload */
-        assert(type != UndoLog::Type::UPDATE);
+        if (type == UndoLog::Type::UPDATE) {
+            throw std::invalid_argument("UPDATE undo logs require old_rid");
+        }
         undo_logs_.push_back({type, table_name, rid, std::nullopt});
     }
@@
     void add_undo_log(UndoLog::Type type, const std::string& table_name,
                       const storage::HeapTable::TupleId& rid,
                       const storage::HeapTable::TupleId& old_rid) {
-        /* Enforce invariant: this overload is primarily for UPDATE types providing old_rid */
-        assert(type == UndoLog::Type::UPDATE);
+        if (type != UndoLog::Type::UPDATE) {
+            throw std::invalid_argument("old_rid is only valid for UPDATE undo logs");
+        }
         undo_logs_.push_back({type, table_name, rid, old_rid});
     }

Also applies to: 124-129 and 131-136

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@include/transaction/transaction.hpp` around lines 10 - 12, The code currently
uses debug-only asserts to validate UndoLog::Type in the overloads that record
undo-log entries, which allows malformed undo entries in release builds; replace
those assert(...) checks with runtime validation: explicitly check the provided
UndoLog::Type in the overloads that accept it, and if invalid either throw a
descriptive exception or return an error status (do not rely on asserts), and
consider adding dedicated API wrappers (e.g., type-specific record_* methods) so
callers cannot pass the wrong type; update the undo-log write paths (the
overloads that record undo entries) to refuse/handle invalid types and include
clear error messages to prevent corrupt rollback/index cleanup.

#include <unordered_set>
#include <vector>

Expand Down Expand Up @@ -55,6 +57,7 @@ struct UndoLog {
Type type = Type::INSERT;
std::string table_name;
storage::HeapTable::TupleId rid;
std::optional<storage::HeapTable::TupleId> old_rid;
};

/**
Expand Down Expand Up @@ -120,7 +123,17 @@ class Transaction {

void add_undo_log(UndoLog::Type type, const std::string& table_name,
const storage::HeapTable::TupleId& rid) {
undo_logs_.push_back({type, table_name, rid});
/* Enforce invariant: non-UPDATE types should not provide old_rid through this overload */
assert(type != UndoLog::Type::UPDATE);
undo_logs_.push_back({type, table_name, rid, std::nullopt});
}

void add_undo_log(UndoLog::Type type, const std::string& table_name,
const storage::HeapTable::TupleId& rid,
const storage::HeapTable::TupleId& old_rid) {
/* Enforce invariant: this overload is primarily for UPDATE types providing old_rid */
assert(type == UndoLog::Type::UPDATE);
undo_logs_.push_back({type, table_name, rid, old_rid});
}

[[nodiscard]] const std::vector<UndoLog>& get_undo_logs() const { return undo_logs_; }
Expand Down
2 changes: 1 addition & 1 deletion include/transaction/transaction_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class TransactionManager {
/**
* @brief Undo changes made by a transaction
*/
void undo_transaction(Transaction* txn);
bool undo_transaction(Transaction* txn);
};

} // namespace cloudsql::transaction
Expand Down
17 changes: 12 additions & 5 deletions src/executor/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,12 @@ bool IndexScanOperator::next(Tuple& out_tuple) {
while (current_match_index_ < matching_ids_.size()) {
const auto& tid = matching_ids_[current_match_index_++];

storage::HeapTable::TupleId rid;
rid.page_num = tid.page_num;
rid.slot_num = tid.slot_num;

storage::HeapTable::TupleMeta meta;
if (table_->get_meta(tid, meta)) {
if (table_->get_meta(rid, meta)) {
/* MVCC Visibility Check */
bool visible = true;
const Transaction* const txn = get_txn();
Expand Down Expand Up @@ -734,7 +738,7 @@ void HashJoinOperator::add_child(std::unique_ptr<Operator> child) {

/* --- LimitOperator --- */

LimitOperator::LimitOperator(std::unique_ptr<Operator> child, uint64_t limit, uint64_t offset)
LimitOperator::LimitOperator(std::unique_ptr<Operator> child, int64_t limit, int64_t offset)
: Operator(OperatorType::Limit, child->get_txn(), child->get_lock_manager()),
child_(std::move(child)),
limit_(limit),
Expand All @@ -750,17 +754,20 @@ bool LimitOperator::open() {
}

/* Skip offset rows */
current_count_ = 0;
Tuple tuple;
while (current_count_ < offset_ && child_->next(tuple)) {
current_count_++;
if (offset_ > 0) {
while (current_count_ < static_cast<uint64_t>(offset_) && child_->next(tuple)) {
current_count_++;
}
}
current_count_ = 0;
set_state(ExecState::Open);
return true;
}

bool LimitOperator::next(Tuple& out_tuple) {
if (current_count_ >= limit_) {
if (limit_ >= 0 && current_count_ >= static_cast<uint64_t>(limit_)) {
set_state(ExecState::Done);
return false;
}
Expand Down
Loading