From ed8ecad184ea3c01470b795836202050e599f702 Mon Sep 17 00:00:00 2001
From: "Victor M. Varela" <vmvarela@hiberus.com>
Date: Thu, 18 Jun 2026 20:34:03 +0200
Subject: [PATCH] feat: add Markdown table output format (-O markdown)

- Add 'markdown' to OutputFormat enum with 'md' alias
- Implement two-pass markdown writer in src/markdown.zig
- Left-align text columns, right-align numeric columns
- NULL values render as empty cells
- Add 6 integration tests covering basic output, alias, alignment, NULL handling, aggregation, and empty results
- Update help text, man page, and README

Closes #167
---
 README.md           |   2 +-
 build.zig           |  56 ++++++++++
 docs/sql-pipe.1.scd |  14 ++-
 src/args.zig        |   2 +-
 src/format.zig      |   4 +
 src/main.zig        |   9 +-
 src/markdown.zig    | 267 ++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 350 insertions(+), 4 deletions(-)
 create mode 100644 src/markdown.zig
diff --git a/README.md b/README.md
index 7698f78..a98cb25 100644
--- a/README.md
+++ b/README.md
@@ -308,7 +308,7 @@ When `-f` is used, all positional arguments are treated as data files (no positi
 | `-d`, `--delimiter <char>` | Input field delimiter (single character, default `,`) |
 | `--tsv` | Alias for `--delimiter '\t'` |
 | `-I`, `--input-format <fmt>` | Input format: `csv` (default), `tsv`, `json`, `ndjson`, `xml`. Overrides file extension auto-detection. |
-| `-O`, `--output-format <fmt>` | Output format: `csv` (default), `tsv`, `json`, `ndjson`, `xml` |
+| `-O`, `--output-format <fmt>` | Output format: `csv` (default), `tsv`, `json`, `ndjson`, `xml`, `markdown` (alias: `md`) |
 | `--no-type-inference` | Treat all columns as TEXT (skip auto-detection) |
 | `-H`, `--header` | Print column names as the first output row |
 | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) |
diff --git a/build.zig b/build.zig
index dcec06f..75a2282 100644
--- a/build.zig
+++ b/build.zig
@@ -2028,6 +2028,62 @@ pub fn build(b: *std.Build) void {
     test_autodetect_xml.step.dependOn(b.getInstallStep());
     test_step.dependOn(&test_autodetect_xml.step);
 
+    // ─── Markdown output integration tests ──────────────────────────────────────
+
+    // Integration test 158a: Basic markdown output
+    const test_markdown_basic = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\nBob,25\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t ORDER BY name')
+        \\echo "$result" | grep -Fq '| name' && echo "$result" | grep -Fq '| Alice' && echo "$result" | grep -q -e '---'
+    });
+    test_markdown_basic.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_basic.step);
+
+    // Integration test 158b: -O md alias works
+    const test_markdown_alias = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\n' | ./zig-out/bin/sql-pipe -O md 'SELECT * FROM t')
+        \\echo "$result" | grep -Fq '| name' && echo "$result" | grep -Fq '| Alice'
+    });
+    test_markdown_alias.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_alias.step);
+
+    // Integration test 158c: Numeric right-alignment (age column)
+    const test_markdown_numeric = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,100\nBob,5\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t ORDER BY name')
+        \\echo "$result" | grep -Fq '100' && echo "$result" | grep -Fq '   5'
+    });
+    test_markdown_numeric.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_numeric.step);
+
+    // Integration test 158d: NULL renders as empty cell (no output line may contain "NULL")
+    const test_markdown_null = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\nBob,\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t ORDER BY name')
+        \\echo "$result" | grep -Fq 'Bob' && ! echo "$result" | grep -q 'NULL'
+    });
+    test_markdown_null.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_null.step);
+
+    // Integration test 158e: Aggregation query produces valid markdown table
+    const test_markdown_aggregate = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'region,amount\nEast,100\nWest,200\nEast,150\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT region, SUM(amount) as total FROM t GROUP BY region ORDER BY region')
+        \\echo "$result" | grep -Fq '| region' && echo "$result" | grep -Fq '| East' && echo "$result" | grep -Fq 'West' && echo "$result" | grep -q -e '---'
+    });
+    test_markdown_aggregate.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_aggregate.step);
+
+    // Integration test 158f: Markdown with empty result set (headers + separator only)
+    const test_markdown_empty = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t WHERE age > 100')
+        \\echo "$result" | grep -Fq '|' && echo "$result" | grep -q -e '---' && ! echo "$result" | grep -q 'Alice'
+    });
+    test_markdown_empty.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_empty.step);
+
     // ─── Fixture-based integration tests ─────────────────────────────────────
     // These tests use sample files committed in tests/fixtures/ to exercise
     // the binary end-to-end with realistic data across all supported formats.
diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd
index 214cee3..7b89c31 100644
--- a/docs/sql-pipe.1.scd
+++ b/docs/sql-pipe.1.scd
@@ -64,7 +64,7 @@ OPTIONS
 
 	*-O, --output-format* <fmt>
 		Set the output format: *csv* (default), *tsv*, *json*, *ndjson*,
-		or *xml*.
+		*xml*, or *markdown* (alias: *md*).
 
 	*--no-type-inference*
 		Treat all columns as TEXT. Skips automatic type detection and uses plain
@@ -264,6 +264,18 @@ EXAMPLES
 		$ cat data.csv \
 		    | sql-pipe -O xml --xml-root feed --xml-row entry 'SELECT * FROM t'
 
+	Output results as a Markdown table:
+
+		$ printf 'name,age\nAlice,30\nBob,25\nCarol,35' \
+		    | sql-pipe -O markdown 'SELECT * FROM t'
+
+	Output:++
+	| name  | age |++
+	|-------|-----|++
+	| Alice |  30 |++
+	| Bob   |  25 |++
+	| Carol |  35 |
+
 	Preview schema and first 3 rows of a CSV file:
 
 		$ cat sales.csv | sql-pipe --sample 3
diff --git a/src/args.zig b/src/args.zig
index 7726f5a..2a84212 100644
--- a/src/args.zig
+++ b/src/args.zig
@@ -199,7 +199,7 @@ pub fn printUsage(writer: *std.Io.Writer) !void {
         \\  --tsv                        Alias for --delimiter '\t'
         \\  -I, --input-format <fmt>     Input format: csv (default), tsv, json, ndjson, xml
         \\                               Overrides file extension auto-detection; stdin always uses this value
-        \\  -O, --output-format <fmt>    Output format: csv (default), tsv, json, ndjson, xml
+        \\  -O, --output-format <fmt>    Output format: csv (default), tsv, json, ndjson, xml, markdown (alias: md)
         \\  --json                       Alias for --output-format json
         \\  --no-type-inference          Treat all columns as TEXT (CSV input only)
         \\  -H, --header                 Print column names as the first output row (CSV/TSV output only)
diff --git a/src/format.zig b/src/format.zig
index b95cd72..26cddcd 100644
--- a/src/format.zig
+++ b/src/format.zig
@@ -48,10 +48,12 @@ pub const OutputFormat = enum {
     json,
     ndjson,
     xml,
+    markdown,
 
     /// Parse a format name string.
     /// Returns error.InvalidOutputFormat when the value is unrecognised.
     pub fn parse(s: []const u8) error{InvalidOutputFormat}!OutputFormat {
+        if (std.mem.eql(u8, s, "md")) return .markdown;
         return std.meta.stringToEnum(OutputFormat, s) orelse error.InvalidOutputFormat;
     }
 };
@@ -142,6 +144,7 @@ pub const OutputWriter = struct {
                 if (self.opts.header and col_count > 0)
                     try csvPrintHeaderRow(stmt, col_count, writer, self.csvDelimiter());
             },
+            .markdown => unreachable, // handled before OutputWriter in execQuery
         }
 
         // Write format-specific preamble.
@@ -174,6 +177,7 @@ pub const OutputWriter = struct {
                 writer,
                 self.opts.xml_row,
             ),
+            .markdown => unreachable, // handled before OutputWriter in execQuery
         }
     }
 
diff --git a/src/main.zig b/src/main.zig
index ac35da5..182e72f 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -4,6 +4,7 @@ const json = @import("json.zig");
 const xml = @import("xml.zig");
 const format = @import("format.zig");
 const table = @import("table.zig");
+const markdown = @import("markdown.zig");
 const build_options = @import("build_options");
 const args_mod = @import("args.zig");
 const sqlite_mod = @import("sqlite.zig");
@@ -69,6 +70,12 @@ fn execQuery(
         return;
     }
 
+    // Markdown output: two-pass writer (not streaming)
+    if (output_format == .markdown) {
+        try markdown.writeMarkdown(allocator, writer, stmt.?, col_count);
+        return;
+    }
+
     var out_writer = format.OutputWriter.init(output_format, .{
         .header = header,
         .xml_root = xml_root,
@@ -214,7 +221,7 @@ pub fn main(init: std.process.Init.Minimal) void {
             error.SilentVerboseConflict => fatal("--silent cannot be combined with --verbose", stderr_writer, .usage, .{}),
             error.InvalidMaxRows => fatal("--max-rows must be a positive integer", stderr_writer, .usage, .{}),
             error.InvalidInputFormat => fatal("unknown input format; supported: csv, tsv, json, ndjson, xml", stderr_writer, .usage, .{}),
-            error.InvalidOutputFormat => fatal("unknown output format; supported: csv, tsv, json, ndjson, xml", stderr_writer, .usage, .{}),
+            error.InvalidOutputFormat => fatal("unknown output format; supported: csv, tsv, json, ndjson, xml, markdown (md)", stderr_writer, .usage, .{}),
             error.ColumnsWithQuery => fatal("--columns cannot be combined with a query argument", stderr_writer, .usage, .{}),
             error.ValidateWithQuery => fatal("--validate cannot be combined with a query argument", stderr_writer, .usage, .{}),
             error.InvalidOutputPath => fatal("--output requires a non-empty file path", stderr_writer, .usage, .{}),
diff --git a/src/markdown.zig b/src/markdown.zig
new file mode 100644
index 0000000..894ae3d
--- /dev/null
+++ b/src/markdown.zig
@@ -0,0 +1,267 @@
+//! Markdown table output format.
+//!
+//! Uses two passes over the result set: the first steps through every row to
+//! compute column widths and detect numeric columns without copying cell text;
+//! the statement is then reset and stepped again to print the header, the
+//! separator, and all rows straight from SQLite.
+//!
+//! Memory is O(cols) — rows are never buffered in memory.
+
+const std = @import("std");
+const c = @import("c");
+const sqlite_mod = @import("sqlite.zig");
+
+/// Write a Markdown table from SQLite query results to the given writer.
+///
+/// Pre:  stmt is a valid prepared statement that has NOT been stepped yet
+///       col_count = sqlite3_column_count(stmt)
+/// Post: all rows are consumed via sqlite3_step, table is written to writer
+///
+/// The statement is stepped to completion twice (measure, then print) with a
+/// sqlite3_reset in between, so the query itself is executed twice.
+/// Returns error.StepFailed when stepping or resetting the statement fails.
+///
+/// Memory: uses an arena allocator internally; all memory is freed on return.
+pub fn writeMarkdown(
+    allocator: std.mem.Allocator,
+    writer: *std.Io.Writer,
+    stmt: *c.sqlite3_stmt,
+    col_count: c_int,
+) (std.mem.Allocator.Error || error{WriteFailed, StepFailed})!void {
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    const ncols: usize = @intCast(col_count);
+    if (ncols == 0) return;
+
+    // 1. Collect column names (duped for safety)
+    const col_names = try a.alloc([]const u8, ncols);
+    for (col_names, 0..) |*name, i| {
+        name.* = try a.dupe(u8, sqlite_mod.columnName(stmt, @intCast(i)) orelse "");
+    }
+
+    // 2. Pass 1: Compute column widths and detect numeric columns
+    const widths = try a.alloc(usize, ncols);
+    for (widths, col_names) |*w, name| w.* = visualWidth(name);
+    const numeric = try a.alloc(bool, ncols);
+    @memset(numeric, true);
+    const has_value = try a.alloc(bool, ncols);
+    @memset(has_value, false);
+
+    var rc = c.sqlite3_step(stmt);
+    while (rc == c.SQLITE_ROW) : (rc = c.sqlite3_step(stmt)) {
+        for (0..ncols) |i| {
+            const idx: c_int = @intCast(i);
+            const col_type = c.sqlite3_column_type(stmt, idx);
+            if (col_type == c.SQLITE_NULL) {
+                // NULL renders as an empty cell; columns that contain one are
+                // kept at least 3 wide.
+                if (3 > widths[i]) widths[i] = 3;
+                continue;
+            }
+            has_value[i] = true;
+            if (col_type != c.SQLITE_INTEGER and col_type != c.SQLITE_FLOAT) {
+                numeric[i] = false;
+            }
+            // Measure through the same helper writeDataRow prints with, so both
+            // passes agree on each cell's text.
+            if (sqlite_mod.columnText(stmt, idx)) |s| {
+                widths[i] = @max(widths[i], visualWidth(s));
+            }
+        }
+    }
+    if (rc != c.SQLITE_DONE) return error.StepFailed;
+
+    // Minimum width of 1 to avoid zero-width columns
+    for (0..ncols) |i| {
+        if (widths[i] == 0) widths[i] = 1;
+        numeric[i] = numeric[i] and has_value[i];
+    }
+
+    // 3. Reset statement for second pass; a failed reset means pass 2 cannot
+    //    replay the rows, so surface it instead of printing a partial table.
+    if (c.sqlite3_reset(stmt) != c.SQLITE_OK) return error.StepFailed;
+
+    // 4. Pass 2: Print the markdown table
+    // Header row: | col1 | col2 |
+    try writeRow(writer, col_names, widths, false);
+    // Separator row between header and data
+    try writeSeparator(writer, widths, numeric);
+    // Data rows: | val1 | val2 |
+    rc = c.sqlite3_step(stmt);
+    while (rc == c.SQLITE_ROW) : (rc = c.sqlite3_step(stmt)) {
+        try writeDataRow(writer, stmt, widths, numeric);
+    }
+    if (rc != c.SQLITE_DONE) return error.StepFailed;
+}
+
+/// Write one pipe-delimited row of pre-rendered cell strings (used for the header).
+/// Each cell is padded to its column's visual width; `right_align` pads on the
+/// left instead of the right. Precondition: values.len <= widths.len.
+fn writeRow(
+    writer: *std.Io.Writer,
+    values: []const []const u8,
+    widths: []const usize,
+    right_align: bool,
+) error{WriteFailed}!void {
+    try writer.writeByte('|');
+    for (values, 0..) |val, i| {
+        try writer.writeByte(' ');
+        // Saturate so a cell wider than its column overflows instead of underflowing usize.
+        const padding = widths[i] -| visualWidth(val);
+        if (right_align) {
+            try writeSpaces(writer, padding);
+            try writer.writeAll(val);
+        } else {
+            try writer.writeAll(val);
+            try writeSpaces(writer, padding);
+        }
+        try writer.writeByte(' ');
+        try writer.writeByte('|');
+    }
+    try writer.writeByte('\n');
+}
+
+/// Write the header separator line, e.g. `| ----- | ---:|`.
+/// Numeric columns end in ':' (GFM right-align marker) in place of the pad space.
+fn writeSeparator(
+    writer: *std.Io.Writer,
+    widths: []const usize,
+    numeric: []const bool,
+) error{WriteFailed}!void {
+    try writer.writeByte('|');
+    for (widths, numeric) |w, is_numeric| {
+        try writer.writeByte(' ');
+        try writeCharRepeated(writer, "-", w);
+        // Colon replaces the trailing pad so the column keeps its width.
+        try writer.writeByte(if (is_numeric) ':' else ' ');
+        try writer.writeByte('|');
+    }
+    try writer.writeByte('\n');
+}
+
+/// Write a single data row directly from the current row of `stmt` (no buffering).
+///
+/// NULL cells are rendered blank. Columns flagged in `numeric` are padded on the
+/// left (right-aligned); every other column is padded on the right.
+fn writeDataRow(
+    writer: *std.Io.Writer,
+    stmt: *c.sqlite3_stmt,
+    widths: []const usize,
+    numeric: []const bool,
+) error{WriteFailed}!void {
+    try writer.writeByte('|');
+    for (widths, numeric, 0..) |w, is_numeric, i| {
+        const idx: c_int = @intCast(i);
+        try writer.writeByte(' ');
+
+        // Check the type before fetching text so NULL stays an empty cell.
+        const is_null = c.sqlite3_column_type(stmt, idx) == c.SQLITE_NULL;
+        const cell = if (is_null) null else sqlite_mod.columnText(stmt, idx);
+
+        if (cell) |val| {
+            // Saturating subtraction: if this pass yields a cell wider than the
+            // width measured earlier, let the cell overflow its column instead
+            // of underflowing usize.
+            const padding = w -| visualWidth(val);
+            if (is_numeric and val.len > 0) {
+                try writeSpaces(writer, padding);
+                try writer.writeAll(val);
+            } else {
+                try writer.writeAll(val);
+                try writeSpaces(writer, padding);
+            }
+        } else {
+            // NULL (or no text available): blank cell of full column width.
+            try writeSpaces(writer, w);
+        }
+        try writer.writeByte(' ');
+        try writer.writeByte('|');
+    }
+    try writer.writeByte('\n');
+}
+
+// ── UTF-8 / visual-width helpers (copied from table.zig) ──────────────────
+
+fn utf8CharLen(first: u8) usize {
+    return switch (first) {
+        0xC0...0xDF => 2,
+        0xE0...0xEF => 3,
+        0xF0...0xF7 => 4,
+        else => 1, // ASCII, stray continuation byte, or invalid lead byte
+    };
+}
+
+/// Decode one UTF-8 sequence of 1-4 bytes; null if malformed or any other length.
+fn utf8DecodeRaw(bytes: []const u8) ?u21 {
+    // A lone byte is passed through untouched, even when it is not ASCII.
+    if (bytes.len == 1) return bytes[0];
+    if (bytes.len == 2) return std.unicode.utf8Decode2(bytes[0..2].*) catch null;
+    if (bytes.len == 3) return std.unicode.utf8Decode3(bytes[0..3].*) catch null;
+    if (bytes.len == 4) return std.unicode.utf8Decode4(bytes[0..4].*) catch null;
+    return null;
+}
+
+fn isWideCodepoint(cp: u21) bool {
+    return switch (cp) {
+        0x3400...0x4DBF, 0x4E00...0x9FFF, 0xAC00...0xD7AF, 0xFF00...0xFFEF => true,
+        else => false,
+    };
+}
+
+/// Display width of `s` in terminal columns: two per wide codepoint, one per
+/// other codepoint, and one per byte that cannot be decoded as UTF-8.
+fn visualWidth(s: []const u8) usize {
+    var cols: usize = 0;
+    var pos: usize = 0;
+    while (pos < s.len) {
+        const seq_len = utf8CharLen(s[pos]);
+        // A sequence that runs past the end of `s`, or that fails to decode,
+        // yields null here.
+        const decoded = if (pos + seq_len <= s.len)
+            utf8DecodeRaw(s[pos..][0..seq_len])
+        else
+            null;
+
+        if (decoded) |cp| {
+            cols += @as(usize, if (isWideCodepoint(cp)) 2 else 1);
+            pos += seq_len;
+        } else {
+            // Bad input: count one column and resynchronise on the next byte.
+            cols += 1;
+            pos += 1;
+        }
+    }
+    return cols;
+}
+
+/// Write `char` to `writer` `n` times, batching copies through a stack buffer.
+/// Empty or oversized `char` is written unbatched so the loop always makes progress.
+fn writeCharRepeated(writer: *std.Io.Writer, char: []const u8, n: usize) error{WriteFailed}!void {
+    var buf: [256]u8 = undefined;
+    const per_chunk = if (char.len == 0) 0 else buf.len / char.len;
+    for (0..per_chunk) |k| @memcpy(buf[k * char.len ..][0..char.len], char);
+    var remaining = n;
+    while (remaining > 0) {
+        // per_chunk == 0 means `char` cannot be batched; emit it directly.
+        const chunk = @max(1, @min(remaining, per_chunk));
+        try writer.writeAll(if (per_chunk == 0) char else buf[0 .. chunk * char.len]);
+        remaining -= chunk;
+    }
+}
+
+/// Reusable run of blanks so padding is emitted in slices, not byte by byte.
+const spaces_buf = " " ** 256;
+
+/// Write `n` space characters to `writer`, at most `spaces_buf.len` per call.
+fn writeSpaces(writer: *std.Io.Writer, n: usize) error{WriteFailed}!void {
+    var left = n;
+    while (left > 0) : (left -= @min(left, spaces_buf.len)) {
+        try writer.writeAll(spaces_buf[0..@min(left, spaces_buf.len)]);
+    }
+}
+
+test "visualWidth counts wide codepoints as two columns" {
+    try std.testing.expectEqual(@as(usize, 4), visualWidth("a\u{4E2D}b"));
+}