From ed8ecad184ea3c01470b795836202050e599f702 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Thu, 18 Jun 2026 20:34:03 +0200 Subject: [PATCH] feat: add Markdown table output format (-O markdown) - Add 'markdown' to OutputFormat enum with 'md' alias - Implement two-pass markdown writer in src/markdown.zig - Left-align text columns, right-align numeric columns - NULL values render as empty cells - Add 6 integration tests covering basic output, alias, alignment, NULL handling - Update help text, man page, and README Closes #167 --- README.md | 2 +- build.zig | 56 ++++++++++ docs/sql-pipe.1.scd | 14 ++- src/args.zig | 2 +- src/format.zig | 4 + src/main.zig | 9 +- src/markdown.zig | 267 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 350 insertions(+), 4 deletions(-) create mode 100644 src/markdown.zig diff --git a/README.md b/README.md index 7698f78..a98cb25 100644 --- a/README.md +++ b/README.md @@ -308,7 +308,7 @@ When `-f` is used, all positional arguments are treated as data files (no positi | `-d`, `--delimiter ` | Input field delimiter (single character, default `,`) | | `--tsv` | Alias for `--delimiter '\t'` | | `-I`, `--input-format ` | Input format: `csv` (default), `tsv`, `json`, `ndjson`, `xml`. Overrides file extension auto-detection. 
|
-| `-O`, `--output-format ` | Output format: `csv` (default), `tsv`, `json`, `ndjson`, `xml` |
+| `-O`, `--output-format ` | Output format: `csv` (default), `tsv`, `json`, `ndjson`, `xml`, `markdown` (alias: `md`) |
 | `--no-type-inference` | Treat all columns as TEXT (skip auto-detection) |
 | `-H`, `--header` | Print column names as the first output row |
 | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) |
diff --git a/build.zig b/build.zig
index dcec06f..75a2282 100644
--- a/build.zig
+++ b/build.zig
@@ -2028,6 +2028,62 @@ pub fn build(b: *std.Build) void {
     test_autodetect_xml.step.dependOn(b.getInstallStep());
     test_step.dependOn(&test_autodetect_xml.step);
 
+    // ─── Markdown output integration tests ──────────────────────────────────────
+
+    // Integration test 158a: Basic markdown output
+    const test_markdown_basic = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\nBob,25\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t ORDER BY name')
+        \\echo "$result" | grep -Fq '| name' && echo "$result" | grep -Fq '| Alice' && echo "$result" | grep -q -e '---'
+    });
+    test_markdown_basic.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_basic.step);
+
+    // Integration test 158b: -O md alias works
+    const test_markdown_alias = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\n' | ./zig-out/bin/sql-pipe -O md 'SELECT * FROM t')
+        \\echo "$result" | grep -Fq '| name' && echo "$result" | grep -Fq '| Alice'
+    });
+    test_markdown_alias.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_alias.step);
+
+    // Integration test 158c: Numeric right-alignment (age column is 3 wide, so "5" gets two pad spaces on the left)
+    const test_markdown_numeric = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,100\nBob,5\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t ORDER BY name')
+        \\echo "$result" | grep -Fq '100' && echo "$result" | grep -Fq '|   5 |'
+    });
+    test_markdown_numeric.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_numeric.step);
+
+    // Integration test 158d: NULL renders as empty cell (no output line may contain the string "NULL")
+    const test_markdown_null = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\nBob,\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t ORDER BY name')
+        \\echo "$result" | grep -Fq 'Bob' && ! echo "$result" | grep -q 'NULL'
+    });
+    test_markdown_null.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_null.step);
+
+    // Integration test 158e: Aggregation query produces valid markdown table
+    const test_markdown_aggregate = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'region,amount\nEast,100\nWest,200\nEast,150\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT region, SUM(amount) as total FROM t GROUP BY region ORDER BY region')
+        \\echo "$result" | grep -Fq '| region' && echo "$result" | grep -Fq '| East' && echo "$result" | grep -Fq 'West' && echo "$result" | grep -q -e '---'
+    });
+    test_markdown_aggregate.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_aggregate.step);
+
+    // Integration test 158f: Markdown with empty result set (headers + separator only)
+    const test_markdown_empty = b.addSystemCommand(&.{
+        "bash", "-c",
+        \\result=$(printf 'name,age\nAlice,30\n' | ./zig-out/bin/sql-pipe -O markdown 'SELECT * FROM t WHERE age > 100')
+        \\echo "$result" | grep -Fq '|' && echo "$result" | grep -q -e '---' && ! echo "$result" | grep -q 'Alice'
+    });
+    test_markdown_empty.step.dependOn(b.getInstallStep());
+    test_step.dependOn(&test_markdown_empty.step);
+
     // ─── Fixture-based integration tests ─────────────────────────────────────
     // These tests use sample files committed in tests/fixtures/ to exercise
     // the binary end-to-end with realistic data across all supported formats.
diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd
index 214cee3..7b89c31 100644
--- a/docs/sql-pipe.1.scd
+++ b/docs/sql-pipe.1.scd
@@ -64,7 +64,7 @@ OPTIONS
 
 *-O, --output-format*
 	Set the output format: *csv* (default), *tsv*, *json*, *ndjson*,
-	or *xml*.
+	*xml*, or *markdown* (alias: *md*).
 
 *--no-type-inference*
 	Treat all columns as TEXT. Skips automatic type detection and uses plain
@@ -264,6 +264,18 @@ EXAMPLES
 	$ cat data.csv \
 	| sql-pipe -O xml --xml-root feed --xml-row entry 'SELECT * FROM t'
 
+	Output results as a Markdown table:
+
+	$ printf 'name,age\nAlice,30\nBob,25\nCarol,35' \
+	| sql-pipe -O markdown 'SELECT * FROM t'
+
+	Output:++
+	| name  | age |++
+	|-------|----:|++
+	| Alice |  30 |++
+	| Bob   |  25 |++
+	| Carol |  35 |
+
 	Preview schema and first 3 rows of a CSV file:
 
 	$ cat sales.csv | sql-pipe --sample 3
diff --git a/src/args.zig b/src/args.zig
index 7726f5a..2a84212 100644
--- a/src/args.zig
+++ b/src/args.zig
@@ -199,7 +199,7 @@ pub fn printUsage(writer: *std.Io.Writer) !void {
         \\ --tsv Alias for --delimiter '\t'
         \\ -I, --input-format Input format: csv (default), tsv, json, ndjson, xml
         \\ Overrides file extension auto-detection; stdin always uses this value
-        \\ -O, --output-format Output format: csv (default), tsv, json, ndjson, xml
+        \\ -O, --output-format Output format: csv (default), tsv, json, ndjson, xml, markdown (alias: md)
         \\ --json Alias for --output-format json
         \\ --no-type-inference Treat all columns as TEXT (CSV input only)
         \\ -H, --header Print column names as the first output row (CSV/TSV output only)
diff --git a/src/format.zig b/src/format.zig
index b95cd72..26cddcd 100644
--- a/src/format.zig
+++ b/src/format.zig
@@ -48,10 +48,12 @@ pub const OutputFormat = enum {
     json,
     ndjson,
     xml,
+    markdown,
 
     /// Parse a format name string.
     /// Returns error.InvalidOutputFormat when the value is unrecognised.
     pub fn parse(s: []const u8) error{InvalidOutputFormat}!OutputFormat {
+        if (std.mem.eql(u8, s, "md")) return .markdown;
         return std.meta.stringToEnum(OutputFormat, s) orelse error.InvalidOutputFormat;
     }
 };
@@ -142,6 +144,7 @@ pub const OutputWriter = struct {
                 if (self.opts.header and col_count > 0)
                     try csvPrintHeaderRow(stmt, col_count, writer, self.csvDelimiter());
             },
+            .markdown => unreachable, // handled before OutputWriter in execQuery
         }
 
         // Write format-specific preamble.
@@ -174,6 +177,7 @@ pub const OutputWriter = struct {
                 writer,
                 self.opts.xml_row,
             ),
+            .markdown => unreachable, // handled before OutputWriter in execQuery
         }
     }
 
diff --git a/src/main.zig b/src/main.zig
index ac35da5..182e72f 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -4,6 +4,7 @@ const json = @import("json.zig");
 const xml = @import("xml.zig");
 const format = @import("format.zig");
 const table = @import("table.zig");
+const markdown = @import("markdown.zig");
 const build_options = @import("build_options");
 const args_mod = @import("args.zig");
 const sqlite_mod = @import("sqlite.zig");
@@ -69,6 +70,12 @@ fn execQuery(
         return;
     }
 
+    // Markdown output: two-pass writer (not streaming)
+    if (output_format == .markdown) {
+        try markdown.writeMarkdown(allocator, writer, stmt.?, col_count);
+        return;
+    }
+
     var out_writer = format.OutputWriter.init(output_format, .{
         .header = header,
         .xml_root = xml_root,
@@ -214,7 +221,7 @@ pub fn main(init: std.process.Init.Minimal) void {
             error.SilentVerboseConflict => fatal("--silent cannot be combined with --verbose", stderr_writer, .usage, .{}),
             error.InvalidMaxRows => fatal("--max-rows must be a positive integer", stderr_writer, .usage, .{}),
             error.InvalidInputFormat => fatal("unknown input format; supported: csv, tsv, json, ndjson, xml", stderr_writer, .usage, .{}),
-            error.InvalidOutputFormat => fatal("unknown output format; supported: csv, tsv, json, ndjson, xml", stderr_writer, .usage, .{}),
+            error.InvalidOutputFormat => fatal("unknown output format; supported: csv, tsv, json, ndjson, xml, markdown (md)", stderr_writer, .usage, .{}),
             error.ColumnsWithQuery => fatal("--columns cannot be combined with a query argument", stderr_writer, .usage, .{}),
             error.ValidateWithQuery => fatal("--validate cannot be combined with a query argument", stderr_writer, .usage, .{}),
             error.InvalidOutputPath => fatal("--output requires a non-empty file path", stderr_writer, .usage, .{}),
diff --git a/src/markdown.zig b/src/markdown.zig
new file mode 100644
index 0000000..894ae3d
--- /dev/null
+++ b/src/markdown.zig
@@ -0,0 +1,318 @@
+//! Markdown table output format.
+//!
+//! Two-pass writer: pass 1 steps through the whole result set to compute
+//! column widths and detect numeric columns; the statement is then reset and
+//! pass 2 prints the header, the delimiter row, and every data row while
+//! reading directly from SQLite.
+//!
+//! Rows are never buffered, so memory is O(cols). The cost is that the query
+//! runs twice: if it is not deterministic (e.g. it calls `random()`), pass 2
+//! may see values wider than pass 1 measured; such cells overflow their
+//! column instead of being padded.
+
+const std = @import("std");
+const c = @import("c");
+const sqlite_mod = @import("sqlite.zig");
+
+/// Write a Markdown table from SQLite query results to the given writer.
+///
+/// Columns whose non-NULL values are all INTEGER or FLOAT are right-aligned,
+/// both by padding and by a `---:` delimiter cell; all other columns are
+/// left-aligned. NULL values render as empty cells.
+///
+/// Pre:  stmt is a valid prepared statement that has NOT been stepped yet
+///       col_count = sqlite3_column_count(stmt)
+/// Post: all rows are consumed via sqlite3_step (once per pass) and the
+///       table is written to writer; nothing is written if col_count <= 0
+///
+/// Errors: error.StepFailed if stepping or resetting the statement fails,
+///         error.WriteFailed from the writer, error.OutOfMemory.
+///
+/// Memory: uses an arena allocator internally; all memory is freed on return.
+pub fn writeMarkdown(
+    allocator: std.mem.Allocator,
+    writer: *std.Io.Writer,
+    stmt: *c.sqlite3_stmt,
+    col_count: c_int,
+) (std.mem.Allocator.Error || error{ WriteFailed, StepFailed })!void {
+    if (col_count <= 0) return;
+    const ncols: usize = @intCast(col_count);
+
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    // 1. Collect column names, duplicated so they do not depend on SQLite's
+    //    own buffers while the statement is stepped and reset below.
+    const col_names = try a.alloc([]const u8, ncols);
+    for (col_names, 0..) |*name, i| {
+        name.* = try a.dupe(u8, sqlite_mod.columnName(stmt, @intCast(i)) orelse "");
+    }
+
+    // 2. Pass 1: compute column widths and detect numeric columns.
+    const widths = try a.alloc(usize, ncols);
+    for (widths, col_names) |*w, name| w.* = cellWidth(name);
+    const numeric = try a.alloc(bool, ncols);
+    @memset(numeric, true);
+    const has_value = try a.alloc(bool, ncols);
+    @memset(has_value, false);
+
+    var rc = c.sqlite3_step(stmt);
+    while (rc == c.SQLITE_ROW) : (rc = c.sqlite3_step(stmt)) {
+        for (0..ncols) |i| {
+            const idx: c_int = @intCast(i);
+            const col_type = c.sqlite3_column_type(stmt, idx);
+            // NULL renders as an empty cell: no effect on width or type.
+            if (col_type == c.SQLITE_NULL) continue;
+            has_value[i] = true;
+            if (col_type != c.SQLITE_INTEGER and col_type != c.SQLITE_FLOAT) {
+                numeric[i] = false;
+            }
+            // Same accessor as pass 2, so both passes measure the same text.
+            if (sqlite_mod.columnText(stmt, idx)) |text| {
+                widths[i] = @max(widths[i], cellWidth(text));
+            }
+        }
+    }
+    if (rc != c.SQLITE_DONE) return error.StepFailed;
+
+    for (widths, numeric, has_value) |*w, *is_numeric, seen| {
+        // Minimum width of 1 to avoid zero-width columns.
+        if (w.* == 0) w.* = 1;
+        // A column without any non-NULL value is not treated as numeric.
+        is_numeric.* = is_numeric.* and seen;
+    }
+
+    // 3. Reset the statement for the second pass.
+    if (c.sqlite3_reset(stmt) != c.SQLITE_OK) return error.StepFailed;
+
+    // 4. Pass 2: header row, delimiter row, then one line per data row.
+    try writeRow(writer, col_names, widths, false);
+    try writeSeparator(writer, widths, numeric);
+    rc = c.sqlite3_step(stmt);
+    while (rc == c.SQLITE_ROW) : (rc = c.sqlite3_step(stmt)) {
+        try writeDataRow(writer, stmt, widths, numeric);
+    }
+    if (rc != c.SQLITE_DONE) return error.StepFailed;
+}
+
+/// Write one cell: a space, `text` padded to `width` columns, then " |".
+///
+/// Padding precedes the text when `right_align` is set and follows it
+/// otherwise. The subtraction saturates, so text wider than `width` (possible
+/// when pass 2 sees different data than pass 1) overflows the column instead
+/// of underflowing the padding computation.
+fn writeCell(
+    writer: *std.Io.Writer,
+    text: []const u8,
+    width: usize,
+    right_align: bool,
+) error{WriteFailed}!void {
+    const padding = width -| cellWidth(text);
+    try writer.writeByte(' ');
+    if (right_align) try writeSpaces(writer, padding);
+    try writeEscaped(writer, text);
+    if (!right_align) try writeSpaces(writer, padding);
+    try writer.writeAll(" |");
+}
+
+/// Write a pipe-delimited row from already collected values (the header).
+/// All cells are right-aligned when `right_align` is true, else left-aligned.
+fn writeRow(
+    writer: *std.Io.Writer,
+    values: []const []const u8,
+    widths: []const usize,
+    right_align: bool,
+) error{WriteFailed}!void {
+    try writer.writeByte('|');
+    for (values, widths) |val, w| try writeCell(writer, val, w, right_align);
+    try writer.writeByte('\n');
+}
+
+/// Write the delimiter row below the header, e.g. `|-------|----:|`.
+///
+/// Each cell is `width + 2` characters wide so it lines up with the padded
+/// cells of the other rows. Numeric columns end in `:`, the Markdown marker
+/// for right alignment; all other columns consist of dashes only.
+fn writeSeparator(
+    writer: *std.Io.Writer,
+    widths: []const usize,
+    numeric: []const bool,
+) error{WriteFailed}!void {
+    try writer.writeByte('|');
+    for (widths, numeric) |w, is_numeric| {
+        if (is_numeric) {
+            try writeCharRepeated(writer, "-", w + 1);
+            try writer.writeByte(':');
+        } else {
+            try writeCharRepeated(writer, "-", w + 2);
+        }
+        try writer.writeByte('|');
+    }
+    try writer.writeByte('\n');
+}
+
+/// Write a single data row directly from the SQLite statement (no buffering).
+///
+/// NULL values, and values whose text cannot be fetched, render as empty
+/// cells. Non-empty values of numeric columns are right-aligned.
+fn writeDataRow(
+    writer: *std.Io.Writer,
+    stmt: *c.sqlite3_stmt,
+    widths: []const usize,
+    numeric: []const bool,
+) error{WriteFailed}!void {
+    try writer.writeByte('|');
+    for (widths, numeric, 0..) |w, is_numeric, i| {
+        const idx: c_int = @intCast(i);
+        const text: []const u8 = if (c.sqlite3_column_type(stmt, idx) == c.SQLITE_NULL)
+            ""
+        else
+            sqlite_mod.columnText(stmt, idx) orelse "";
+        try writeCell(writer, text, w, is_numeric and text.len > 0);
+    }
+    try writer.writeByte('\n');
+}
+
+/// Width of `s` as written by `writeEscaped`: its visual width plus one
+/// column for the backslash placed in front of every `|`.
+fn cellWidth(s: []const u8) usize {
+    return visualWidth(s) + std.mem.count(u8, s, "|");
+}
+
+/// Write `s` so it cannot break the table structure: `|` becomes `\|` and
+/// each CR or LF becomes a single space (which keeps the width unchanged).
+fn writeEscaped(writer: *std.Io.Writer, s: []const u8) error{WriteFailed}!void {
+    var start: usize = 0;
+    for (s, 0..) |byte, i| {
+        const replacement: []const u8 = switch (byte) {
+            '|' => "\\|",
+            '\n', '\r' => " ",
+            else => continue,
+        };
+        try writer.writeAll(s[start..i]);
+        try writer.writeAll(replacement);
+        start = i + 1;
+    }
+    try writer.writeAll(s[start..]);
+}
+
+// ── UTF-8 / visual-width helpers (copied from table.zig) ──────────────────
+
+/// Length in bytes of the UTF-8 sequence announced by `first`. Continuation
+/// bytes and invalid lead bytes count as one byte.
+fn utf8CharLen(first: u8) usize {
+    if (first < 0x80) return 1;
+    if (first < 0xC0) return 1;
+    if (first < 0xE0) return 2;
+    if (first < 0xF0) return 3;
+    if (first < 0xF8) return 4;
+    return 1;
+}
+
+/// Decode a 1-4 byte sequence; null if a multi-byte sequence is malformed.
+fn utf8DecodeRaw(bytes: []const u8) ?u21 {
+    return switch (bytes.len) {
+        1 => bytes[0],
+        2 => std.unicode.utf8Decode2(bytes[0..2].*) catch null,
+        3 => std.unicode.utf8Decode3(bytes[0..3].*) catch null,
+        4 => std.unicode.utf8Decode4(bytes[0..4].*) catch null,
+        else => null,
+    };
+}
+
+/// True for the code point ranges this module counts as two columns wide.
+fn isWideCodepoint(cp: u21) bool {
+    return (cp >= 0x3400 and cp <= 0x4DBF) or
+        (cp >= 0x4E00 and cp <= 0x9FFF) or
+        (cp >= 0xAC00 and cp <= 0xD7AF) or
+        (cp >= 0xFF00 and cp <= 0xFFEF);
+}
+
+/// Number of columns `s` occupies: 2 per wide code point, 1 per other code
+/// point, and 1 per byte that does not start a decodable UTF-8 sequence.
+fn visualWidth(s: []const u8) usize {
+    var width: usize = 0;
+    var i: usize = 0;
+    while (i < s.len) {
+        const byte_len = utf8CharLen(s[i]);
+        if (i + byte_len > s.len) {
+            width += 1;
+            i += 1;
+            continue;
+        }
+        const slice = s[i..][0..byte_len];
+        const codepoint = utf8DecodeRaw(slice) orelse {
+            width += 1;
+            i += 1;
+            continue;
+        };
+        if (isWideCodepoint(codepoint)) {
+            width += 2;
+        } else {
+            width += 1;
+        }
+        i += byte_len;
+    }
+    return width;
+}
+
+/// Write `char` (a byte sequence) `n` times, batched through a stack buffer.
+fn writeCharRepeated(writer: *std.Io.Writer, char: []const u8, n: usize) error{WriteFailed}!void {
+    if (char.len == 0) return;
+    var buf: [256]u8 = undefined;
+    // Number of whole copies of `char` that fit in the buffer.
+    const per_chunk = buf.len / char.len;
+    if (per_chunk == 0) {
+        // `char` is larger than the buffer: write it unbatched.
+        for (0..n) |_| try writer.writeAll(char);
+        return;
+    }
+    for (0..per_chunk) |k| @memcpy(buf[k * char.len ..][0..char.len], char);
+    var remaining = n;
+    while (remaining > 0) {
+        const chunk = @min(remaining, per_chunk);
+        try writer.writeAll(buf[0 .. chunk * char.len]);
+        remaining -= chunk;
+    }
+}
+
+const spaces_buf = " " ** 256;
+
+/// Write `n` spaces, at most `spaces_buf.len` per write call.
+fn writeSpaces(writer: *std.Io.Writer, n: usize) error{WriteFailed}!void {
+    var remaining = n;
+    while (remaining > 0) {
+        const chunk = @min(remaining, spaces_buf.len);
+        try writer.writeAll(spaces_buf[0..chunk]);
+        remaining -= chunk;
+    }
+}
+
+test "cellWidth counts wide code points and escaped pipes" {
+    try std.testing.expectEqual(@as(usize, 5), cellWidth("Alice"));
+    try std.testing.expectEqual(@as(usize, 4), cellWidth("日本"));
+    try std.testing.expectEqual(@as(usize, 4), cellWidth("a|b"));
+}
+
+test "writeSeparator marks numeric columns as right-aligned" {
+    var buf: [64]u8 = undefined;
+    var w: std.Io.Writer = .fixed(&buf);
+    try writeSeparator(&w, &.{ 5, 3 }, &.{ false, true });
+    try std.testing.expectEqualStrings("|-------|----:|\n", w.buffered());
+}
+
+test "writeRow escapes pipes and flattens line breaks" {
+    var buf: [64]u8 = undefined;
+    var w: std.Io.Writer = .fixed(&buf);
+    const cells = [_][]const u8{ "a|b", "x\ny" };
+    try writeRow(&w, &cells, &.{ 4, 3 }, false);
+    try std.testing.expectEqualStrings("| a\\|b | x y |\n", w.buffered());
+}
+
+test "writeCell lets over-wide text overflow instead of underflowing" {
+    var buf: [64]u8 = undefined;
+    var w: std.Io.Writer = .fixed(&buf);
+    try writeCell(&w, "toolong", 3, true);
+    try std.testing.expectEqualStrings(" toolong |", w.buffered());
+}