diff --git a/.gitignore b/.gitignore index a617a4c..808a976 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ result AGENTS.md skills-lock.json .agents/ +.slim/ diff --git a/src/args.zig b/src/args.zig index fc37026..7726f5a 100644 --- a/src/args.zig +++ b/src/args.zig @@ -311,19 +311,6 @@ pub fn parseArgs(allocator: std.mem.Allocator, args: []const [:0]const u8) (SqlP var positional_args: std.ArrayList([]const u8) = .empty; defer positional_args.deinit(allocator); - // Loop invariant I: all args[1..i] have been processed; - // query holds the first non-flag argument seen, or null; - // type_inference reflects the presence of --no-type-inference; - // delimiter reflects -d/--delimiter/--tsv if present; - // header reflects the presence of --header/-H; - // output_format reflects the last --output-format/--json flag seen; - // input_format reflects the last --input-format flag seen; - // max_rows reflects the presence of --max-rows; - // disk reflects the presence of --disk; - // positional_args accumulates non-flag arguments for later - // conversion into file inputs and the query string; - // files is built from positional_args after the loop - // Bounding function: args.len - i var i: usize = 1; while (i < args.len) : (i += 1) { const arg = args[i]; diff --git a/src/csv.zig b/src/csv.zig index 2d49b74..15cb175 100644 --- a/src/csv.zig +++ b/src/csv.zig @@ -99,14 +99,6 @@ pub const CsvReader = struct { var state: State = .field_start; var has_data = false; - // Loop invariant I: - // `state` satisfies the representation invariant of the automaton. - // `field` contains the decoded bytes of the field currently being parsed. - // `fields` contains the completed, heap-allocated fields of this record. - // All bytes read from `reader` so far have been processed exactly once. - // Bounding function: - // Number of bytes remaining in `reader` (finite input; decreases by 1 - // each iteration except on the EOF branch which exits immediately). while (true) { const byte = self.reader.takeByte() catch |err| switch (err) { error.EndOfStream => { diff --git a/src/format.zig b/src/format.zig index 5140a27..b95cd72 100644 --- a/src/format.zig +++ b/src/format.zig @@ -3,7 +3,6 @@ //! This module owns: //! InputFormat — supported input formats, with parse() //! OutputFormat — supported output formats, with parse() -//! LoadOpts — common options forwarded to input-format loaders //! WriteOpts — options forwarded to OutputWriter //! OutputWriter — stateful writer that dispatches on OutputFormat //! writeField — RFC 4180 CSV field writer (used by OutputWriter and --sample mode) @@ -11,6 +10,7 @@ const std = @import("std"); const c = @import("c"); const json_mod = @import("json.zig"); +const sqlite_mod = @import("sqlite.zig"); const xml_mod = @import("xml.zig"); // ─── Input format ────────────────────────────────────── @@ -56,22 +56,6 @@ pub const OutputFormat = enum { } }; -// ─── Load options ─────────────────────────────────────── - -/// Options forwarded to input-format loaders. -pub const LoadOpts = struct { - /// Abort if more than this many data rows are read; null = unlimited. - max_rows: ?usize = null, - /// CSV/TSV field delimiter (1–8 bytes). - delimiter: []const u8 = ",", - /// Infer INTEGER/REAL column types from the first 100 rows (CSV/TSV only). - type_inference: bool = true, - /// Root element to navigate to for XML input; null = actual document root. - xml_root: ?[]const u8 = null, - /// Row tag filter for XML input; null = any direct child element. - xml_row: ?[]const u8 = null, -}; - // ─── Write options ────────────────────────────────────── /// Options forwarded to OutputWriter. @@ -239,17 +223,14 @@ fn csvPrintRow( writer: *std.Io.Writer, delimiter: []const u8, ) !void { - // Loop invariant I: columns 0..i-1 have been written, separated by delimiter - // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { if (i > 0) try writer.writeAll(delimiter); if (c.sqlite3_column_type(stmt, i) == c.SQLITE_NULL) { try writer.writeAll("NULL"); } else { - const ptr = c.sqlite3_column_text(stmt, i); - if (ptr != null) { - try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))), delimiter); + if (sqlite_mod.columnText(stmt, i)) |text| { + try writeField(writer, text, delimiter); } else { try writer.writeAll("NULL"); } @@ -265,14 +246,11 @@ fn csvPrintHeaderRow( writer: *std.Io.Writer, delimiter: []const u8, ) !void { - // Loop invariant I: columns 0..i-1 names have been written, separated by delimiter - // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { if (i > 0) try writer.writeAll(delimiter); - const name_ptr = c.sqlite3_column_name(stmt, i); - if (name_ptr != null) { - try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(name_ptr))), delimiter); + if (sqlite_mod.columnName(stmt, i)) |name| { + try writeField(writer, name, delimiter); } } try writer.writeByte('\n'); diff --git a/src/json.zig b/src/json.zig index a22a2c4..ab06d6b 100644 --- a/src/json.zig +++ b/src/json.zig @@ -47,8 +47,6 @@ pub fn readLine( var line: std.ArrayList(u8) = .empty; errdefer line.deinit(allocator); var got_any = false; - // Loop invariant I: line contains bytes of the current line read so far (excl. terminator) - // Bounding function: bytes remaining in stream (stream is finite for well-formed input) while (true) { const byte = reader.takeByte() catch |err| switch (err) { error.EndOfStream => { @@ -148,8 +146,6 @@ pub fn insertRowFromJson( deferred_allocs.deinit(allocator); } - // Loop invariant I: params 1..col_idx-1 are bound for cols[0..col_idx-2] - // Bounding function: cols.len - j for (cols, 0..) |col, j| { const col_idx: c_int = @intCast(j + 1); if (obj.get(col)) |val| { @@ -174,8 +170,6 @@ pub fn navigateJsonPath( ) std.json.Value { var current = value; var remaining = path; - // Loop invariant: current is the value at the path prefix consumed so far - // Bounding function: remaining.len (strictly decreasing per segment consumed) while (remaining.len > 0) { const dot = std.mem.indexOfScalar(u8, remaining, '.') orelse remaining.len; const key = remaining[0..dot]; @@ -192,6 +186,40 @@ pub fn navigateJsonPath( return current; } +/// Result of firstJsonObject, providing both the resolved array and its first object. +pub const FirstJsonResult = struct { + array: std.json.Array, + first_obj: ?std.json.ObjectMap, +}; + +/// Navigate a parsed JSON value to the target array and return the array and its first object. +/// Returns `first_obj = null` for empty arrays. Fatals if the path doesn't resolve to an array of objects. +pub fn firstJsonObject( + parsed_value: std.json.Value, + json_path: ?[]const u8, + stderr_writer: *std.Io.Writer, +) FirstJsonResult { + const target: std.json.Value = if (json_path) |path| + navigateJsonPath(parsed_value, path, stderr_writer) + else + parsed_value; + + const array = switch (target) { + .array => |a| a, + else => if (json_path) |path| + fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path}) + else + fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), + }; + if (array.items.len == 0) return .{ .array = array, .first_obj = null }; + + const first_obj = switch (array.items[0]) { + .object => |o| o, + else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), + }; + return .{ .array = array, .first_obj = first_obj }; +} + // ─── Input loading ──────────────────────────────────── /// loadJsonArray(allocator, reader, db, table_name, max_rows, json_path, stderr_writer) → usize @@ -212,11 +240,7 @@ pub fn loadJsonArray( stderr_writer: *std.Io.Writer, ) usize { // Read all input into a buffer using block reads instead of byte-by-byte takeByte() - const buf = reader.allocRemaining(allocator, .unlimited) catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory reading JSON input", stderr_writer, .csv_error, .{}), - error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), - error.StreamTooLong => unreachable, // .unlimited never triggers this - }; + const buf = sqlite_helpers.readAllInput(reader, allocator, stderr_writer, "JSON input"); defer allocator.free(buf); if (buf.len == 0) return 0; // Empty input - return 0 rows gracefully @@ -225,26 +249,9 @@ pub fn loadJsonArray( fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); defer parsed.deinit(); - const target: std.json.Value = if (json_path) |path| - navigateJsonPath(parsed.value, path, stderr_writer) - else - parsed.value; - - const array = switch (target) { - .array => |a| a, - else => if (json_path) |path| - fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path}) - else - fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), - }; - - if (array.items.len == 0) return 0; // Empty array - return 0 rows gracefully - - // Extract column names from the first object's keys (insertion order) - const first_obj = switch (array.items[0]) { - .object => |o| o, - else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), - }; + const fj = firstJsonObject(parsed.value, json_path, stderr_writer); + const first_obj = fj.first_obj orelse return 0; // Empty array - return 0 rows gracefully + const array = fj.array; var cols: std.ArrayList([]const u8) = .empty; defer cols.deinit(allocator); @@ -263,18 +270,13 @@ pub fn loadJsonArray( defer _ = c.sqlite3_finalize(stmt); var rows_inserted: usize = 0; - // Loop invariant I: array.items[0..rows_inserted] have been inserted into t - // Bounding function: array.items.len - rows_inserted for (array.items) |item| { const obj = switch (item) { .object => |o| o, else => fatal("JSON array element is not an object", stderr_writer, .csv_error, .{}), }; rows_inserted += 1; - if (max_rows) |limit| { - if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); - } + sqlite_helpers.checkMaxRows(rows_inserted, max_rows, stderr_writer); insertRowFromJson(allocator, stmt, cols.items, obj) catch fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))}); } @@ -311,9 +313,6 @@ pub fn loadNdjsonInput( var rows_inserted: usize = 0; var in_transaction = false; - // Loop invariant I: all non-blank lines 1..line_num have been processed; - // rows_inserted = number of objects inserted; in_transaction is true after first object - // Bounding function: lines remaining in reader (finite input) while (true) { line_num += 1; const line = readLine(allocator, reader) catch |err| switch (err) { @@ -366,10 +365,7 @@ pub fn loadNdjsonInput( } rows_inserted += 1; - if (max_rows) |limit| { - if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); - } + sqlite_helpers.checkMaxRows(rows_inserted, max_rows, stderr_writer); const cols_const: []const []const u8 = @ptrCast(cols_owned.?); insertRowFromJson(allocator, insert_stmt.?, cols_const, obj) catch @@ -425,8 +421,6 @@ pub fn printJsonRow( ) !void { if (!is_first) try writer.writeByte(','); try writer.writeByte('{'); - // Loop invariant I: columns 0..i-1 have been written, separated by commas - // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { if (i > 0) try writer.writeByte(','); @@ -445,9 +439,8 @@ pub fn printJsonRow( } }, else => { - const ptr = c.sqlite3_column_text(stmt, i); - if (ptr != null) { - try writeJsonString(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr)))); + if (sqlite_helpers.columnText(stmt, i)) |text| { + try writeJsonString(writer, text); } else { try writer.writeAll("null"); } @@ -469,8 +462,6 @@ pub fn printNdjsonRow( writer: *std.Io.Writer, ) !void { try writer.writeByte('{'); - // Loop invariant I: columns 0..i-1 have been written, separated by commas - // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { if (i > 0) try writer.writeByte(','); @@ -489,9 +480,8 @@ pub fn printNdjsonRow( } }, else => { - const ptr = c.sqlite3_column_text(stmt, i); - if (ptr != null) { - try writeJsonString(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr)))); + if (sqlite_helpers.columnText(stmt, i)) |text| { + try writeJsonString(writer, text); } else { try writer.writeAll("null"); } diff --git a/src/loader.zig b/src/loader.zig index a121448..f101d06 100644 --- a/src/loader.zig +++ b/src/loader.zig @@ -26,8 +26,6 @@ pub fn isInteger(val: []const u8) bool { var i: usize = 0; if (val[0] == '+' or val[0] == '-') i = 1; if (i >= val.len) return false; // sign only → not an integer - // Loop invariant I: val[0..i] is a valid integer prefix (sign + digits) - // Bounding function: val.len - i while (i < val.len) : (i += 1) { if (val[i] < '0' or val[i] > '9') return false; } @@ -143,6 +141,25 @@ fn accumSlashOrder(current: SlashOrder, vote: SlashOrder) SlashOrder { }; } +/// Per-column inference state, packed into a single allocation instead of 11 +/// parallel arrays. All fields have sensible defaults via struct initialisation. +const ColumnInference = struct { + // Numeric + can_be_integer: bool = true, + can_be_real: bool = true, + has_data: bool = false, + // Datetime + can_be_datetime: bool = true, + dt_has_iso: bool = false, + dt_has_slash: bool = false, + slash_order_dt: SlashOrder = .unknown, + // Date + can_be_date: bool = true, + d_has_nonslash: bool = false, + d_has_slash: bool = false, + slash_order_d: SlashOrder = .unknown, +}; + /// inferTypes(buffer, num_cols, allocator) → []ColumnType /// Pre: buffer is a slice of rows (each row is a slice of field strings) /// num_cols > 0; allocator is valid @@ -169,113 +186,69 @@ pub fn inferTypes( const types = try allocator.alloc(ColumnType, num_cols); errdefer allocator.free(types); - // Numeric tracking (existing) - const can_be_integer = try allocator.alloc(bool, num_cols); - defer allocator.free(can_be_integer); - const can_be_real = try allocator.alloc(bool, num_cols); - defer allocator.free(can_be_real); - const has_data = try allocator.alloc(bool, num_cols); - defer allocator.free(has_data); - - // Datetime tracking - const can_be_datetime = try allocator.alloc(bool, num_cols); - defer allocator.free(can_be_datetime); - const dt_has_iso = try allocator.alloc(bool, num_cols); // 19-char ISO datetime values seen - defer allocator.free(dt_has_iso); - const dt_has_slash = try allocator.alloc(bool, num_cols); // 16-char slash datetime values seen - defer allocator.free(dt_has_slash); - const slash_order_dt = try allocator.alloc(SlashOrder, num_cols); - defer allocator.free(slash_order_dt); - - // Date tracking - const can_be_date = try allocator.alloc(bool, num_cols); - defer allocator.free(can_be_date); - const d_has_nonslash = try allocator.alloc(bool, num_cols); // YYYY-MM-DD or DD-MM-YYYY seen - defer allocator.free(d_has_nonslash); - const d_has_slash = try allocator.alloc(bool, num_cols); // D1/D2/YYYY slash values seen - defer allocator.free(d_has_slash); - const slash_order_d = try allocator.alloc(SlashOrder, num_cols); - defer allocator.free(slash_order_d); - - for (0..num_cols) |j| { - can_be_integer[j] = true; - can_be_real[j] = true; - has_data[j] = false; - can_be_datetime[j] = true; - dt_has_iso[j] = false; - dt_has_slash[j] = false; - slash_order_dt[j] = .unknown; - can_be_date[j] = true; - d_has_nonslash[j] = false; - d_has_slash[j] = false; - slash_order_d[j] = .unknown; - } + // Single struct allocation — initialize each element to apply field defaults + const inferences = try allocator.alloc(ColumnInference, num_cols); + defer allocator.free(inferences); + for (inferences) |*inf| inf.* = .{}; - // Loop invariant I: for each j in 0..num_cols and each value seen so far, - // can_be_datetime[j] = true ⟺ all non-empty values pass isDateTime - // can_be_date[j] = true ⟺ all non-empty values pass isDate and not isDateTime - // can_be_integer[j] = true ⟺ all non-empty values are integers - // can_be_real[j] = true ⟺ all non-empty values are numeric - // has_data[j] = true ⟺ at least one non-empty value has been seen - // Bounding function: buffer.len - row_idx for (buffer) |row| { for (row, 0..) |val, j| { if (j >= num_cols) break; if (val.len == 0) continue; - has_data[j] = true; + inferences[j].has_data = true; // ── Datetime check (highest priority) ──────────────────────────── - if (can_be_datetime[j]) { + if (inferences[j].can_be_datetime) { if (!isDateTime(val)) { - can_be_datetime[j] = false; + inferences[j].can_be_datetime = false; } else if (val.len == 16) { // Slash datetime: accumulate D1/D2 order vote - dt_has_slash[j] = true; + inferences[j].dt_has_slash = true; const d1: u8 = (val[0] - '0') * 10 + (val[1] - '0'); const d2: u8 = (val[3] - '0') * 10 + (val[4] - '0'); const vote: SlashOrder = if (d1 > 12) .eu else if (d2 > 12) .us else .unknown; - slash_order_dt[j] = accumSlashOrder(slash_order_dt[j], vote); + inferences[j].slash_order_dt = accumSlashOrder(inferences[j].slash_order_dt, vote); } else { - dt_has_iso[j] = true; + inferences[j].dt_has_iso = true; } } // ── Date check (isDate is length-10 only; no overlap with isDateTime) ── - if (can_be_date[j]) { + if (inferences[j].can_be_date) { if (!isDate(val)) { - can_be_date[j] = false; + inferences[j].can_be_date = false; } else if (val[2] == '/') { // Slash date: accumulate D1/D2 order vote - d_has_slash[j] = true; + inferences[j].d_has_slash = true; const d1: u8 = (val[0] - '0') * 10 + (val[1] - '0'); const d2: u8 = (val[3] - '0') * 10 + (val[4] - '0'); const vote: SlashOrder = if (d1 > 12) .eu else if (d2 > 12) .us else .unknown; - slash_order_d[j] = accumSlashOrder(slash_order_d[j], vote); + inferences[j].slash_order_d = accumSlashOrder(inferences[j].slash_order_d, vote); } else { - d_has_nonslash[j] = true; // YYYY-MM-DD or DD-MM-YYYY + inferences[j].d_has_nonslash = true; // YYYY-MM-DD or DD-MM-YYYY } } // ── Numeric check ──────────────────────────────────────────────── - if (!can_be_real[j]) continue; + if (!inferences[j].can_be_real) continue; if (!isReal(val)) { - can_be_real[j] = false; - can_be_integer[j] = false; + inferences[j].can_be_real = false; + inferences[j].can_be_integer = false; } else if (!isInteger(val)) { - can_be_integer[j] = false; + inferences[j].can_be_integer = false; } } } // Determine final type per column (DATETIME > DATE > INTEGER > REAL > TEXT) for (0..num_cols) |j| { - if (!has_data[j]) { + if (!inferences[j].has_data) { types[j] = .TEXT; - } else if (can_be_datetime[j]) { - if (dt_has_iso[j] and dt_has_slash[j]) { + } else if (inferences[j].can_be_datetime) { + if (inferences[j].dt_has_iso and inferences[j].dt_has_slash) { types[j] = .TEXT; // mixed ISO + slash datetime formats - } else if (dt_has_slash[j]) { - types[j] = switch (slash_order_dt[j]) { + } else if (inferences[j].dt_has_slash) { + types[j] = switch (inferences[j].slash_order_dt) { .eu => .DATETIME_EU, .us => .DATETIME_US, else => .TEXT, // unknown (all ambiguous) or contradictory @@ -283,11 +256,11 @@ pub fn inferTypes( } else { types[j] = .DATETIME; // pure ISO datetime } - } else if (can_be_date[j]) { - if (d_has_nonslash[j] and d_has_slash[j]) { + } else if (inferences[j].can_be_date) { + if (inferences[j].d_has_nonslash and inferences[j].d_has_slash) { types[j] = .TEXT; // mixed ISO/dash + slash date formats - } else if (d_has_slash[j]) { - types[j] = switch (slash_order_d[j]) { + } else if (inferences[j].d_has_slash) { + types[j] = switch (inferences[j].slash_order_d) { .eu => .DATE_EU, .us => .DATE_US, else => .TEXT, // unknown (all ambiguous) or contradictory @@ -295,9 +268,9 @@ pub fn inferTypes( } else { types[j] = .DATE; // YYYY-MM-DD or DD-MM-YYYY (detected at bind time) } - } else if (can_be_integer[j]) { + } else if (inferences[j].can_be_integer) { types[j] = .INTEGER; - } else if (can_be_real[j]) { + } else if (inferences[j].can_be_real) { types[j] = .REAL; } else { types[j] = .TEXT; @@ -344,10 +317,6 @@ pub fn parseHeader( var seen = std.StringHashMap(usize).init(allocator); defer seen.deinit(); - // Loop invariant I: cols contains trimmed, non-empty (possibly suffixed) names for record[0..i] - // seen maps each base name to its occurrence count up to i - // all items in cols are heap-allocated (owned by allocator) - // Bounding function: record.len - i (natural, decreasing, lower-bounded by 0) for (record) |field| { const base = std.mem.trim(u8, field, " \t\r"); if (base.len == 0) return error.EmptyColumnName; @@ -388,9 +357,6 @@ pub fn insertRowTyped( var col_idx: c_int = 1; - // Loop invariant I: row[0..col_idx-1] are bound to params 1..col_idx-1 - // using the appropriate SQLite bind function for each column type. - // Bounding function: row.len + 1 - col_idx (decreasing toward 0) for (row) |val| { if (col_idx > param_count) break; const j: usize = @intCast(col_idx - 1); @@ -444,7 +410,6 @@ pub fn insertRowTyped( } // Bind NULL for any trailing columns the row is short of - // Loop invariant: params 1..col_idx-1 are bound; col_idx..param_count become NULL while (col_idx <= param_count) : (col_idx += 1) { if (c.sqlite3_bind_null(stmt, col_idx) != c.SQLITE_OK) return error.BindFailed; @@ -554,9 +519,6 @@ pub fn fmtThousands(buf: []u8, n: usize) []const u8 { const len = digits.len; const first_group = len % 3; // digits in the leading group (0 means groups of 3 from start) var out_len: usize = 0; - // Loop invariant I: buf[0..out_len] = formatted prefix of digits[0..i] - // commas inserted before every third digit counted from the right - // Bounding function: len - i for (digits, 0..) |ch, i| { if ((i > 0 and i == first_group) or (i > first_group and (i - first_group) % 3 == 0)) @@ -689,10 +651,7 @@ pub fn loadCsvInput( // Insert buffered rows for (row_buffer.items) |row| { rows_inserted += 1; - if (parsed.max_rows) |limit| { - if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); - } + sqlite_mod.checkMaxRows(rows_inserted, parsed.max_rows, stderr_writer); insertRowTyped(stmt, row, types, @intCast(num_cols)) catch fatalSqlWithContext(allocator, db, table_name, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); if (is_tty and rows_inserted % progress_interval == 0) @@ -721,10 +680,7 @@ pub fn loadCsvInput( if (record.len == 0) continue; rows_inserted += 1; - if (parsed.max_rows) |limit| { - if (rows_inserted > limit) - fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); - } + sqlite_mod.checkMaxRows(rows_inserted, parsed.max_rows, stderr_writer); insertRowTyped(stmt, record, types, @intCast(num_cols)) catch fatalSqlWithContext(allocator, db, table_name, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); if (is_tty and rows_inserted % progress_interval == 0) @@ -1036,7 +992,7 @@ test "inferTypes: detects DATETIME_US (slash datetime with d2 > 12)" { } test "inferTypes: mixed ISO date and slash date → TEXT (d_has_nonslash && d_has_slash)" { - // Exercises loader.zig line 287: d_has_nonslash[j] and d_has_slash[j] → TEXT + // Exercises loader.zig line 287: inferences[j].d_has_nonslash and inferences[j].d_has_slash → TEXT const allocator = std.testing.allocator; var f1: [1][]u8 = .{@constCast("2024-01-15")}; // ISO → d_has_nonslash var f2: [1][]u8 = .{@constCast("15/01/2024")}; // EU slash → d_has_slash diff --git a/src/main.zig b/src/main.zig index 23275d3..ac35da5 100644 --- a/src/main.zig +++ b/src/main.zig @@ -77,14 +77,38 @@ fn execQuery( defer out_writer.deinit(allocator); try out_writer.begin(allocator, stmt.?, col_count, writer); - // Loop invariant I: all SQLITE_ROW results returned so far have been written - // Bounding function: number of remaining rows in the result set (finite) while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { try out_writer.writeRow(stmt.?, writer); } try out_writer.end(writer); } +/// loadInput(allocator, io, db, table_name, input_format, reader, parsed, stderr_writer) → usize +/// Pre: reader points to open input (file or stdin) +/// Post: dispatches to the correct loader based on format; returns number of rows loaded +fn loadInput( + allocator: std.mem.Allocator, + io: std.Io, + db: *c.sqlite3, + table_name: []const u8, + input_format: InputFormat, + reader: *std.Io.Reader, + parsed: ParsedArgs, + stderr_writer: *std.Io.Writer, +) usize { + return switch (input_format) { + .csv => loadCsvInput(allocator, io, db, table_name, reader, parsed, stderr_writer), + .tsv => blk: { + var tsv_parsed = parsed; + tsv_parsed.delimiter = "\t"; + break :blk loadCsvInput(allocator, io, db, table_name, reader, tsv_parsed, stderr_writer); + }, + .json => json.loadJsonArray(allocator, reader, db, table_name, parsed.max_rows, parsed.json_path, stderr_writer), + .ndjson => json.loadNdjsonInput(allocator, reader, db, table_name, parsed.max_rows, stderr_writer), + .xml => xml.loadXmlInput(allocator, reader, db, table_name, parsed.xml_root_input, parsed.xml_row_input, parsed.max_rows, stderr_writer), + }; +} + /// run(allocator, io, parsed, stderr_writer, stdout_writer, use_table) → void /// Pre: parsed contains a valid query; allocator and writers are valid /// use_table is true when output should be formatted as a pretty table @@ -110,50 +134,12 @@ fn run( // Load each file argument into its named table for (parsed.files) |file_input| { - const rows = switch (file_input.format) { - .csv => blk: { - var file_buf: [4096]u8 = undefined; - const file = std.Io.Dir.openFile(std.Io.Dir.cwd(), io, file_input.path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ file_input.path, @errorName(err) }); - defer std.Io.File.close(file, io); - var file_reader = std.Io.File.reader(file, io, &file_buf); - break :blk loadCsvInput(allocator, io, db, file_input.table_name, &file_reader.interface, parsed, stderr_writer); - }, - .tsv => blk: { - var file_buf: [4096]u8 = undefined; - const file = std.Io.Dir.openFile(std.Io.Dir.cwd(), io, file_input.path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ file_input.path, @errorName(err) }); - defer std.Io.File.close(file, io); - var file_reader = std.Io.File.reader(file, io, &file_buf); - var tsv_parsed = parsed; - tsv_parsed.delimiter = "\t"; - break :blk loadCsvInput(allocator, io, db, file_input.table_name, &file_reader.interface, tsv_parsed, stderr_writer); - }, - .json => blk: { - var file_buf: [4096]u8 = undefined; - const file = std.Io.Dir.openFile(std.Io.Dir.cwd(), io, file_input.path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ file_input.path, @errorName(err) }); - defer std.Io.File.close(file, io); - var file_reader = std.Io.File.reader(file, io, &file_buf); - break :blk json.loadJsonArray(allocator, &file_reader.interface, db, file_input.table_name, parsed.max_rows, parsed.json_path, stderr_writer); - }, - .ndjson => blk: { - var file_buf: [4096]u8 = undefined; - const file = std.Io.Dir.openFile(std.Io.Dir.cwd(), io, file_input.path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ file_input.path, @errorName(err) }); - defer std.Io.File.close(file, io); - var file_reader = std.Io.File.reader(file, io, &file_buf); - break :blk json.loadNdjsonInput(allocator, &file_reader.interface, db, file_input.table_name, parsed.max_rows, stderr_writer); - }, - .xml => blk: { - var file_buf: [4096]u8 = undefined; - const file = std.Io.Dir.openFile(std.Io.Dir.cwd(), io, file_input.path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ file_input.path, @errorName(err) }); - defer std.Io.File.close(file, io); - var file_reader = std.Io.File.reader(file, io, &file_buf); - break :blk xml.loadXmlInput(allocator, &file_reader.interface, db, file_input.table_name, parsed.xml_root_input, parsed.xml_row_input, parsed.max_rows, stderr_writer); - }, - }; + var file_buf: [4096]u8 = undefined; + const file = std.Io.Dir.openFile(std.Io.Dir.cwd(), io, file_input.path, .{}) catch |err| + fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ file_input.path, @errorName(err) }); + defer std.Io.File.close(file, io); + var file_reader = std.Io.File.reader(file, io, &file_buf); + const rows = loadInput(allocator, io, db, file_input.table_name, file_input.format, &file_reader.interface, parsed, stderr_writer); if (rows == 0) { fatal("empty input file: '{s}'", stderr_writer, .csv_error, .{file_input.path}); } @@ -162,35 +148,9 @@ fn run( // Load stdin as `t` if piped if (parsed.has_stdin) { - const rows = switch (parsed.input_format) { - .csv => blk: { - var stdin_buf: [4096]u8 = undefined; - var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - break :blk loadCsvInput(allocator, io, db, "t", &stdin_reader.interface, parsed, stderr_writer); - }, - .tsv => blk: { - var stdin_buf: [4096]u8 = undefined; - var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - var tsv_parsed = parsed; - tsv_parsed.delimiter = "\t"; - break :blk loadCsvInput(allocator, io, db, "t", &stdin_reader.interface, tsv_parsed, stderr_writer); - }, - .json => blk: { - var stdin_buf: [4096]u8 = undefined; - var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - break :blk json.loadJsonArray(allocator, &stdin_reader.interface, db, "t", parsed.max_rows, parsed.json_path, stderr_writer); - }, - .ndjson => blk: { - var stdin_buf: [4096]u8 = undefined; - var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - break :blk json.loadNdjsonInput(allocator, &stdin_reader.interface, db, "t", parsed.max_rows, stderr_writer); - }, - .xml => blk: { - var stdin_buf: [4096]u8 = undefined; - var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); - break :blk xml.loadXmlInput(allocator, &stdin_reader.interface, db, "t", parsed.xml_root_input, parsed.xml_row_input, parsed.max_rows, stderr_writer); - }, - }; + var stdin_buf: [4096]u8 = undefined; + var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + const rows = loadInput(allocator, io, db, "t", parsed.input_format, &stdin_reader.interface, parsed, stderr_writer); total_rows += rows; } diff --git a/src/modes/columns.zig b/src/modes/columns.zig index 273de35..bafd02d 100644 --- a/src/modes/columns.zig +++ b/src/modes/columns.zig @@ -11,6 +11,7 @@ const inference_buffer_size = loader.inference_buffer_size; const ExitCode = args_mod.ExitCode; const fatal = @import("../sqlite.zig").fatal; +const readAllInput = @import("../sqlite.zig").readAllInput; pub fn runColumns( allocator: std.mem.Allocator, @@ -105,10 +106,7 @@ pub fn runColumns( defer if (input_source == .file) std.Io.File.close(source_file, io); var source_reader = std.Io.File.reader(source_file, io, &read_buf); - const input = source_reader.interface.allocRemaining(allocator, .unlimited) catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory reading JSON input", stderr_writer, .csv_error, .{}), - error.ReadFailed, error.StreamTooLong => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), - }; + const input = readAllInput(&source_reader.interface, allocator, stderr_writer, "JSON input"); defer allocator.free(input); if (input.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); @@ -116,24 +114,8 @@ pub fn runColumns( fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); defer parsed.deinit(); - const target: std.json.Value = if (args.json_path) |path| - json_mod.navigateJsonPath(parsed.value, path, stderr_writer) - else - parsed.value; - - const array = switch (target) { - .array => |a| a, - else => if (args.json_path) |path| - fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path}) - else - fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), - }; - if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); - - const first_obj = switch (array.items[0]) { - .object => |o| o, - else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), - }; + const first_obj = json_mod.firstJsonObject(parsed.value, args.json_path, stderr_writer).first_obj orelse + fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); var ki = first_obj.iterator(); while (ki.next()) |entry| { diff --git a/src/modes/sample.zig b/src/modes/sample.zig index 14aa180..2ef063e 100644 --- a/src/modes/sample.zig +++ b/src/modes/sample.zig @@ -70,8 +70,6 @@ pub fn runSample( } var csv_row_count: usize = 1; - // Loop invariant I: row_buffer contains all non-empty data rows read so far (up to buf_size) - // Bounding function: buf_size - row_buffer.items.len while (row_buffer.items.len < buf_size) { const rec = csv_reader.nextRecord() catch |err| switch (err) { error.UnterminatedQuotedField => fatal( @@ -110,8 +108,6 @@ pub fn runSample( stderr_writer.print("# Schema ({d} columns):\n", .{cols.len}) catch |err| { std.log.err("failed to write schema: {}", .{err}); }; - // Loop invariant I: cols[0..i] have been printed with aligned type annotation - // Bounding function: cols.len - i for (cols, types) |col, t| { stderr_writer.writeAll("# ") catch |err| { std.log.err("failed to write schema: {}", .{err}); @@ -133,8 +129,6 @@ pub fn runSample( stderr_writer.flush() catch |err| std.log.err("failed to flush stderr: {}", .{err}); // ─── Print header row to stdout ──────────────────────────────────────── - // Loop invariant I: cols[0..i] names have been written, separated by col_delim - // Bounding function: cols.len - i for (cols, 0..) |col, i| { if (i > 0) stdout_writer.writeAll(col_delim) catch fatal("failed to write header", stderr_writer, .csv_error, .{}); @@ -146,12 +140,8 @@ pub fn runSample( // ─── Print first n data rows to stdout ──────────────────────────────── const rows_to_print = @min(args.n, row_buffer.items.len); - // Loop invariant I: row_buffer[0..r] have been printed as delimited rows - // Bounding function: rows_to_print - r for (row_buffer.items[0..rows_to_print]) |row| { var col_idx: usize = 0; - // Loop invariant I: cols[0..col_idx] fields have been written for this row - // Bounding function: cols.len - col_idx while (col_idx < cols.len) : (col_idx += 1) { if (col_idx > 0) stdout_writer.writeAll(col_delim) catch fatal("failed to write field separator", stderr_writer, .csv_error, .{}); diff --git a/src/modes/validate.zig b/src/modes/validate.zig index eefa18a..26d0702 100644 --- a/src/modes/validate.zig +++ b/src/modes/validate.zig @@ -14,6 +14,7 @@ const inference_buffer_size = loader.inference_buffer_size; const ExitCode = args_mod.ExitCode; const fatal = @import("../sqlite.zig").fatal; +const readAllInput = @import("../sqlite.zig").readAllInput; pub fn runValidate( allocator: std.mem.Allocator, @@ -161,10 +162,7 @@ pub fn runValidate( defer if (input_source == .file) std.Io.File.close(source_file, io); var source_reader = std.Io.File.reader(source_file, io, &read_buf); - const input = source_reader.interface.allocRemaining(allocator, .unlimited) catch |err| switch (err) { - error.OutOfMemory => fatal("out of memory reading JSON input", stderr_writer, .csv_error, .{}), - error.ReadFailed, error.StreamTooLong => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}), - }; + const input = readAllInput(&source_reader.interface, allocator, stderr_writer, "JSON input"); defer allocator.free(input); if (input.len == 0) fatal("empty input", stderr_writer, .csv_error, .{}); @@ -172,24 +170,10 @@ pub fn runValidate( fatal("failed to parse JSON input", stderr_writer, .csv_error, .{}); defer parsed.deinit(); - const target: std.json.Value = if (args.json_path) |path| - json_mod.navigateJsonPath(parsed.value, path, stderr_writer) - else - parsed.value; - - const array = switch (target) { - .array => |a| a, - else => if (args.json_path) |path| - fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path}) - else - fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}), - }; - if (array.items.len == 0) fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); - - const first_obj = switch (array.items[0]) { - .object => |o| o, - else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}), - }; + const fj = json_mod.firstJsonObject(parsed.value, args.json_path, stderr_writer); + const first_obj = fj.first_obj orelse + fatal("empty JSON array: cannot determine column names", stderr_writer, .csv_error, .{}); + const array = fj.array; var num_cols: usize = 0; var ki = first_obj.iterator(); diff --git a/src/sqlite.zig b/src/sqlite.zig index c7c55a0..3d4aea3 100644 --- a/src/sqlite.zig +++ b/src/sqlite.zig @@ -81,6 +81,48 @@ pub fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: ExitCode, a std.process.exit(@intFromEnum(code)); } +/// Read all remaining input from a reader into an allocated buffer. Fatal on error. +/// `context` is included in error messages (e.g. "JSON input", "XML input"). +pub fn readAllInput(reader: *std.Io.Reader, allocator: std.mem.Allocator, stderr_writer: *std.Io.Writer, context: []const u8) []u8 { + return reader.allocRemaining(allocator, .unlimited) catch |err| switch (err) { + error.OutOfMemory => fatal("out of memory reading {s}", stderr_writer, .csv_error, .{context}), + error.ReadFailed => fatal("failed to read {s}", stderr_writer, .csv_error, .{context}), + error.StreamTooLong => unreachable, + }; +} + +/// Fatal if rows_inserted exceeds the max_rows limit. +pub fn checkMaxRows(rows_inserted: usize, max_rows: ?usize, stderr_writer: *std.Io.Writer) void { + if (max_rows) |limit| { + if (rows_inserted > limit) + fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit}); + } +} + +/// Execute a SQL statement via sqlite3_exec; fatal on error with the SQLite error message. +fn execSql(db: *c.sqlite3, sql: [*c]const u8, writer: *std.Io.Writer) void { + var errmsg: [*c]u8 = null; + if (c.sqlite3_exec(db, sql, null, null, &errmsg) != c.SQLITE_OK) { + const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); + if (errmsg != null) c.sqlite3_free(errmsg); + fatal("{s}", writer, .sql_error, .{msg}); + } +} + +/// Get the name of a SQLite column as a Zig slice. Returns null if unavailable. +pub fn columnName(stmt: *c.sqlite3_stmt, col: c_int) ?[]const u8 { + const ptr = c.sqlite3_column_name(stmt, col); + if (ptr == null) return null; + return std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); +} + +/// Get the text value of a SQLite column as a Zig slice. Returns null for SQL NULL. +pub fn columnText(stmt: *c.sqlite3_stmt, col: c_int) ?[]const u8 { + const ptr = c.sqlite3_column_text(stmt, col); + if (ptr == null) return null; + return std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); +} + /// Create a table with all-TEXT columns. Column names are double-quote–escaped /// per SQL identifier rules. pub fn createAllTextTable( @@ -108,12 +150,7 @@ pub fn createAllTextTable( sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, .csv_error, .{}); sql.append(allocator, 0) catch fatal("out of memory", writer, .csv_error, .{}); - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, .sql_error, .{msg}); - } + execSql(db, sql.items.ptr, writer); } /// Prepare `INSERT INTO VALUES (?, …, ?)` with n parameters. @@ -144,21 +181,11 @@ pub fn prepareInsertStmt( } pub fn beginTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db, "BEGIN TRANSACTION", null, null, &errmsg) != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, .sql_error, .{msg}); - } + execSql(db, "BEGIN TRANSACTION", writer); } pub fn commitTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db, "COMMIT", null, null, &errmsg) != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, .sql_error, .{msg}); - } + execSql(db, "COMMIT", writer); } /// openDb(disk, writer) → *sqlite3 @@ -185,12 +212,7 @@ pub fn openDb(disk: bool, writer: *std.Io.Writer) *c.sqlite3 { } // Ensure transient structures (ORDER BY sorts, GROUP BY indices) also spill to disk. - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db.?, "PRAGMA temp_store = FILE", null, null, &errmsg) != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - if (errmsg != null) c.sqlite3_free(errmsg); - fatal("failed to set PRAGMA temp_store = FILE: {s}", writer, .sql_error, .{msg}); - } + execSql(db.?, "PRAGMA temp_store = FILE", writer); return db.?; } @@ -242,12 +264,7 @@ pub fn createTable( sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, .csv_error, .{}); sql.append(allocator, 0) catch fatal("out of memory", writer, .csv_error, .{}); - var errmsg: [*c]u8 = null; - if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) { - const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); - if (errmsg != null) c.sqlite3_free(errmsg); - fatal("{s}", writer, .sql_error, .{msg}); - } + execSql(db, sql.items.ptr, writer); } /// Compute the Levenshtein edit distance between two strings. @@ -291,9 +308,7 @@ pub fn getTableColumns(allocator: std.mem.Allocator, db: *c.sqlite3, table_name: while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { // PRAGMA table_info columns: cid(0), name(1), type(2), notnull(3), dflt_value(4), pk(5) - const ptr = c.sqlite3_column_text(stmt, 1); - if (ptr == null) continue; - const name = std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); + const name = columnText(stmt.?, 1) orelse continue; const owned = allocator.dupe(u8, name) catch fatal("out of memory", writer, .csv_error, .{}); cols.append(allocator, owned) catch fatal("out of memory", writer, .csv_error, .{}); } diff --git a/src/table.zig b/src/table.zig index bdbc23f..a6e57dc 100644 --- a/src/table.zig +++ b/src/table.zig @@ -10,6 +10,7 @@ const std = @import("std"); const c = @import("c"); +const sqlite_mod = @import("sqlite.zig"); /// Write a formatted table from SQLite query results to the given writer. /// @@ -34,12 +35,7 @@ pub fn writeTable( // 1. Collect column names (duped for safety) const col_names = try a.alloc([]const u8, ncols); for (0..ncols) |i| { - const name_ptr = c.sqlite3_column_name(stmt, @intCast(i)); - if (name_ptr != null) { - col_names[i] = try a.dupe(u8, std.mem.span(@as([*:0]const u8, @ptrCast(name_ptr)))); - } else { - col_names[i] = ""; - } + col_names[i] = try a.dupe(u8, sqlite_mod.columnName(stmt, @intCast(i)) orelse ""); } // 2. Pass 1: Compute column widths and detect numeric columns @@ -197,9 +193,7 @@ fn writeDataRow( try writeSpaces(writer, w - null_text.len); } } else { - const ptr = c.sqlite3_column_text(stmt, idx); - if (ptr != null) { - const val = std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); + if (sqlite_mod.columnText(stmt, idx)) |val| { const vw = visualWidth(val); const padding = w - vw; if (numeric[i] and val.len > 0) { diff --git a/src/xml.zig b/src/xml.zig index cbd9802..c93d1cb 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -70,8 +70,6 @@ fn decodeEntities(allocator: std.mem.Allocator, s: []const u8) ![]u8 { var out: std.ArrayList(u8) = .empty; errdefer out.deinit(allocator); var i: usize = 0; - // Loop invariant: out contains the decoded prefix of s[0..i] - // Bounding function: s.len - i while (i < s.len) { if (s[i] == '&') { if (std.mem.startsWith(u8, s[i..], "&")) { @@ -161,8 +159,6 @@ pub fn writeXmlRow( try writer.writeByte('<'); try writer.writeAll(row_name); try writer.writeByte('>'); - // Loop invariant I: columns 0..i-1 have been written - // Bounding function: col_count - i var i: c_int = 0; while (i < col_count) : (i += 1) { const name = std.mem.span(col_names[@intCast(i)]); @@ -181,9 +177,8 @@ pub fn writeXmlRow( } }, else => { - const ptr = c.sqlite3_column_text(stmt, i); - if (ptr != null) { - try writeXmlEscaped(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr)))); + if (sqlite_helpers.columnText(stmt, i)) |text| { + try writeXmlEscaped(writer, text); } }, } @@ -311,8 +306,6 @@ pub const XmlParser = struct { } fn skipWsAndMisc(self: *XmlParser, err_writer: *std.Io.Writer) void { - // Loop invariant: all whitespace and misc nodes before self.pos have been consumed - // Bounding function: self.data.len - self.pos while (true) { self.skipWs(); if (self.startsWith("