Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ result
AGENTS.md
skills-lock.json
.agents/
.slim/
13 changes: 0 additions & 13 deletions src/args.zig
Original file line number Diff line number Diff line change
Expand Up @@ -311,19 +311,6 @@ pub fn parseArgs(allocator: std.mem.Allocator, args: []const [:0]const u8) (SqlP
var positional_args: std.ArrayList([]const u8) = .empty;
defer positional_args.deinit(allocator);

// Loop invariant I: all args[1..i] have been processed;
// query holds the first non-flag argument seen, or null;
// type_inference reflects the presence of --no-type-inference;
// delimiter reflects -d/--delimiter/--tsv if present;
// header reflects the presence of --header/-H;
// output_format reflects the last --output-format/--json flag seen;
// input_format reflects the last --input-format flag seen;
// max_rows reflects the presence of --max-rows;
// disk reflects the presence of --disk;
// positional_args accumulates non-flag arguments for later
// conversion into file inputs and the query string;
// files is built from positional_args after the loop
// Bounding function: args.len - i
var i: usize = 1;
while (i < args.len) : (i += 1) {
const arg = args[i];
Expand Down
8 changes: 0 additions & 8 deletions src/csv.zig
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,6 @@ pub const CsvReader = struct {
var state: State = .field_start;
var has_data = false;

// Loop invariant I:
// `state` satisfies the representation invariant of the automaton.
// `field` contains the decoded bytes of the field currently being parsed.
// `fields` contains the completed, heap-allocated fields of this record.
// All bytes read from `reader` so far have been processed exactly once.
// Bounding function:
// Number of bytes remaining in `reader` (finite input; decreases by 1
// each iteration except on the EOF branch which exits immediately).
while (true) {
const byte = self.reader.takeByte() catch |err| switch (err) {
error.EndOfStream => {
Expand Down
32 changes: 5 additions & 27 deletions src/format.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
//! This module owns:
//! InputFormat — supported input formats, with parse()
//! OutputFormat — supported output formats, with parse()
//! LoadOpts — common options forwarded to input-format loaders
//! WriteOpts — options forwarded to OutputWriter
//! OutputWriter — stateful writer that dispatches on OutputFormat
//! writeField — RFC 4180 CSV field writer (used by OutputWriter and --sample mode)

const std = @import("std");
const c = @import("c");
const json_mod = @import("json.zig");
const sqlite_mod = @import("sqlite.zig");
const xml_mod = @import("xml.zig");

// ─── Input format ──────────────────────────────────────
Expand Down Expand Up @@ -56,22 +56,6 @@ pub const OutputFormat = enum {
}
};

// ─── Load options ───────────────────────────────────────

/// Options forwarded to input-format loaders.
///
/// Every field carries a default, so `.{}` is a valid value and callers only
/// need to spell out the options they want to override.
pub const LoadOpts = struct {
/// Abort if more than this many data rows are read; null = unlimited.
max_rows: ?usize = null,
/// CSV/TSV field delimiter (1–8 bytes). Defaults to a single comma.
delimiter: []const u8 = ",",
/// Infer INTEGER/REAL column types from the first 100 rows (CSV/TSV only).
/// Enabled by default.
type_inference: bool = true,
/// Root element to navigate to for XML input; null = actual document root.
xml_root: ?[]const u8 = null,
/// Row tag filter for XML input; null = any direct child element.
xml_row: ?[]const u8 = null,
};

// ─── Write options ──────────────────────────────────────

/// Options forwarded to OutputWriter.
Expand Down Expand Up @@ -239,17 +223,14 @@ fn csvPrintRow(
writer: *std.Io.Writer,
delimiter: []const u8,
) !void {
// Loop invariant I: columns 0..i-1 have been written, separated by delimiter
// Bounding function: col_count - i
var i: c_int = 0;
while (i < col_count) : (i += 1) {
if (i > 0) try writer.writeAll(delimiter);
if (c.sqlite3_column_type(stmt, i) == c.SQLITE_NULL) {
try writer.writeAll("NULL");
} else {
const ptr = c.sqlite3_column_text(stmt, i);
if (ptr != null) {
try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))), delimiter);
if (sqlite_mod.columnText(stmt, i)) |text| {
try writeField(writer, text, delimiter);
} else {
try writer.writeAll("NULL");
}
Expand All @@ -265,14 +246,11 @@ fn csvPrintHeaderRow(
writer: *std.Io.Writer,
delimiter: []const u8,
) !void {
// Loop invariant I: columns 0..i-1 names have been written, separated by delimiter
// Bounding function: col_count - i
var i: c_int = 0;
while (i < col_count) : (i += 1) {
if (i > 0) try writer.writeAll(delimiter);
const name_ptr = c.sqlite3_column_name(stmt, i);
if (name_ptr != null) {
try writeField(writer, std.mem.span(@as([*:0]const u8, @ptrCast(name_ptr))), delimiter);
if (sqlite_mod.columnName(stmt, i)) |name| {
try writeField(writer, name, delimiter);
}
}
try writer.writeByte('\n');
Expand Down
98 changes: 44 additions & 54 deletions src/json.zig
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ pub fn readLine(
var line: std.ArrayList(u8) = .empty;
errdefer line.deinit(allocator);
var got_any = false;
// Loop invariant I: line contains bytes of the current line read so far (excl. terminator)
// Bounding function: bytes remaining in stream (stream is finite for well-formed input)
while (true) {
const byte = reader.takeByte() catch |err| switch (err) {
error.EndOfStream => {
Expand Down Expand Up @@ -148,8 +146,6 @@ pub fn insertRowFromJson(
deferred_allocs.deinit(allocator);
}

// Loop invariant I: params 1..col_idx-1 are bound for cols[0..col_idx-2]
// Bounding function: cols.len - j
for (cols, 0..) |col, j| {
const col_idx: c_int = @intCast(j + 1);
if (obj.get(col)) |val| {
Expand All @@ -174,8 +170,6 @@ pub fn navigateJsonPath(
) std.json.Value {
var current = value;
var remaining = path;
// Loop invariant: current is the value at the path prefix consumed so far
// Bounding function: remaining.len (strictly decreasing per segment consumed)
while (remaining.len > 0) {
const dot = std.mem.indexOfScalar(u8, remaining, '.') orelse remaining.len;
const key = remaining[0..dot];
Expand All @@ -192,6 +186,40 @@ pub fn navigateJsonPath(
return current;
}

/// Result of firstJsonObject, providing both the resolved array and its first object.
pub const FirstJsonResult = struct {
/// The array that firstJsonObject resolved to: either the parsed value itself
/// or the value reached by following the JSON path.
array: std.json.Array,
/// The object stored at index 0 of `array`, or null when `array` is empty.
first_obj: ?std.json.ObjectMap,
};

/// Locate the array of row objects inside a parsed JSON document and peek at its first element.
///
/// When `json_path` is non-null the document is first walked with `navigateJsonPath`;
/// otherwise `parsed_value` itself is the candidate. The candidate must be an array and,
/// when it is non-empty, its element 0 must be an object; any violation is reported
/// through `fatal`. An empty array is not an error: it is returned with `first_obj = null`.
pub fn firstJsonObject(
    parsed_value: std.json.Value,
    json_path: ?[]const u8,
    stderr_writer: *std.Io.Writer,
) FirstJsonResult {
    // Pick the value to inspect: the document root, or whatever the path leads to.
    const candidate: std.json.Value = blk: {
        const path = json_path orelse break :blk parsed_value;
        break :blk navigateJsonPath(parsed_value, path, stderr_writer);
    };

    const rows: std.json.Array = switch (candidate) {
        .array => |list| list,
        else => {
            // The message names the path only when the caller supplied one.
            if (json_path) |path| {
                fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path});
            }
            fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{});
        },
    };

    // Start from the "empty array" answer and fill in the first object if there is one.
    var result: FirstJsonResult = .{ .array = rows, .first_obj = null };
    if (rows.items.len > 0) {
        result.first_obj = switch (rows.items[0]) {
            .object => |map| map,
            else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}),
        };
    }
    return result;
}

// ─── Input loading ────────────────────────────────────

/// loadJsonArray(allocator, reader, db, table_name, max_rows, json_path, stderr_writer) → usize
Expand All @@ -212,11 +240,7 @@ pub fn loadJsonArray(
stderr_writer: *std.Io.Writer,
) usize {
// Read all input into a buffer using block reads instead of byte-by-byte takeByte()
const buf = reader.allocRemaining(allocator, .unlimited) catch |err| switch (err) {
error.OutOfMemory => fatal("out of memory reading JSON input", stderr_writer, .csv_error, .{}),
error.ReadFailed => fatal("failed to read JSON input", stderr_writer, .csv_error, .{}),
error.StreamTooLong => unreachable, // .unlimited never triggers this
};
const buf = sqlite_helpers.readAllInput(reader, allocator, stderr_writer, "JSON input");
defer allocator.free(buf);

if (buf.len == 0) return 0; // Empty input - return 0 rows gracefully
Expand All @@ -225,26 +249,9 @@ pub fn loadJsonArray(
fatal("failed to parse JSON input", stderr_writer, .csv_error, .{});
defer parsed.deinit();

const target: std.json.Value = if (json_path) |path|
navigateJsonPath(parsed.value, path, stderr_writer)
else
parsed.value;

const array = switch (target) {
.array => |a| a,
else => if (json_path) |path|
fatal("--json-path '{s}': resolved to a non-array value; expected an array of objects", stderr_writer, .csv_error, .{path})
else
fatal("JSON input must be an array of objects", stderr_writer, .csv_error, .{}),
};

if (array.items.len == 0) return 0; // Empty array - return 0 rows gracefully

// Extract column names from the first object's keys (insertion order)
const first_obj = switch (array.items[0]) {
.object => |o| o,
else => fatal("JSON array elements must be objects", stderr_writer, .csv_error, .{}),
};
const fj = firstJsonObject(parsed.value, json_path, stderr_writer);
const first_obj = fj.first_obj orelse return 0; // Empty array - return 0 rows gracefully
const array = fj.array;

var cols: std.ArrayList([]const u8) = .empty;
defer cols.deinit(allocator);
Expand All @@ -263,18 +270,13 @@ pub fn loadJsonArray(
defer _ = c.sqlite3_finalize(stmt);

var rows_inserted: usize = 0;
// Loop invariant I: array.items[0..rows_inserted] have been inserted into t
// Bounding function: array.items.len - rows_inserted
for (array.items) |item| {
const obj = switch (item) {
.object => |o| o,
else => fatal("JSON array element is not an object", stderr_writer, .csv_error, .{}),
};
rows_inserted += 1;
if (max_rows) |limit| {
if (rows_inserted > limit)
fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit});
}
sqlite_helpers.checkMaxRows(rows_inserted, max_rows, stderr_writer);
insertRowFromJson(allocator, stmt, cols.items, obj) catch
fatal("{s}", stderr_writer, .sql_error, .{std.mem.span(c.sqlite3_errmsg(db))});
}
Expand Down Expand Up @@ -311,9 +313,6 @@ pub fn loadNdjsonInput(
var rows_inserted: usize = 0;
var in_transaction = false;

// Loop invariant I: all non-blank lines 1..line_num have been processed;
// rows_inserted = number of objects inserted; in_transaction is true after first object
// Bounding function: lines remaining in reader (finite input)
while (true) {
line_num += 1;
const line = readLine(allocator, reader) catch |err| switch (err) {
Expand Down Expand Up @@ -366,10 +365,7 @@ pub fn loadNdjsonInput(
}

rows_inserted += 1;
if (max_rows) |limit| {
if (rows_inserted > limit)
fatal("input exceeds --max-rows limit ({d} rows)", stderr_writer, .usage, .{limit});
}
sqlite_helpers.checkMaxRows(rows_inserted, max_rows, stderr_writer);

const cols_const: []const []const u8 = @ptrCast(cols_owned.?);
insertRowFromJson(allocator, insert_stmt.?, cols_const, obj) catch
Expand Down Expand Up @@ -425,8 +421,6 @@ pub fn printJsonRow(
) !void {
if (!is_first) try writer.writeByte(',');
try writer.writeByte('{');
// Loop invariant I: columns 0..i-1 have been written, separated by commas
// Bounding function: col_count - i
var i: c_int = 0;
while (i < col_count) : (i += 1) {
if (i > 0) try writer.writeByte(',');
Expand All @@ -445,9 +439,8 @@ pub fn printJsonRow(
}
},
else => {
const ptr = c.sqlite3_column_text(stmt, i);
if (ptr != null) {
try writeJsonString(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))));
if (sqlite_helpers.columnText(stmt, i)) |text| {
try writeJsonString(writer, text);
} else {
try writer.writeAll("null");
}
Expand All @@ -469,8 +462,6 @@ pub fn printNdjsonRow(
writer: *std.Io.Writer,
) !void {
try writer.writeByte('{');
// Loop invariant I: columns 0..i-1 have been written, separated by commas
// Bounding function: col_count - i
var i: c_int = 0;
while (i < col_count) : (i += 1) {
if (i > 0) try writer.writeByte(',');
Expand All @@ -489,9 +480,8 @@ pub fn printNdjsonRow(
}
},
else => {
const ptr = c.sqlite3_column_text(stmt, i);
if (ptr != null) {
try writeJsonString(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr))));
if (sqlite_helpers.columnText(stmt, i)) |text| {
try writeJsonString(writer, text);
} else {
try writer.writeAll("null");
}
Expand Down
Loading
Loading