From 41df06e68ff3f3f590cea99be14a09b516603ada Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Thu, 12 Mar 2026 00:16:49 -0700 Subject: [PATCH 1/5] feat(formatter): add --format tsv output format Add `Tsv` as a new `OutputFormat` variant alongside the existing JSON, Table, YAML, and CSV formats. TSV (tab-separated values) is the standard format for shell pipeline tools such as `cut -f2` and `awk -F'\t'`, making it a natural companion to the existing CSV format for scripting use cases. Behaviour mirrors `--format csv`: - Array-of-objects: header row + data rows separated by tabs - Array-of-arrays (e.g. Sheets values API): rows separated by tabs - Flat scalars: one value per line - `--page-all` pagination: header emitted only on the first page Tab and newline characters inside field values are replaced with spaces to preserve column structure, matching the behaviour of Google Sheets TSV export and PostgreSQL COPY. --- .changeset/add-tsv-format.md | 10 ++ src/formatter.rs | 178 +++++++++++++++++++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 .changeset/add-tsv-format.md diff --git a/.changeset/add-tsv-format.md b/.changeset/add-tsv-format.md new file mode 100644 index 00000000..1ffd8ca1 --- /dev/null +++ b/.changeset/add-tsv-format.md @@ -0,0 +1,10 @@ +--- +"@googleworkspace/cli": minor +--- + +Add `--format tsv` output format for tab-separated values + +TSV is the standard format for shell pipeline tools (`cut -f2`, `awk -F'\t'`). +Supports the same features as `--format csv`: array-of-objects, array-of-arrays, +flat scalars, and `--page-all` pagination with header suppression on continuation +pages. Tab characters and newlines inside field values are replaced with spaces. diff --git a/src/formatter.rs b/src/formatter.rs index 08d4d287..f2963e64 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -31,6 +31,8 @@ pub enum OutputFormat { Yaml, /// Comma-separated values. Csv, + /// Tab-separated values. 
+ Tsv, } impl OutputFormat { @@ -45,6 +47,7 @@ impl OutputFormat { "table" => Ok(Self::Table), "yaml" | "yml" => Ok(Self::Yaml), "csv" => Ok(Self::Csv), + "tsv" => Ok(Self::Tsv), other => Err(other.to_string()), } } @@ -64,6 +67,7 @@ pub fn format_value(value: &Value, format: &OutputFormat) -> String { OutputFormat::Table => format_table(value), OutputFormat::Yaml => format_yaml(value), OutputFormat::Csv => format_csv(value), + OutputFormat::Tsv => format_tsv(value), } } @@ -80,6 +84,7 @@ pub fn format_value_paginated(value: &Value, format: &OutputFormat, is_first_pag match format { OutputFormat::Json => serde_json::to_string(value).unwrap_or_default(), OutputFormat::Csv => format_csv_page(value, is_first_page), + OutputFormat::Tsv => format_tsv_page(value, is_first_page), OutputFormat::Table => format_table_page(value, is_first_page), // Prefix every page with a YAML document separator so that the // concatenated stream is parseable as a multi-document YAML file. @@ -410,6 +415,89 @@ fn format_csv_page(value: &Value, emit_header: bool) -> String { output } +fn format_tsv(value: &Value) -> String { + format_tsv_page(value, true) +} + +/// Format as TSV, optionally omitting the header row. +/// +/// Pass `emit_header = false` for all pages after the first when using +/// `--page-all`, so the combined output has a single header line. 
+fn format_tsv_page(value: &Value, emit_header: bool) -> String { + let items = extract_items(value); + + let arr = if let Some((_key, arr)) = items { + arr.as_slice() + } else if let Value::Array(arr) = value { + arr.as_slice() + } else { + return tsv_escape(&value_to_cell(value)); + }; + + if arr.is_empty() { + return String::new(); + } + + // Array of non-objects + if !arr.iter().any(|v| v.is_object()) { + let mut output = String::new(); + for item in arr { + if let Value::Array(inner) = item { + let cells: Vec = inner + .iter() + .map(|v| tsv_escape(&value_to_cell(v))) + .collect(); + let _ = writeln!(output, "{}", cells.join("\t")); + } else { + let _ = writeln!(output, "{}", tsv_escape(&value_to_cell(item))); + } + } + return output; + } + + // Collect columns + let mut columns: Vec = Vec::new(); + for item in arr { + if let Value::Object(obj) = item { + for key in obj.keys() { + if !columns.contains(key) { + columns.push(key.clone()); + } + } + } + } + + let mut output = String::new(); + + if emit_header { + let _ = writeln!(output, "{}", columns.join("\t")); + } + + for item in arr { + let cells: Vec = columns + .iter() + .map(|col| { + if let Value::Object(obj) = item { + tsv_escape(&value_to_cell(obj.get(col).unwrap_or(&Value::Null))) + } else { + String::new() + } + }) + .collect(); + let _ = writeln!(output, "{}", cells.join("\t")); + } + + output +} + +/// Escape a value for TSV output. +/// Tabs and newlines in field values are replaced with spaces to preserve +/// the column structure. This matches the behaviour of most TSV producers +/// (e.g. PostgreSQL COPY, Google Sheets TSV export). 
+fn tsv_escape(s: &str) -> String { + s.replace(['\t', '\n'], " ").replace('\r', "") +} + fn csv_escape(s: &str) -> String { if s.contains(',') || s.contains('"') || s.contains('\n') { format!("\"{}\"", s.replace('"', "\"\"")) @@ -629,6 +717,96 @@ mod tests { assert_eq!(csv_escape("has\"quote"), "\"has\"\"quote\""); } + #[test] + fn test_output_format_parse_tsv() { + assert_eq!(OutputFormat::parse("tsv"), Ok(OutputFormat::Tsv)); + assert_eq!(OutputFormat::from_str("tsv"), OutputFormat::Tsv); + } + + #[test] + fn test_format_tsv_array_of_objects() { + let val = json!({ + "files": [ + {"id": "1", "name": "hello"}, + {"id": "2", "name": "world"} + ] + }); + let output = format_value(&val, &OutputFormat::Tsv); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines[0], "id\tname"); + assert_eq!(lines[1], "1\thello"); + assert_eq!(lines[2], "2\tworld"); + } + + #[test] + fn test_format_tsv_array_of_arrays() { + let val = json!({ + "values": [ + ["Student Name", "Gender", "Class Level"], + ["Alexandra", "Female", "4. Senior"], + ["Andrew", "Male", "1. Freshman"] + ] + }); + let output = format_value(&val, &OutputFormat::Tsv); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines[0], "Student Name\tGender\tClass Level"); + assert_eq!(lines[1], "Alexandra\tFemale\t4. Senior"); + assert_eq!(lines[2], "Andrew\tMale\t1. Freshman"); + } + + #[test] + fn test_format_tsv_flat_scalars() { + let val = json!(["apple", "banana", "cherry"]); + let output = format_value(&val, &OutputFormat::Tsv); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines.len(), 3); + assert_eq!(lines[0], "apple"); + } + + #[test] + fn test_format_tsv_tab_in_value_replaced_with_space() { + // A tab inside a field value must be replaced with a space so it + // doesn't corrupt the column structure of the TSV output. 
+ let val = json!([{"name": "hello\tworld"}]); + let output = format_value(&val, &OutputFormat::Tsv); + let data_line = output.lines().nth(1).unwrap_or(""); + assert_eq!(data_line, "hello world", "tab inside value must become a space: {output}"); + } + + #[test] + fn test_format_tsv_escape() { + assert_eq!(tsv_escape("simple"), "simple"); + assert_eq!(tsv_escape("has\ttab"), "has tab"); + assert_eq!(tsv_escape("has\nnewline"), "has newline"); + assert_eq!(tsv_escape("has\r\nwindows"), "has windows"); + } + + #[test] + fn test_format_value_paginated_tsv_first_page_has_header() { + let val = json!({ + "files": [ + {"id": "1", "name": "a.txt"}, + ] + }); + let output = format_value_paginated(&val, &OutputFormat::Tsv, true); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines[0], "id\tname"); + assert_eq!(lines[1], "1\ta.txt"); + } + + #[test] + fn test_format_value_paginated_tsv_continuation_no_header() { + let val = json!({ + "files": [ + {"id": "2", "name": "b.txt"} + ] + }); + let output = format_value_paginated(&val, &OutputFormat::Tsv, false); + let lines: Vec<&str> = output.lines().collect(); + assert_eq!(lines[0], "2\tb.txt"); + assert!(!output.contains("id\tname")); + } + #[test] fn test_format_yaml() { let val = json!({"name": "test", "count": 42}); From a5233ab6a47f4c04c3f4828b284d5e59f79165da Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Thu, 12 Mar 2026 00:30:55 -0700 Subject: [PATCH 2/5] perf(formatter): use HashSet for O(1) column deduplication in format_tsv_page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Gemini code review feedback: the previous loop used Vec::contains for deduplication which is O(N) per key, leading to O(N²) complexity when collecting columns from large datasets. Replace with a HashSet<&str> to track seen keys in O(1) average time while still preserving insertion order in the Vec. 
--- src/formatter.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/formatter.rs b/src/formatter.rs index f2963e64..776bb4e2 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -455,12 +455,13 @@ fn format_tsv_page(value: &Value, emit_header: bool) -> String { return output; } - // Collect columns + // Collect columns, preserving insertion order while deduplicating in O(1). let mut columns: Vec = Vec::new(); + let mut seen_keys = std::collections::HashSet::new(); for item in arr { if let Value::Object(obj) = item { for key in obj.keys() { - if !columns.contains(key) { + if seen_keys.insert(key.as_str()) { columns.push(key.clone()); } } From 11e1e764a220919e19a417563bcf9cd1c910f6a7 Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Thu, 12 Mar 2026 00:38:57 -0700 Subject: [PATCH 3/5] refactor(formatter): extract shared format_delimited_page for CSV and TSV Replace duplicate CSV/TSV page-formatting logic with a single format_delimited_page function. Both format_csv_page and format_tsv_page now delegate to this shared implementation, which uses a HashSet for O(1) column deduplication while preserving insertion order. Addresses code-review feedback requesting elimination of duplication. --- src/formatter.rs | 112 ++++++++++++++--------------------------------- 1 file changed, 34 insertions(+), 78 deletions(-) diff --git a/src/formatter.rs b/src/formatter.rs index 776bb4e2..0bc6fcaf 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -342,77 +342,9 @@ fn format_csv(value: &Value) -> String { } /// Format as CSV, optionally omitting the header row. -/// -/// Pass `emit_header = false` for all pages after the first when using -/// `--page-all`, so the combined output has a single header line. 
fn format_csv_page(value: &Value, emit_header: bool) -> String { - let items = extract_items(value); - - let arr = if let Some((_key, arr)) = items { - arr.as_slice() - } else if let Value::Array(arr) = value { - arr.as_slice() - } else { - // Single value — just output it - return value_to_cell(value); - }; - - if arr.is_empty() { - return String::new(); - } - - // Array of non-objects - if !arr.iter().any(|v| v.is_object()) { - let mut output = String::new(); - for item in arr { - if let Value::Array(inner) = item { - let cells: Vec = inner - .iter() - .map(|v| csv_escape(&value_to_cell(v))) - .collect(); - let _ = writeln!(output, "{}", cells.join(",")); - } else { - let _ = writeln!(output, "{}", csv_escape(&value_to_cell(item))); - } - } - return output; - } - - // Collect columns - let mut columns: Vec = Vec::new(); - for item in arr { - if let Value::Object(obj) = item { - for key in obj.keys() { - if !columns.contains(key) { - columns.push(key.clone()); - } - } - } - } - - let mut output = String::new(); - - // Header (omitted on continuation pages) - if emit_header { - let _ = writeln!(output, "{}", columns.join(",")); - } - - // Rows - for item in arr { - let cells: Vec = columns - .iter() - .map(|col| { - if let Value::Object(obj) = item { - csv_escape(&value_to_cell(obj.get(col).unwrap_or(&Value::Null))) - } else { - String::new() - } - }) - .collect(); - let _ = writeln!(output, "{}", cells.join(",")); - } - - output + // Preserve existing behaviour: single scalar values are not CSV-escaped. + format_delimited_page(value, emit_header, ",", &csv_escape, false) } fn format_tsv(value: &Value) -> String { @@ -424,6 +356,22 @@ fn format_tsv(value: &Value) -> String { /// Pass `emit_header = false` for all pages after the first when using /// `--page-all`, so the combined output has a single header line. 
fn format_tsv_page(value: &Value, emit_header: bool) -> String { + format_delimited_page(value, emit_header, "\t", &tsv_escape, true) +} + +/// Shared implementation for delimiter-separated output (CSV and TSV). +/// +/// `escape_fn` — per-format value escaping +/// `escape_single_value` — whether to escape a bare scalar value; CSV +/// preserves the historical no-escape behaviour +/// while TSV escapes tabs/newlines for correctness. +fn format_delimited_page( + value: &Value, + emit_header: bool, + separator: &str, + escape_fn: &dyn Fn(&str) -> String, + escape_single_value: bool, +) -> String { let items = extract_items(value); let arr = if let Some((_key, arr)) = items { @@ -431,25 +379,30 @@ fn format_tsv_page(value: &Value, emit_header: bool) -> String { } else if let Value::Array(arr) = value { arr.as_slice() } else { - return tsv_escape(&value_to_cell(value)); + let cell = value_to_cell(value); + return if escape_single_value { + escape_fn(&cell) + } else { + cell + }; }; if arr.is_empty() { return String::new(); } - // Array of non-objects + // Array of non-objects (includes array-of-arrays, e.g. Sheets values API) if !arr.iter().any(|v| v.is_object()) { let mut output = String::new(); for item in arr { if let Value::Array(inner) = item { let cells: Vec = inner .iter() - .map(|v| tsv_escape(&value_to_cell(v))) + .map(|v| escape_fn(&value_to_cell(v))) .collect(); - let _ = writeln!(output, "{}", cells.join("\t")); + let _ = writeln!(output, "{}", cells.join(separator)); } else { - let _ = writeln!(output, "{}", tsv_escape(&value_to_cell(item))); + let _ = writeln!(output, "{}", escape_fn(&value_to_cell(item))); } } return output; @@ -470,22 +423,25 @@ fn format_tsv_page(value: &Value, emit_header: bool) -> String { let mut output = String::new(); + // Header row — escape column names so delimiters inside names don't break parsing. 
if emit_header { - let _ = writeln!(output, "{}", columns.join("\t")); + let header: Vec = columns.iter().map(|c| escape_fn(c)).collect(); + let _ = writeln!(output, "{}", header.join(separator)); } + // Data rows for item in arr { let cells: Vec = columns .iter() .map(|col| { if let Value::Object(obj) = item { - tsv_escape(&value_to_cell(obj.get(col).unwrap_or(&Value::Null))) + escape_fn(&value_to_cell(obj.get(col).unwrap_or(&Value::Null))) } else { String::new() } }) .collect(); - let _ = writeln!(output, "{}", cells.join("\t")); + let _ = writeln!(output, "{}", cells.join(separator)); } output From 29ccee4c2c9b45cdf19bc0c28df4af0258bf7d47 Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Thu, 12 Mar 2026 08:38:26 -0700 Subject: [PATCH 4/5] fix(formatter): replace \r with space in tsv_escape for consistent behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously \r was silently deleted while \n and \t were replaced with a space. This causes "hello\rworld" → "helloworld" but "hello\nworld" → "hello world", which is inconsistent. Now all three ASCII control characters that break TSV structure (\t, \n, \r) are uniformly replaced with a single space, matching the behaviour of PostgreSQL COPY and Google Sheets TSV export. Update test to cover standalone \r and the \r\n (Windows) case. Addresses code-review feedback. --- src/formatter.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/formatter.rs b/src/formatter.rs index 0bc6fcaf..5cd993e2 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -448,11 +448,11 @@ fn format_delimited_page( } /// Escape a value for TSV output. -/// Tabs and newlines in field values are replaced with spaces to preserve -/// the column structure. This matches the behaviour of most TSV producers -/// (e.g. PostgreSQL COPY, Google Sheets TSV export). 
+/// Tabs, newlines, and carriage returns in field values are replaced with +/// spaces to preserve column structure. This is lossy but keeps one record per line, +/// as Google Sheets TSV export does (PostgreSQL COPY backslash-escapes them instead). fn tsv_escape(s: &str) -> String { - s.replace(['\t', '\n'], " ").replace('\r', "") + s.replace(['\t', '\n', '\r'], " ") } fn csv_escape(s: &str) -> String { @@ -735,7 +735,8 @@ mod tests { assert_eq!(tsv_escape("simple"), "simple"); assert_eq!(tsv_escape("has\ttab"), "has tab"); assert_eq!(tsv_escape("has\nnewline"), "has newline"); - assert_eq!(tsv_escape("has\r\nwindows"), "has windows"); + assert_eq!(tsv_escape("has\rreturn"), "has return"); + assert_eq!(tsv_escape("has\r\nwindows"), "has  windows"); } #[test] From 48c2256181c72d199321dca46490e4ba9d99af51 Mon Sep 17 00:00:00 2001 From: abhiram304 Date: Thu, 12 Mar 2026 21:21:38 -0700 Subject: [PATCH 5/5] perf(formatter): use static dispatch for format_delimited_page escape_fn Change escape_fn parameter from &dyn Fn(&str) -> String (dynamic dispatch) to a generic F: Fn(&str) -> String (static dispatch). The compiler can now monomorphize and potentially inline csv_escape/tsv_escape at each call site, eliminating virtual dispatch overhead per cell. Update call sites to pass function items directly (no & required). Addresses code-review feedback. --- src/formatter.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/formatter.rs b/src/formatter.rs index 5cd993e2..694a6b04 100644 --- a/src/formatter.rs +++ b/src/formatter.rs @@ -344,7 +344,7 @@ fn format_csv(value: &Value) -> String { /// Format as CSV, optionally omitting the header row. fn format_csv_page(value: &Value, emit_header: bool) -> String { // Preserve existing behaviour: single scalar values are not CSV-escaped. 
- format_delimited_page(value, emit_header, ",", &csv_escape, false) + format_delimited_page(value, emit_header, ",", csv_escape, false) } fn format_tsv(value: &Value) -> String { @@ -356,7 +356,7 @@ fn format_tsv(value: &Value) -> String { /// Pass `emit_header = false` for all pages after the first when using /// `--page-all`, so the combined output has a single header line. fn format_tsv_page(value: &Value, emit_header: bool) -> String { - format_delimited_page(value, emit_header, "\t", &tsv_escape, true) + format_delimited_page(value, emit_header, "\t", tsv_escape, true) } /// Shared implementation for delimiter-separated output (CSV and TSV). @@ -365,13 +365,16 @@ fn format_tsv_page(value: &Value, emit_header: bool) -> String { /// `escape_single_value` — whether to escape a bare scalar value; CSV /// preserves the historical no-escape behaviour /// while TSV escapes tabs/newlines for correctness. -fn format_delimited_page( +fn format_delimited_page<F>( value: &Value, emit_header: bool, separator: &str, - escape_fn: &dyn Fn(&str) -> String, + escape_fn: F, escape_single_value: bool, -) -> String { +) -> String +where + F: Fn(&str) -> String, +{ let items = extract_items(value); let arr = if let Some((_key, arr)) = items {