Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .changeset/add-tsv-format.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"@googleworkspace/cli": minor
---

Add `--format tsv` output format for tab-separated values

TSV is the standard format for shell pipeline tools (`cut -f2`, `awk -F'\t'`).
Supports the same features as `--format csv`: array-of-objects, array-of-arrays,
flat scalars, and `--page-all` pagination with header suppression on continuation
pages. Tab characters, newlines, and carriage returns inside field values are replaced with spaces.
167 changes: 153 additions & 14 deletions src/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ pub enum OutputFormat {
Yaml,
/// Comma-separated values.
Csv,
/// Tab-separated values.
Tsv,
}

impl OutputFormat {
Expand All @@ -45,6 +47,7 @@ impl OutputFormat {
"table" => Ok(Self::Table),
"yaml" | "yml" => Ok(Self::Yaml),
"csv" => Ok(Self::Csv),
"tsv" => Ok(Self::Tsv),
other => Err(other.to_string()),
}
}
Expand All @@ -64,6 +67,7 @@ pub fn format_value(value: &Value, format: &OutputFormat) -> String {
OutputFormat::Table => format_table(value),
OutputFormat::Yaml => format_yaml(value),
OutputFormat::Csv => format_csv(value),
OutputFormat::Tsv => format_tsv(value),
}
}

Expand All @@ -80,6 +84,7 @@ pub fn format_value_paginated(value: &Value, format: &OutputFormat, is_first_pag
match format {
OutputFormat::Json => serde_json::to_string(value).unwrap_or_default(),
OutputFormat::Csv => format_csv_page(value, is_first_page),
OutputFormat::Tsv => format_tsv_page(value, is_first_page),
OutputFormat::Table => format_table_page(value, is_first_page),
// Prefix every page with a YAML document separator so that the
// concatenated stream is parseable as a multi-document YAML file.
Expand Down Expand Up @@ -337,48 +342,82 @@ fn format_csv(value: &Value) -> String {
}

/// Format as CSV, optionally omitting the header row.
///
/// Pass `emit_header = false` for all pages after the first when using
/// `--page-all`, so the combined output has a single header line.
fn format_csv_page(value: &Value, emit_header: bool) -> String {
// Preserve existing behaviour: single scalar values are not CSV-escaped.
// (The trailing `false` is `format_delimited_page`'s `escape_single_value` flag.)
format_delimited_page(value, emit_header, ",", csv_escape, false)
}

/// Format as TSV (tab-separated values).
///
/// Delegates to `format_tsv_page` with `emit_header = true`, i.e. the
/// single-page form in which the header row is not suppressed.
fn format_tsv(value: &Value) -> String {
format_tsv_page(value, true)
}

/// Format as TSV, optionally omitting the header row.
///
/// Pass `emit_header = false` for all pages after the first when using
/// `--page-all`, so the combined output has a single header line.
fn format_csv_page(value: &Value, emit_header: bool) -> String {
fn format_tsv_page(value: &Value, emit_header: bool) -> String {
format_delimited_page(value, emit_header, "\t", tsv_escape, true)
}

/// Shared implementation for delimiter-separated output (CSV and TSV).
///
/// `escape_fn` — per-format value escaping
/// `escape_single_value` — whether to escape a bare scalar value; CSV
/// preserves the historical no-escape behaviour
/// while TSV escapes tabs/newlines for correctness.
fn format_delimited_page<F>(
value: &Value,
emit_header: bool,
separator: &str,
escape_fn: F,
escape_single_value: bool,
) -> String
where
F: Fn(&str) -> String,
{
let items = extract_items(value);

let arr = if let Some((_key, arr)) = items {
arr.as_slice()
} else if let Value::Array(arr) = value {
arr.as_slice()
} else {
// Single value — just output it
return value_to_cell(value);
let cell = value_to_cell(value);
return if escape_single_value {
escape_fn(&cell)
} else {
cell
};
};

if arr.is_empty() {
return String::new();
}

// Array of non-objects
// Array of non-objects (includes array-of-arrays, e.g. Sheets values API)
if !arr.iter().any(|v| v.is_object()) {
let mut output = String::new();
for item in arr {
if let Value::Array(inner) = item {
let cells: Vec<String> = inner
.iter()
.map(|v| csv_escape(&value_to_cell(v)))
.map(|v| escape_fn(&value_to_cell(v)))
.collect();
let _ = writeln!(output, "{}", cells.join(","));
let _ = writeln!(output, "{}", cells.join(separator));
} else {
let _ = writeln!(output, "{}", csv_escape(&value_to_cell(item)));
let _ = writeln!(output, "{}", escape_fn(&value_to_cell(item)));
}
}
return output;
}

// Collect columns
// Collect columns, preserving insertion order while deduplicating in O(1).
let mut columns: Vec<String> = Vec::new();
let mut seen_keys = std::collections::HashSet::new();
for item in arr {
if let Value::Object(obj) = item {
for key in obj.keys() {
if !columns.contains(key) {
if seen_keys.insert(key.as_str()) {
columns.push(key.clone());
}
}
Expand All @@ -387,29 +426,38 @@ fn format_csv_page(value: &Value, emit_header: bool) -> String {

let mut output = String::new();

// Header (omitted on continuation pages)
// Header row — escape column names so delimiters inside names don't break parsing.
if emit_header {
let _ = writeln!(output, "{}", columns.join(","));
let header: Vec<String> = columns.iter().map(|c| escape_fn(c)).collect();
let _ = writeln!(output, "{}", header.join(separator));
}

// Rows
// Data rows
for item in arr {
let cells: Vec<String> = columns
.iter()
.map(|col| {
if let Value::Object(obj) = item {
csv_escape(&value_to_cell(obj.get(col).unwrap_or(&Value::Null)))
escape_fn(&value_to_cell(obj.get(col).unwrap_or(&Value::Null)))
} else {
String::new()
}
})
.collect();
let _ = writeln!(output, "{}", cells.join(","));
let _ = writeln!(output, "{}", cells.join(separator));
}

output
}
Comment on lines +350 to +451
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The new format_tsv_page function is almost identical to the existing format_csv_page function. This significant code duplication makes future maintenance more difficult and error-prone. For example, the efficient column collection logic using a HashSet should be shared, and both implementations have a bug where column headers are not escaped, potentially corrupting the output.

To improve maintainability and fix this bug, I recommend refactoring the common logic into a single generic format_delimited_page function. This function would accept the delimiter and an escape function as arguments.

Here's a suggested implementation that replaces the new TSV formatting functions. You could then update format_csv_page to call format_delimited_page(value, emit_header, ",", &csv_escape, false) to complete the refactoring. The escape_single_value: false parameter is to maintain the current behavior of format_csv_page, which doesn't escape single scalar values.

fn format_delimited_page(
    value: &Value,
    emit_header: bool,
    separator: &str,
    escape_fn: &dyn Fn(&str) -> String,
    escape_single_value: bool,
) -> String {
    let items = extract_items(value);

    let arr = if let Some((_key, arr)) = items {
        arr.as_slice()
    } else if let Value::Array(arr) = value {
        arr.as_slice()
    } else {
        let cell = value_to_cell(value);
        return if escape_single_value {
            escape_fn(&cell)
        } else {
            cell
        };
    };

    if arr.is_empty() {
        return String::new();
    }

    // Array of non-objects
    if !arr.iter().any(|v| v.is_object()) {
        let mut output = String::new();
        for item in arr {
            if let Value::Array(inner) = item {
                let cells: Vec<String> = inner
                    .iter()
                    .map(|v| escape_fn(&value_to_cell(v)))
                    .collect();
                let _ = writeln!(output, "{}", cells.join(separator));
            } else {
                let _ = writeln!(output, "{}", escape_fn(&value_to_cell(item)));
            }
        }
        return output;
    }

    // Collect columns, preserving insertion order while deduplicating in O(1).
    let mut columns: Vec<String> = Vec::new();
    let mut seen_keys = std::collections::HashSet::new();
    for item in arr {
        if let Value::Object(obj) = item {
            for key in obj.keys() {
                if seen_keys.insert(key.as_str()) {
                    columns.push(key.clone());
                }
            }
        }
    }

    let mut output = String::new();

    if emit_header {
        let headers = columns
            .iter()
            .map(|c| escape_fn(c))
            .collect::<Vec<_>>()
            .join(separator);
        let _ = writeln!(output, "{}", headers);
    }

    for item in arr {
        let cells: Vec<String> = columns
            .iter()
            .map(|col| {
                if let Value::Object(obj) = item {
                    escape_fn(&value_to_cell(obj.get(col).unwrap_or(&Value::Null)))
                } else {
                    String::new()
                }
            })
            .collect();
        let _ = writeln!(output, "{}", cells.join(separator));
    }

    output
}

fn format_tsv(value: &Value) -> String {
    format_tsv_page(value, true)
}

/// Format as TSV, optionally omitting the header row.
///
/// Pass `emit_header = false` for all pages after the first when using
/// `--page-all`, so the combined output has a single header line.
fn format_tsv_page(value: &Value, emit_header: bool) -> String {
    format_delimited_page(value, emit_header, "\t", &tsv_escape, true)
}


/// Sanitise a single field for TSV output.
///
/// Each tab (`\t`), line feed (`\n`), and carriage return (`\r`) is replaced
/// by exactly one space, so a field value can never introduce an extra column
/// or row. The substitution is per character: a `\r\n` pair becomes two
/// spaces. All other characters pass through unchanged.
fn tsv_escape(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            '\t' | '\n' | '\r' => ' ',
            other => other,
        })
        .collect()
}
Comment on lines +457 to +459
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The current implementation of tsv_escape handles carriage returns (\r) inconsistently compared to newlines (\n). It removes \r but replaces \n with a space. This means a value like "hello\rworld" becomes "helloworld", while "hello\nworld" becomes "hello world". To ensure consistent behavior across different line ending conventions (Unix, Windows, classic Mac), all whitespace characters that could break the TSV structure (\t, \n, \r) should be replaced with a space.

Suggested change
fn tsv_escape(s: &str) -> String {
s.replace(['\t', '\n'], " ").replace('\r', "")
}
/// Escape a value for TSV output.
///
/// Replaces every tab, line feed, and carriage return with a single space so
/// that all three line-ending conventions are handled the same way and a
/// field can never introduce an extra column or row.
fn tsv_escape(s: &str) -> String {
    // `str::replace` takes the replacement as `&str`, so this must be the
    // string literal `" "`; the char literal `' '` does not type-check.
    s.replace(['\t', '\n', '\r'], " ")
}

Comment on lines +457 to +459
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

For performance and consistency with csv_escape, this function should avoid allocating a new String if no escaping is necessary. The current implementation calls replace unconditionally, which is inefficient for inputs that don't contain any characters that need to be replaced.

fn tsv_escape(s: &str) -> String {
    if s.contains(['\t', '\n', '\r']) {
        s.replace(['\t', '\n', '\r'], " ")
    } else {
        s.to_string()
    }
}


fn csv_escape(s: &str) -> String {
if s.contains(',') || s.contains('"') || s.contains('\n') {
format!("\"{}\"", s.replace('"', "\"\""))
Expand Down Expand Up @@ -629,6 +677,97 @@ mod tests {
assert_eq!(csv_escape("has\"quote"), "\"has\"\"quote\"");
}

#[test]
fn test_output_format_parse_tsv() {
assert_eq!(OutputFormat::parse("tsv"), Ok(OutputFormat::Tsv));
assert_eq!(OutputFormat::from_str("tsv"), OutputFormat::Tsv);
}

#[test]
fn test_format_tsv_array_of_objects() {
let val = json!({
"files": [
{"id": "1", "name": "hello"},
{"id": "2", "name": "world"}
]
});
let output = format_value(&val, &OutputFormat::Tsv);
let lines: Vec<&str> = output.lines().collect();
assert_eq!(lines[0], "id\tname");
assert_eq!(lines[1], "1\thello");
assert_eq!(lines[2], "2\tworld");
}

#[test]
fn test_format_tsv_array_of_arrays() {
let val = json!({
"values": [
["Student Name", "Gender", "Class Level"],
["Alexandra", "Female", "4. Senior"],
["Andrew", "Male", "1. Freshman"]
]
});
let output = format_value(&val, &OutputFormat::Tsv);
let lines: Vec<&str> = output.lines().collect();
assert_eq!(lines[0], "Student Name\tGender\tClass Level");
assert_eq!(lines[1], "Alexandra\tFemale\t4. Senior");
assert_eq!(lines[2], "Andrew\tMale\t1. Freshman");
}

#[test]
fn test_format_tsv_flat_scalars() {
let val = json!(["apple", "banana", "cherry"]);
let output = format_value(&val, &OutputFormat::Tsv);
let lines: Vec<&str> = output.lines().collect();
assert_eq!(lines.len(), 3);
assert_eq!(lines[0], "apple");
}

#[test]
fn test_format_tsv_tab_in_value_replaced_with_space() {
// A tab inside a field value must be replaced with a space so it
// doesn't corrupt the column structure of the TSV output.
let val = json!([{"name": "hello\tworld"}]);
let output = format_value(&val, &OutputFormat::Tsv);
let data_line = output.lines().nth(1).unwrap_or("");
assert_eq!(data_line, "hello world", "tab inside value must become a space: {output}");
}

#[test]
fn test_format_tsv_escape() {
    assert_eq!(tsv_escape("simple"), "simple");
    assert_eq!(tsv_escape("has\ttab"), "has tab");
    assert_eq!(tsv_escape("has\nnewline"), "has newline");
    assert_eq!(tsv_escape("has\rreturn"), "has return");
    // `\r` and `\n` are each replaced by their own space, so a Windows line
    // ending produces two consecutive spaces, not one.
    assert_eq!(tsv_escape("has\r\nwindows"), "has  windows");
}
Comment on lines +736 to +743
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Following the suggested change to tsv_escape to handle \r consistently, this test should be updated. The assertion for Windows-style newlines (\r\n) will now expect two spaces, as both \r and \n are replaced by a space. Additionally, a new test case for a standalone carriage return (\r) should be added to verify the corrected behavior.

Suggested change
#[test]
fn test_format_tsv_escape() {
assert_eq!(tsv_escape("simple"), "simple");
assert_eq!(tsv_escape("has\ttab"), "has tab");
assert_eq!(tsv_escape("has\nnewline"), "has newline");
assert_eq!(tsv_escape("has\r\nwindows"), "has windows");
}
#[test]
fn test_format_tsv_escape() {
    assert_eq!(tsv_escape("simple"), "simple");
    assert_eq!(tsv_escape("has\ttab"), "has tab");
    assert_eq!(tsv_escape("has\nnewline"), "has newline");
    // Standalone carriage return (classic Mac line ending) becomes a space.
    assert_eq!(tsv_escape("has\rreturn"), "has return");
    // `\r` and `\n` are replaced independently, so `\r\n` yields two spaces.
    assert_eq!(tsv_escape("has\r\nwindows"), "has  windows");
}


#[test]
fn test_format_value_paginated_tsv_first_page_has_header() {
let val = json!({
"files": [
{"id": "1", "name": "a.txt"},
]
});
let output = format_value_paginated(&val, &OutputFormat::Tsv, true);
let lines: Vec<&str> = output.lines().collect();
assert_eq!(lines[0], "id\tname");
assert_eq!(lines[1], "1\ta.txt");
}

#[test]
fn test_format_value_paginated_tsv_continuation_no_header() {
let val = json!({
"files": [
{"id": "2", "name": "b.txt"}
]
});
let output = format_value_paginated(&val, &OutputFormat::Tsv, false);
let lines: Vec<&str> = output.lines().collect();
assert_eq!(lines[0], "2\tb.txt");
assert!(!output.contains("id\tname"));
}

#[test]
fn test_format_yaml() {
let val = json!({"name": "test", "count": 42});
Expand Down
Loading