From 57316c7f1757435dd8aef3155815806545ffe28e Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 12 May 2026 09:25:45 +0000 Subject: [PATCH] fix(coreutils-port): accept localized-Command let-binding in uu_app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yesterday's `Coreutils Args Drift` workflow on main went red at the `regenerating truncate` step: error: unsafe uu_app body: expected a single clap Command builder expression, found 2 statements Upstream `truncate` (rev 94b06d97b) now routes the Command through `uucore::clap_localization::configure_localized_command(cmd)`, so the body has the shape pub fn uu_app() -> Command { let cmd = Command::new("truncate")…; uucore::clap_localization::configure_localized_command(cmd) .arg(…)… } The rewriter elides the localization helper (it pulls in Fluent; bashkit doesn't need it), leaving the body as a 2-stmt `let cmd = …; cmd.arg(…).arg(…)` form. PR #1617 had tightened the validator to a single expression, which is what this case trips. Extend `validate_uu_app_body` to also accept the 2-stmt shape [Stmt::Local(local), Stmt::Expr(tail, None)] with strict guards so the safety story does not weaken: - The `let` pattern must be a plain `Pat::Ident` — no `mut`, no `ref`, no subpattern, no type ascription, no `let … else …`. - The initializer must pass the same per-expression validator (no closures, loops, match, blocks, unsafe, async; only allowlisted macros; only allowlisted method names) AND chain-root at `Command::new(...)`, identical to the single-expression path. - The tail must pass the per-expression validator AND its receiver chain must bottom out at exactly the binder identifier — not at another Command::new, not at a function call, not at a different variable. This proves the tail is just continuing the same builder chain. Anything richer (tuple patterns, refutable matches, type ascription, mutability, an extra prefix statement) still fails, keeping the TM-INF-025 boundary closed. - [x] 5 new tests in `args::tests` cover both directions: positive (`accepts_localized_command_let_binding` mirroring truncate's exact shape, asserting the wrapper is elided in the emitted body and the tail starts at `cmd.arg(…)`); negative (`rejects_let_mut_binding…`, `rejects_three_statement_uu_app`, `rejects_tail_rooted_at_other_ident`, `rejects_let_init_not_rooted_at_command_new`). - [x] `cargo test -p bashkit-coreutils-port` green (32 + 5 → 37 tests). - [x] `cargo build -p bashkit` green (existing generated truncate_args.rs already uses this shape; this PR only re-enables the codegen path that produced it). - [x] `cargo clippy -p bashkit-coreutils-port --all-targets -- -D warnings` clean. - [x] `cargo fmt --check` clean. --- crates/bashkit-coreutils-port/src/args.rs | 99 ++++++++++++++++++----- 1 file changed, 78 insertions(+), 21 deletions(-) diff --git a/crates/bashkit-coreutils-port/src/args.rs b/crates/bashkit-coreutils-port/src/args.rs index ec155eb7..45680d02 100644 --- a/crates/bashkit-coreutils-port/src/args.rs +++ b/crates/bashkit-coreutils-port/src/args.rs @@ -652,7 +652,9 @@ fn collect_option_constants(file: &syn::File) -> Vec { // .(...)...` // — equivalent to a single chain split across a binding. The // tail's innermost receiver must be the let-bound identifier; -// both halves run through the same chain validator. +// both halves run through the same chain validator. The binding is +// deliberately plain: no `mut`, `ref`, type ascription, subpattern, +// non-doc attributes, or `let ... else`. fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> { match uu_app.block.stmts.as_slice() { [Stmt::Expr(expr, None)] => validate_command_chain_expr(expr), @@ -683,12 +685,7 @@ fn validate_command_chain_expr(expr: &Expr) -> Result<()> { } fn validate_let_bound_command_body(local: &syn::Local, tail_expr: &Expr) -> Result<()> { - let binding_ident = simple_let_ident(local).ok_or_else(|| { - anyhow!( - "unsafe uu_app body: let binding must be `let = ...` \ - (no destructuring, no let-else)" - ) - })?; + let binding_ident = plain_let_ident(local)?; let init = local .init .as_ref() @@ -718,21 +715,33 @@ fn validate_let_bound_command_body(local: &syn::Local, tail_expr: &Expr) -> Resu Ok(()) } -/// Returns the bound identifier of a plain `let [: T] = ...;`. -/// Rejects destructuring, `ref`, subpatterns, or attributes — anything -/// that could hide an active expression inside the binding pattern. -fn simple_let_ident(local: &syn::Local) -> Option<&syn::Ident> { - let pat = match &local.pat { - syn::Pat::Type(pt) => &*pt.pat, - other => other, - }; - match pat { - syn::Pat::Ident(pi) - if pi.attrs.is_empty() && pi.by_ref.is_none() && pi.subpat.is_none() => - { - Some(&pi.ident) +/// Returns the bound identifier of a plain `let = ...;`. +/// Rejects destructuring, `mut`, `ref`, type ascription, subpatterns, +/// or non-doc attributes — anything that could hide behavior in the +/// binding pattern or weaken the no-mutation proof. +fn plain_let_ident(local: &syn::Local) -> Result<&syn::Ident> { + if local.attrs.iter().any(|attr| !attr.path().is_ident("doc")) { + bail!("unsafe uu_app body: let binding must not carry non-doc attributes"); + } + match &local.pat { + syn::Pat::Ident(pi) => { + if !pi.attrs.is_empty() + || pi.by_ref.is_some() + || pi.mutability.is_some() + || pi.subpat.is_some() + { + bail!( + "unsafe uu_app body: let binding must be plain `let = ...` \ + (no `mut`, no `ref`, no subpattern)" + ); + } + Ok(&pi.ident) } - _ => None, + syn::Pat::Type(_) => bail!("unsafe uu_app body: let binding must not use type ascription"), + _ => bail!( + "unsafe uu_app body: let binding must bind a single identifier, \ + not a destructuring pattern" + ), } } @@ -1441,6 +1450,30 @@ pub fn uu_app() -> clap::Command { assert!(body.contains("cmd.arg("), "got: {body}"); } + #[test] + fn rejects_let_mut_binding_in_uu_app() { + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let mut cmd = Command::new("cat"); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("no `mut`"), "got: {msg}"); + } + #[test] fn rejects_let_with_non_command_initializer() { let (_tmp, uutils) = fixture(&[ @@ -1465,6 +1498,30 @@ pub fn uu_app() -> clap::Command { assert!(msg.contains("unsafe uu_app"), "got: {msg}"); } + #[test] + fn rejects_let_init_not_rooted_at_command_new() { + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = some::factory(); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("Command::new"), "got: {msg}"); + } + #[test] fn rejects_tail_not_chained_off_let_binding() { let (_tmp, uutils) = fixture(&[