From 53b5de0727cdbab19b424f523cd3a3f9342b8b49 Mon Sep 17 00:00:00 2001 From: robert Date: Tue, 19 May 2026 19:18:00 +0000 Subject: [PATCH 1/3] feat(algorithms): add counting sort --- Cslib.lean | 2 + .../Lean/CountingSort/CountingSort.lean | 225 ++++++++++++++++++ Cslib/Algorithms/Lean/Sorting.lean | 28 +++ 3 files changed, 255 insertions(+) create mode 100644 Cslib/Algorithms/Lean/CountingSort/CountingSort.lean create mode 100644 Cslib/Algorithms/Lean/Sorting.lean diff --git a/Cslib.lean b/Cslib.lean index 1b7b4c4b1..08c34c9e5 100644 --- a/Cslib.lean +++ b/Cslib.lean @@ -1,6 +1,8 @@ module -- shake: keep-all +public import Cslib.Algorithms.Lean.CountingSort.CountingSort public import Cslib.Algorithms.Lean.MergeSort.MergeSort +public import Cslib.Algorithms.Lean.Sorting public import Cslib.Algorithms.Lean.TimeM public import Cslib.Computability.Automata.Acceptors.Acceptor public import Cslib.Computability.Automata.Acceptors.OmegaAcceptor diff --git a/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean b/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean new file mode 100644 index 000000000..69fc38c27 --- /dev/null +++ b/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean @@ -0,0 +1,225 @@ +/- +Copyright (c) 2026 Robert Joseph George. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Robert Joseph George +-/ + +module + +public import Cslib.Algorithms.Lean.Sorting +public import Cslib.Algorithms.Lean.TimeM +public import Mathlib.Data.Nat.Basic +public import Mathlib.Order.RelClasses +public import Mathlib.Tactic.Attr.Core +public import Mathlib.Tactic.Finiteness.Attr +public import Batteries.Data.List.Lemmas + +/-! +# Counting sort on natural-number lists + +`countingSort` sorts a `List ℕ` using an explicit upper bound `bound`. It scans the input once for +each key in `0, ..., bound` and returns a `TimeM ℕ (List ℕ)`. + +The cost model counts one key comparison while scanning an input element. Constructing output +blocks, appending lists, arithmetic, and pattern matching are free. + +Correctness is proved through counts: each generated block contributes exactly the copies for one +key, and bounded inputs ensure no out-of-range key is missing from the output. +-/ + +@[expose] public section + +set_option autoImplicit false + +namespace Cslib.Algorithms.Lean.TimeM + +/-- `xs` is bounded by `bound` when every entry is at most `bound`. -/ +abbrev BoundedBy (bound : ℕ) (xs : List ℕ) : Prop := ∀ x ∈ xs, x ≤ bound + +/-- Counts occurrences of one key, charging one tick for each inspected input element. -/ +def countKey (key : ℕ) : List ℕ → TimeM ℕ ℕ + | [] => return 0 + | x :: xs => do + ✓ let hit := x == key + let rest ← countKey key xs + return rest + if hit then 1 else 0 + +/-- Builds the sorted blocks for keys `start, ..., start + fuel - 1`. -/ +def buildCountedFrom (start fuel : ℕ) (xs : List ℕ) : TimeM ℕ (List ℕ) := + match fuel with + | 0 => return [] + | fuel' + 1 => do + let count ← countKey start xs + let rest ← buildCountedFrom (start + 1) fuel' xs + return List.replicate count start ++ rest + +/-- Sorts natural-number lists by counting all keys from `0` through `bound`. -/ +def countingSort (bound : ℕ) (xs : List ℕ) : TimeM ℕ (List ℕ) := + buildCountedFrom 0 (bound + 1) xs + +section Correctness + +/-- Timed key counting computes `List.count`. -/ +@[simp, grind =] +private theorem ret_countKey (key : ℕ) (xs : List ℕ) : ⟪countKey key xs⟫ = xs.count key := by + induction xs with + | nil => simp [countKey] + | cons x xs ih => + by_cases h : x = key <;> simp [countKey, ih, h] + +/-- Counting one key performs one comparison per input element. -/ +@[simp, grind =] +private theorem countKey_time (key : ℕ) (xs : List ℕ) : (countKey key xs).time = xs.length := by + induction xs with + | nil => simp [countKey] + | cons x xs ih => simp [countKey, ih, Nat.add_comm] + +@[simp] +private theorem count_replicate_of_ne {key value count : ℕ} (h : value ≠ key) : + (List.replicate count value).count key = 0 := by + simp [List.count_replicate, h] + +/-- The generated blocks contain no key below the requested start. -/ +private theorem buildCountedFrom_start_le_of_mem {value start fuel : ℕ} {xs : List ℕ} : + value ∈ ⟪buildCountedFrom start fuel xs⟫ → start ≤ value := by + induction fuel generalizing start value with + | zero => simp [buildCountedFrom] + | succ fuel ih => + simp only [buildCountedFrom, ret_bind, ret_pure, ret_countKey, List.mem_append, + List.mem_replicate] + intro h + rcases h with h | h + · exact le_of_eq h.2.symm + · exact le_trans (Nat.le_succ start) (ih h) + +/-- +The generated blocks contain the right number of copies for each key. This is the main counting +invariant: keys outside `start, ..., start + fuel - 1` occur zero times. +-/ +@[grind =] +private theorem buildCountedFrom_count (key start fuel : ℕ) (xs : List ℕ) : + (⟪buildCountedFrom start fuel xs⟫).count key = + if start ≤ key ∧ key < start + fuel then xs.count key else 0 := by + induction fuel generalizing start with + | zero => simp [buildCountedFrom] + | succ fuel ih => + by_cases hstart : start = key + · subst key + simp [buildCountedFrom, ih] + · simp only [buildCountedFrom, ret_bind, ret_pure, ret_countKey, List.count_append] + simp only [ne_eq, hstart, not_false_eq_true, count_replicate_of_ne, ih, Nat.zero_add] + by_cases htotal : start ≤ key ∧ key < start + (fuel + 1) + · have htail : start + 1 ≤ key ∧ key < start + 1 + fuel := by omega + simp [htotal, htail] + · have htail : ¬(start + 1 ≤ key ∧ key < start + 1 + fuel) := by omega + rw [if_neg htail, if_neg htotal] + +/-- In-range keys are counted exactly as in the input. -/ +theorem countingSort_count_of_le_bound {bound key : ℕ} (xs : List ℕ) (hkey : key ≤ bound) : + (⟪countingSort bound xs⟫).count key = xs.count key := by + rw [countingSort, buildCountedFrom_count] + split_ifs with h + · rfl + · omega + +/-- Out-of-range keys never appear in the output. -/ +theorem countingSort_count_of_bound_lt {bound key : ℕ} (xs : List ℕ) (hkey : bound < key) : + (⟪countingSort bound xs⟫).count key = 0 := by + rw [countingSort, buildCountedFrom_count] + split_ifs with h + · omega + · rfl + +/-- If the input is bounded, every out-of-range key has count zero in the input too. -/ +private theorem bounded_count_eq_zero_of_bound_lt {bound key : ℕ} {xs : List ℕ} + (hxs : BoundedBy bound xs) (hkey : bound < key) : xs.count key = 0 := by + rw [List.count_eq_zero] + intro hmem + have := hxs key hmem + omega + +/-- Counting sort is a permutation because every key has the same count in input and output. -/ +theorem countingSort_perm (bound : ℕ) (xs : List ℕ) (hxs : BoundedBy bound xs) : + (⟪countingSort bound xs⟫).Perm xs := by + rw [List.perm_iff_count] + intro key + by_cases hkey : key ≤ bound + · exact countingSort_count_of_le_bound xs hkey + · have hlt : bound < key := Nat.lt_of_not_ge hkey + rw [countingSort_count_of_bound_lt xs hlt, bounded_count_eq_zero_of_bound_lt hxs hlt] + +/-- Every generated block list is sorted because later blocks only contain larger keys. -/ +private theorem buildCountedFrom_sorted (start fuel : ℕ) (xs : List ℕ) : + List.Pairwise (· ≤ ·) ⟪buildCountedFrom start fuel xs⟫ := by + induction fuel generalizing start with + | zero => simp [buildCountedFrom] + | succ fuel ih => + simp only [buildCountedFrom, ret_bind, ret_pure, ret_countKey] + rw [List.pairwise_append] + refine ⟨by simp [List.pairwise_replicate], ih (start + 1), ?_⟩ + intro a ha b hb + have haEq : a = start := (List.mem_replicate.mp ha).2 + have hbLower : start + 1 ≤ b := buildCountedFrom_start_le_of_mem hb + omega + +/-- Counting sort returns a sorted list. -/ +theorem countingSort_sorted (bound : ℕ) (xs : List ℕ) : + List.Pairwise (· ≤ ·) ⟪countingSort bound xs⟫ := by + exact buildCountedFrom_sorted 0 (bound + 1) xs + +/-- Filtering a list by one value gives exactly `count` copies of that value. -/ +private theorem filter_eq_replicate_count (key : ℕ) (xs : List ℕ) : + xs.filter (fun x => x = key) = List.replicate (xs.count key) key := by + induction xs with + | nil => simp + | cons x xs ih => + by_cases h : x = key + · simp [h, ih, List.replicate_succ] + · simp [h, ih] + +/-- +Counting sort is stable for natural numbers because every equal-value subsequence is just a block of +identical copies, and the count theorem proves that block has the same length as in the input. +-/ +theorem countingSort_stable (bound : ℕ) (xs : List ℕ) (hxs : BoundedBy bound xs) : + StableByValue xs ⟪countingSort bound xs⟫ := by + intro key + rw [filter_eq_replicate_count key ⟪countingSort bound xs⟫, filter_eq_replicate_count key xs] + by_cases hkey : key ≤ bound + · rw [countingSort_count_of_le_bound xs hkey] + · have hlt : bound < key := Nat.lt_of_not_ge hkey + rw [countingSort_count_of_bound_lt xs hlt, bounded_count_eq_zero_of_bound_lt hxs hlt] + +/-- Counting sort is functionally correct for inputs covered by the supplied bound. -/ +theorem countingSort_correct (bound : ℕ) (xs : List ℕ) (hxs : BoundedBy bound xs) : + List.Pairwise (· ≤ ·) ⟪countingSort bound xs⟫ ∧ (⟪countingSort bound xs⟫).Perm xs ∧ + StableByValue xs ⟪countingSort bound xs⟫ := by + exact ⟨countingSort_sorted bound xs, countingSort_perm bound xs hxs, + countingSort_stable bound xs hxs⟩ + +end Correctness + +section TimeComplexity + +/-- Building counted blocks scans the input once for each generated key. -/ +@[simp] +private theorem buildCountedFrom_time (start fuel : ℕ) (xs : List ℕ) : + (buildCountedFrom start fuel xs).time = fuel * xs.length := by + induction fuel generalizing start with + | zero => simp [buildCountedFrom] + | succ fuel ih => simp [buildCountedFrom, ih, Nat.succ_mul, Nat.add_comm] + +/-- Time complexity of counting sort under the stated cost model. -/ +theorem countingSort_time (bound : ℕ) (xs : List ℕ) : + (countingSort bound xs).time = (bound + 1) * xs.length := by + simp [countingSort] + +/-- A convenient upper bound form of `countingSort_time`. -/ +theorem countingSort_time_le (bound : ℕ) (xs : List ℕ) : + let n := xs.length + (countingSort bound xs).time ≤ (bound + 1) * n := by + simp [countingSort_time] + +end TimeComplexity + +end Cslib.Algorithms.Lean.TimeM diff --git a/Cslib/Algorithms/Lean/Sorting.lean b/Cslib/Algorithms/Lean/Sorting.lean new file mode 100644 index 000000000..e20257d9d --- /dev/null +++ b/Cslib/Algorithms/Lean/Sorting.lean @@ -0,0 +1,28 @@ +/- +Copyright (c) 2026 Robert Joseph George. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Robert Joseph George +-/ + +module + +public import Cslib.Init + +/-! +# Sorting utilities + +For stable list sorts, filtering the input and output by any value records the relative order of +the copies of that value. +-/ + +@[expose] public section + +set_option autoImplicit false + +namespace Cslib.Algorithms.Lean + +/-- `ys` preserves the order of equal values from `xs`. -/ +abbrev StableByValue {α : Type*} [DecidableEq α] (xs ys : List α) : Prop := + ∀ value, ys.filter (fun x => x = value) = xs.filter (fun x => x = value) + +end Cslib.Algorithms.Lean From 3704ca36ba394e49fb01036d3f1520269329292c Mon Sep 17 00:00:00 2001 From: robert Date: Tue, 19 May 2026 19:56:35 +0000 Subject: [PATCH 2/3] Address counting sort review comments --- .../Lean/CountingSort/CountingSort.lean | 16 ++++++++++++---- Cslib/Algorithms/Lean/Sorting.lean | 8 +++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean b/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean index 69fc38c27..3dbb376fa 100644 --- a/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean +++ b/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean @@ -23,8 +23,12 @@ each key in `0, ..., bound` and returns a `TimeM ℕ (List ℕ)`. The cost model counts one key comparison while scanning an input element. Constructing output blocks, appending lists, arithmetic, and pattern matching are free. -Correctness is proved through counts: each generated block contributes exactly the copies for one -key, and bounded inputs ensure no out-of-range key is missing from the output. +The raw algorithm intentionally only emits keys in the range `0, ..., bound`; values above `bound` +are omitted. The correctness theorems therefore assume `BoundedBy bound xs`, which is the contract +that the supplied bound covers the input. + +The definitions live in the `TimeM` namespace, matching the existing timed list algorithms in +`Cslib.Algorithms.Lean`. -/ @[expose] public section @@ -53,7 +57,11 @@ def buildCountedFrom (start fuel : ℕ) (xs : List ℕ) : TimeM ℕ (List ℕ) : let rest ← buildCountedFrom (start + 1) fuel' xs return List.replicate count start ++ rest -/-- Sorts natural-number lists by counting all keys from `0` through `bound`. -/ +/-- +Sorts natural-number lists by counting all keys from `0` through `bound`. +If the input contains values greater than `bound`, this raw version omits them; use the correctness +theorems with a `BoundedBy bound xs` hypothesis when treating the output as a sort of `xs`. +-/ def countingSort (bound : ℕ) (xs : List ℕ) : TimeM ℕ (List ℕ) := buildCountedFrom 0 (bound + 1) xs @@ -169,7 +177,7 @@ theorem countingSort_sorted (bound : ℕ) (xs : List ℕ) : /-- Filtering a list by one value gives exactly `count` copies of that value. -/ private theorem filter_eq_replicate_count (key : ℕ) (xs : List ℕ) : - xs.filter (fun x => x = key) = List.replicate (xs.count key) key := by + xs.filter (fun x => decide (x = key)) = List.replicate (xs.count key) key := by induction xs with | nil => simp | cons x xs ih => diff --git a/Cslib/Algorithms/Lean/Sorting.lean b/Cslib/Algorithms/Lean/Sorting.lean index e20257d9d..332b64e94 100644 --- a/Cslib/Algorithms/Lean/Sorting.lean +++ b/Cslib/Algorithms/Lean/Sorting.lean @@ -11,8 +11,10 @@ public import Cslib.Init /-! # Sorting utilities -For stable list sorts, filtering the input and output by any value records the relative order of -the copies of that value. +For stable list sorts, filtering the input and output by any value gives a compact way to state that +the output keeps the same per-value subsequence as the input. For plain values this is equivalent to +preserving the number of copies of each value; for richer element types it can express a stronger +order-preservation property. -/ @[expose] public section @@ -23,6 +25,6 @@ namespace Cslib.Algorithms.Lean /-- `ys` preserves the order of equal values from `xs`. -/ abbrev StableByValue {α : Type*} [DecidableEq α] (xs ys : List α) : Prop := - ∀ value, ys.filter (fun x => x = value) = xs.filter (fun x => x = value) + ∀ value, ys.filter (fun x => decide (x = value)) = xs.filter (fun x => decide (x = value)) end Cslib.Algorithms.Lean From b03ead35307f7036c42ff8bb7e25796fb4f70f80 Mon Sep 17 00:00:00 2001 From: robert Date: Tue, 19 May 2026 20:01:08 +0000 Subject: [PATCH 3/3] Make counting sort tactic import explicit --- Cslib/Algorithms/Lean/CountingSort/CountingSort.lean | 1 + 1 file changed, 1 insertion(+) diff --git a/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean b/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean index 3dbb376fa..e3495787b 100644 --- a/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean +++ b/Cslib/Algorithms/Lean/CountingSort/CountingSort.lean @@ -11,6 +11,7 @@ public import Cslib.Algorithms.Lean.TimeM public import Mathlib.Data.Nat.Basic public import Mathlib.Order.RelClasses public import Mathlib.Tactic.Attr.Core +public import Mathlib.Tactic.Basic public import Mathlib.Tactic.Finiteness.Attr public import Batteries.Data.List.Lemmas