elara-labs · rajkumarsakthivel · Jun 11, 2026
@@ -1011,7 +1011,14 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None:
                 served = stats.get("served_tokens", 0)
                 if q > 0 and full > 0:
                     pct = int((full - served) / full * 100)
-                    savings = f" · {pct}% saved over {q} queries"
+                    tokens_saved = full - served
+                    from context_engine.pricing import get_model_pricing
+                    model = config.pricing_model.lower()
+                    all_pricing = get_model_pricing()
+                    rate = all_pricing.get(model, all_pricing.get("opus", {"input": 15.0}))
+                    cost = tokens_saved * rate["input"] / 1_000_000
+                    cost_str = f"${cost:.2f}" if cost >= 0.01 else "<$0.01"
+                    savings = f" · {pct}% saved over {q} queries ({cost_str} saved)"
             except Exception:
                 pass
         click.echo(

@@ -1432,6 +1432,20 @@ def _handle_session_event(self, args):
         body = self._apply_output_compression(body)
         return [TextContent(type="text", text=body)]
 
+    def _fmt_cost_saved(self, tokens_saved: int) -> str:
+        """Return a suffix such as ', $4.37 saved' for tokens_saved; '' if under $0.01 or on any error."""
+        try:
+            from context_engine.pricing import get_model_pricing
+            model = self._config.pricing_model.lower()  # pricing table is looked up by lowercased model name
+            pricing = get_model_pricing()
+            rate = pricing.get(model, pricing.get("opus", {"input": 15.0}))  # unknown model -> "opus" entry, else 15.0
+            cost = tokens_saved * rate["input"] / 1_000_000  # the "input" rate is applied per 1,000,000 tokens
+            if cost >= 0.01:  # smaller (or negative) amounts fall through to the empty suffix
+                return f", ${cost:.2f} saved"
+        except Exception:  # best-effort: any failure (import, config, lookup) yields the empty suffix
+            pass
+        return ""
+
     async def _handle_index_status(self):
         queries = self._stats["queries"]
         raw = self._stats["raw_tokens"]
@@ -1450,15 +1464,17 @@ async def _handle_index_status(self):
             if full_file > 0:
                 full_saved = full_file - served
                 full_pct = int(full_saved / full_file * 100)
+                cost_note = self._fmt_cost_saved(full_saved)
                 status_parts.append(
                     f"Token savings ({queries} queries): "
                     f"{full_file:,} full-file baseline → {served:,} served "
-                    f"({full_pct}% saved)"
+                    f"({full_pct}% saved{cost_note})"
                 )
             else:
+                cost_note = self._fmt_cost_saved(saved)
                 status_parts.append(
                     f"Token savings ({queries} queries): {raw:,} raw → {served:,} served "
-                    f"({saved:,} saved, {pct}%)"
+                    f"({saved:,} saved, {pct}%{cost_note})"
                 )
         else:
             status_parts.append(

@@ -85,9 +85,22 @@ def _fmt_k(n: int) -> str:
             return f"{n / 1_000:.1f}k"
         return str(n)
 
+    tokens_saved = total_baseline - total_served
+    cost_str = ""
+    try:
+        from context_engine.pricing import get_model_pricing
+        pricing = get_model_pricing()
+        # Use opus as default; the session hook doesn't have config access
+        rate = pricing.get("opus", {"input": 15.0})["input"]
+        cost = tokens_saved * rate / 1_000_000
+        if cost >= 0.01:
+            cost_str = f", ${cost:.2f} saved"
+    except Exception:
+        pass
+
     return (
         f"CCE saved {saved_pct:.0f}% of input tokens across {total_queries} queries "
-        f"({_fmt_k(total_baseline)} baseline, {_fmt_k(total_served)} served)"
+        f"({_fmt_k(total_baseline)} baseline, {_fmt_k(total_served)} served{cost_str})"
     )