diff --git a/crates/example/src/lib.rs b/crates/example/src/lib.rs
index e768debc..14e0311a 100644
--- a/crates/example/src/lib.rs
+++ b/crates/example/src/lib.rs
@@ -136,14 +136,16 @@ pub struct App {
 
 impl App {
     pub fn new(ctx: &Context, camera_control: CameraControl) -> Self {
+        // Use library defaults for atlas size and MSAA -- the Context
+        // auto-detects appropriate settings based on GPU capabilities.
         let stage = ctx
             .new_stage()
             .with_background_color(DARK_BLUE_BG_COLOR)
             .with_bloom_mix_strength(0.5)
-            .with_bloom_filter_radius(4.0)
-            .with_msaa_sample_count(4);
+            .with_bloom_filter_radius(4.0);
         let size = ctx.get_size();
-        let (proj, view) = renderling::camera::default_perspective(size.x as f32, size.y as f32);
+        let (proj, view) =
+            renderling::camera::default_perspective(size.x as f32, size.y as f32);
         let camera = stage.new_camera().with_projection_and_view(proj, view);
 
         let sunlight = stage
@@ -152,14 +154,6 @@ impl App {
             .with_color(renderling::math::hex_to_vec4(0xFDFBD3FF))
             .with_intensity(Lux::OUTDOOR_SUNSET);
 
-        stage
-            .set_atlas_size(wgpu::Extent3d {
-                width: 2048,
-                height: 2048,
-                depth_or_array_layers: 32,
-            })
-            .unwrap();
-
         let ui = Ui::new(ctx).with_background_color(Vec4::ZERO);
         let _ = ui.add_font(FontArc::try_from_slice(FONT_BYTES).unwrap());
         let fps_counter = FPSCounter::default();
diff --git a/crates/renderling/src/atlas/cpu.rs b/crates/renderling/src/atlas/cpu.rs
index 5afdbce8..f9112e77 100644
--- a/crates/renderling/src/atlas/cpu.rs
+++ b/crates/renderling/src/atlas/cpu.rs
@@ -207,6 +207,9 @@ pub struct Atlas {
     descriptor: Hybrid<AtlasDescriptor>,
     /// Used for user updates into the atlas by blit images into specific frames.
     blitter: AtlasBlitter,
+    /// Maximum size the atlas is allowed to grow to.
+    /// When `None`, the atlas will not auto-grow.
+    max_size: Arc<RwLock<Option<wgpu::Extent3d>>>,
 }
 
 impl Atlas {
@@ -305,9 +308,27 @@ impl Atlas {
             label,
             blitter,
             texture_array: Arc::new(RwLock::new(texture)),
+            max_size: Arc::new(RwLock::new(None)),
         }
     }
 
+    /// Set the maximum size the atlas is allowed to auto-grow to.
+    ///
+    /// When `add_images` cannot pack new images into the current size,
+    /// it retries at larger extents (doubling width and height, then
+    /// adding layers) up to this limit before returning an error.
+    ///
+    /// Pass `None` (what a new atlas starts with) to disable auto-growing.
+    pub fn set_max_size(&self, max_size: Option<wgpu::Extent3d>) {
+        *self.max_size.write().expect("atlas max_size write") = max_size;
+    }
+
+    /// Builder-style variant of `set_max_size` that enables auto-growing.
+    pub fn with_max_size(self, max_size: wgpu::Extent3d) -> Self {
+        self.set_max_size(Some(max_size));
+        self
+    }
+
     pub fn descriptor_id(&self) -> Id<AtlasDescriptor> {
         self.descriptor.id()
     }
@@ -356,18 +377,77 @@ impl Atlas {
         self.texture_array.read().expect("atlas texture_array read").texture.size()
     }
 
-    /// Add the given images
+    /// Compute the next larger extent to try when packing fails.
+    ///
+    /// Doubles width and height together while both results stay within
+    /// `max`; otherwise adds a single array layer. Returns `None` when
+    /// neither step is possible.
+    fn next_grow_extent(
+        current: wgpu::Extent3d,
+        max: wgpu::Extent3d,
+    ) -> Option<wgpu::Extent3d> {
+        // `checked_mul` keeps an oversized extent from overflowing `u32`;
+        // the `> dim` check stops a zero-sized dimension from "growing" to
+        // itself, which would make the caller's retry loop spin forever.
+        let doubled = |dim: u32, limit: u32| {
+            dim.checked_mul(2).filter(|&d| d > dim && d <= limit)
+        };
+        let width = doubled(current.width, max.width);
+        let height = doubled(current.height, max.height);
+        if let (Some(width), Some(height)) = (width, height) {
+            return Some(wgpu::Extent3d { width, height, ..current });
+        }
+        // Width/height cannot double: fall back to one more array layer.
+        if current.depth_or_array_layers < max.depth_or_array_layers {
+            return Some(wgpu::Extent3d {
+                depth_or_array_layers: current.depth_or_array_layers + 1,
+                ..current
+            });
+        }
+        // Already at max
+        None
+    }
+
+    /// Add the given images, auto-growing the atlas if necessary.
     pub fn add_images<'a>(
         &self,
         images: impl IntoIterator<Item = &'a AtlasImage>,
     ) -> Result<Vec<AtlasTexture>, AtlasError> {
+        let images: Vec<&AtlasImage> = images.into_iter().collect();
         // UNWRAP: POP
-        let mut layers = self.layers.write().expect("atlas layers write");
-        let mut texture_array = self.texture_array.write().expect("atlas texture_array write");
-        let extent = texture_array.texture.size();
-
-        let newly_packed_layers = pack_images(&layers, images, extent)
-            .context(CannotPackTexturesSnafu { size: extent })?;
+        let mut layers =
+            self.layers.write().expect("atlas layers write");
+        let mut texture_array = self
+            .texture_array
+            .write()
+            .expect("atlas texture_array write");
+        let mut extent = texture_array.texture.size();
+        let max_size =
+            *self.max_size.read().expect("atlas max_size read");
+
+        // Try packing, auto-growing if a max_size is configured.
+        let newly_packed_layers = loop {
+            match pack_images(&layers, images.iter().copied(), extent) {
+                Some(packed) => break packed,
+                None => {
+                    if let Some(max) = max_size {
+                        if let Some(bigger) =
+                            Self::next_grow_extent(extent, max)
+                        {
+                            log::info!(
+                                "atlas auto-growing from {extent:?} \
+                                 to {bigger:?}"
+                            );
+                            extent = bigger;
+                            continue;
+                        }
+                    }
+                    return Err(AtlasError::CannotPackTextures {
+                        size: extent,
+                    });
+                }
+            }
+        };
 
         let mut staged = StagedResources::try_staging(
             self.slab.runtime(),
@@ -378,6 +458,15 @@ impl Atlas {
             self.label.as_deref(),
         )?;
 
+        // Update the descriptor with the new size.
+        self.descriptor.set(AtlasDescriptor {
+            size: UVec3::new(
+                extent.width,
+                extent.height,
+                extent.depth_or_array_layers,
+            ),
+        });
+
         // Commit our newly staged values, now that everything is done.
         *texture_array = staged.texture;
         *layers = staged.layers;
diff --git a/crates/renderling/src/bloom/cpu.rs b/crates/renderling/src/bloom/cpu.rs
index 6e044733..ca384c96 100644
--- a/crates/renderling/src/bloom/cpu.rs
+++ b/crates/renderling/src/bloom/cpu.rs
@@ -553,7 +553,7 @@ impl Bloom {
             .clone()
     }
 
-    pub(crate) fn render_downsamples(&self, device: &wgpu::Device, queue: &wgpu::Queue) {
+    fn encode_downsamples(&self, encoder: &mut wgpu::CommandEncoder) {
         struct DownsampleItem<'a> {
             view: &'a wgpu::TextureView,
             bindgroup: &'a wgpu::BindGroup,
@@ -561,9 +561,10 @@ impl Bloom {
         }
         // Get all the bindgroups (which are what we're reading from),
         // starting with the hdr frame.
-        // Since `bindgroups` are one element greater (we pushed `hdr_texture_bindgroup`
-        // to the front) the last bindgroup will not be used, which is good - we
-        // don't need to read from the smallest texture during downsampling.
+        // Since `bindgroups` are one element greater (we pushed
+        // `hdr_texture_bindgroup` to the front) the last bindgroup will not
+        // be used, which is good - we don't need to read from the smallest
+        // texture during downsampling.
         // UNWRAP: not safe but we want to panic
         let textures_guard = self.textures.read().expect("bloom textures read");
         let hdr_texture_downsample_bindgroup_guard = self
@@ -595,8 +596,6 @@ impl Bloom {
         {
             let title = format!("bloom downsample {i}");
             let label = Some(title.as_str());
-            let mut encoder =
-                device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label });
             {
                 let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                     label,
@@ -618,11 +617,10 @@ impl Bloom {
                 let id = pixel_size.into();
                 render_pass.draw(0..6, id..id + 1);
             }
-            queue.submit(std::iter::once(encoder.finish()));
         }
     }
 
-    fn render_upsamples(&self, device: &wgpu::Device, queue: &wgpu::Queue) {
+    fn encode_upsamples(&self, encoder: &mut wgpu::CommandEncoder) {
         struct UpsampleItem<'a> {
             view: &'a wgpu::TextureView,
             bindgroup: &'a wgpu::BindGroup,
@@ -642,8 +640,6 @@ impl Bloom {
         for (i, UpsampleItem { view, bindgroup }) in items.enumerate() {
             let title = format!("bloom upsample {}", textures_guard.len() - i - 1);
             let label = Some(title.as_str());
-            let mut encoder =
-                device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label });
             {
                 let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                     label,
@@ -665,16 +661,14 @@ impl Bloom {
                 let id = self.upsample_filter_radius.id().into();
                 render_pass.draw(0..6, id..id + 1);
             }
-            queue.submit(std::iter::once(encoder.finish()));
         }
     }
 
-    fn render_mix(&self, device: &wgpu::Device, queue: &wgpu::Queue) {
+    fn encode_mix(&self, encoder: &mut wgpu::CommandEncoder) {
         let label = Some("bloom mix");
         // UNWRAP: not safe but we want to panic
         let mix_texture = self.mix_texture.read().expect("bloom mix_texture read");
         let mix_bindgroup = self.mix_bindgroup.read().expect("bloom mix_bindgroup read");
-        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label });
         {
             let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                 label,
@@ -696,19 +690,23 @@ impl Bloom {
             let id = self.mix_strength.id().into();
             render_pass.draw(0..6, id..id + 1);
         }
-
-        queue.submit(std::iter::once(encoder.finish()));
     }
 
+    /// Commit the slab, then record the downsample, upsample and mix passes
+    /// into one encoder submitted once. Panics if the slab buffer is invalid.
     pub fn bloom(&self, device: &wgpu::Device, queue: &wgpu::Queue) {
         self.slab.commit();
         assert!(
             self.slab_buffer.is_valid(),
             "bloom slab buffer should never resize"
         );
-        self.render_downsamples(device, queue);
-        self.render_upsamples(device, queue);
-        self.render_mix(device, queue);
+        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
+            label: Some("bloom"),
+        });
+        self.encode_downsamples(&mut encoder);
+        self.encode_upsamples(&mut encoder);
+        self.encode_mix(&mut encoder);
+        queue.submit(std::iter::once(encoder.finish()));
     }
 }
 
diff --git a/crates/renderling/src/context.rs b/crates/renderling/src/context.rs
index 63969c45..5eb86559 100644
--- a/crates/renderling/src/context.rs
+++ b/crates/renderling/src/context.rs
@@ -293,12 +293,74 @@ impl Frame {
     }
 }
 
+/// Rendering quality profile based on GPU capabilities.
+///
+/// The profile is auto-detected from adapter features and info, but can
+/// be overridden via [`Context::new_with_profile`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum GpuProfile {
+    /// Desktop GPUs with full feature support.
+    High,
+    /// Mid-range or integrated GPUs.
+    Medium,
+    /// Low-power GPUs (e.g. Raspberry Pi 5, mobile SoCs).
+    Low,
+}
+
+impl GpuProfile {
+    /// Classify `adapter` by its reported name and supported features.
+    ///
+    /// Adapters whose name matches a known low-power family are `Low`;
+    /// adapters lacking `MULTI_DRAW_INDIRECT` are `Medium` when the name
+    /// looks integrated and `Low` otherwise; everything else is `High`.
+    pub fn detect(adapter: &wgpu::Adapter) -> Self {
+        // Name fragments of known low-power GPUs and software rasterizers.
+        const LOW_POWER: &[&str] = &[
+            "v3d",
+            "videocore",
+            "mali-4",
+            "mali-t",
+            "powervr",
+            "sgx",
+            "llvmpipe",
+            "softpipe",
+            "swrast",
+        ];
+        // Vendors that may still be mid-range without multi-draw indirect.
+        const INTEGRATED: &[&str] = &["intel", "adreno", "mali"];
+
+        let supported = adapter.features();
+        let gpu_name = adapter.get_info().name.to_lowercase();
+        let named = |needles: &[&str]| needles.iter().any(|n| gpu_name.contains(*n));
+
+        if named(LOW_POWER) {
+            return Self::Low;
+        }
+        if supported.contains(wgpu::Features::MULTI_DRAW_INDIRECT) {
+            return Self::High;
+        }
+        if named(INTEGRATED) {
+            Self::Medium
+        } else {
+            Self::Low
+        }
+    }
+}
+
 /// Configurable default values to use when creating new [`Stage`]s.
 #[derive(Debug, Clone, Copy)]
 pub(crate) struct GlobalStageConfig {
     pub(crate) atlas_size: wgpu::Extent3d,
     pub(crate) shadow_map_atlas_size: wgpu::Extent3d,
     pub(crate) use_compute_culling: bool,
+    pub(crate) gpu_profile: GpuProfile,
+    pub(crate) default_bloom: bool,
+    pub(crate) default_msaa_sample_count: u32,
+    /// When `true`, stages use `Rgba16Float` render targets, bloom, and
+    /// tonemapping. When `false`, stages render in the surface format
+    /// (LDR), skipping bloom and tonemapping entirely. This halves
+    /// render-target bandwidth on low-power GPUs.
+    pub(crate) default_hdr: bool,
 }
 
 /// Contains the adapter, device, queue, [`RenderTarget`] and configuration.
@@ -316,6 +378,9 @@ pub struct Context {
     adapter: Arc<wgpu::Adapter>,
     render_target: RenderTarget,
     pub(crate) stage_config: Arc<RwLock<GlobalStageConfig>>,
+    /// Soft GPU memory budget in bytes (`None` = no budget). Meant to cap atlas
+    /// auto-grow and other large allocations (confirm where it is enforced).
+    pub(crate) memory_budget: Arc<RwLock<Option<usize>>>,
 }
 
 impl AsRef<WgpuRuntime> for Context {
@@ -325,33 +390,72 @@ impl AsRef<WgpuRuntime> for Context {
 }
 
 impl Context {
-    /// Creates a new `Context` with the specified target, adapter, device, and queue.
+    /// Creates a new `Context` with the specified target, adapter, device,
+    /// and queue.
+    ///
+    /// The GPU profile is auto-detected via [`GpuProfile::detect`] and used
+    /// to select sensible defaults for atlas sizes, bloom, MSAA, etc.
     pub fn new(
         target: RenderTarget,
         adapter: impl Into<Arc<wgpu::Adapter>>,
         device: impl Into<Arc<wgpu::Device>>,
         queue: impl Into<Arc<wgpu::Queue>>,
+    ) -> Self {
+        let adapter: Arc<wgpu::Adapter> = adapter.into();
+        let profile = GpuProfile::detect(&adapter);
+        log::info!("detected GPU profile: {profile:?}");
+        Self::new_with_profile(target, adapter, device, queue, profile)
+    }
+
+    /// Creates a new `Context` with a specific GPU profile, overriding
+    /// auto-detection.
+    pub fn new_with_profile(
+        target: RenderTarget,
+        adapter: impl Into<Arc<wgpu::Adapter>>,
+        device: impl Into<Arc<wgpu::Device>>,
+        queue: impl Into<Arc<wgpu::Queue>>,
+        profile: GpuProfile,
     ) -> Self {
         let adapter: Arc<wgpu::Adapter> = adapter.into();
         let limits = adapter.limits();
-        let w = limits
-            .max_texture_dimension_2d
-            .min(crate::atlas::ATLAS_SUGGESTED_SIZE);
+
+        let (atlas_dim, atlas_layers, shadow_dim, shadow_layers, bloom, msaa, hdr) =
+            match profile {
+                GpuProfile::High => (
+                    crate::atlas::ATLAS_SUGGESTED_SIZE,
+                    crate::atlas::ATLAS_SUGGESTED_LAYERS,
+                    crate::atlas::ATLAS_SUGGESTED_SIZE,
+                    4u32,
+                    true,
+                    4u32,
+                    true,
+                ),
+                GpuProfile::Medium => (1024, 4, 1024, 2, true, 2, true),
+                GpuProfile::Low => (512, 2, 512, 2, false, 1, false),
+            };
+
+        let w = limits.max_texture_dimension_2d.min(atlas_dim);
+        let sw = limits.max_texture_dimension_2d.min(shadow_dim);
         let stage_config = Arc::new(RwLock::new(GlobalStageConfig {
             atlas_size: wgpu::Extent3d {
                 width: w,
                 height: w,
-                depth_or_array_layers: adapter
-                    .limits()
+                depth_or_array_layers: limits
                     .max_texture_array_layers
-                    .min(crate::atlas::ATLAS_SUGGESTED_LAYERS),
+                    .min(atlas_layers),
             },
             shadow_map_atlas_size: wgpu::Extent3d {
-                width: w,
-                height: w,
-                depth_or_array_layers: 4,
+                width: sw,
+                height: sw,
+                depth_or_array_layers: limits
+                    .max_texture_array_layers
+                    .min(shadow_layers),
             },
             use_compute_culling: false,
+            gpu_profile: profile,
+            default_bloom: bloom,
+            default_msaa_sample_count: msaa,
+            default_hdr: hdr,
         }));
         Self {
             adapter,
@@ -361,6 +465,7 @@ impl Context {
             },
             render_target: target,
             stage_config,
+            memory_budget: Arc::new(RwLock::new(None)),
         }
     }
 
@@ -607,6 +712,63 @@ impl Context {
         !self.stage_config.read().expect("stage_config read").use_compute_culling
     }
 
+    /// Returns the GPU profile this context was created with.
+    ///
+    /// This is either the auto-detected profile or the caller's override.
+    pub fn get_gpu_profile(&self) -> GpuProfile {
+        let config = self.stage_config.read().expect("stage_config read");
+        config.gpu_profile
+    }
+
+    /// Returns the default bloom setting for this context.
+    ///
+    /// Initially derived from the GPU profile when the context is created.
+    pub fn get_default_bloom(&self) -> bool {
+        let config = self.stage_config.read().expect("stage_config read");
+        config.default_bloom
+    }
+
+    /// Returns the default MSAA sample count for this context.
+    ///
+    /// Initially derived from the GPU profile when the context is
+    /// created.
+    pub fn get_default_msaa_sample_count(&self) -> u32 {
+        let config = self.stage_config.read().expect("stage_config read");
+        config.default_msaa_sample_count
+    }
+
+    /// Returns the default HDR setting for this context.
+    ///
+    /// When `false`, stages use LDR rendering (surface format), skipping
+    /// bloom and tonemapping for reduced bandwidth on low-power GPUs.
+    /// Initially derived from the GPU profile when the context is
+    /// created.
+    pub fn get_default_hdr(&self) -> bool {
+        let config = self.stage_config.read().expect("stage_config read");
+        config.default_hdr
+    }
+
+    /// Set a soft GPU memory budget in bytes.
+    ///
+    /// This only stores the value for atlas auto-grow and other large
+    /// allocations to consult. NOTE(review): confirm where it is enforced.
+    ///
+    /// Pass `None` to remove the budget (default).
+    pub fn set_memory_budget(&self, budget: Option<usize>) {
+        *self.memory_budget.write().expect("memory_budget write") = budget;
+    }
+
+    /// Builder-style variant of `set_memory_budget`; stores `Some(budget)`.
+    pub fn with_memory_budget(self, budget: usize) -> Self {
+        self.set_memory_budget(Some(budget));
+        self
+    }
+
+    /// Returns the current soft memory budget in bytes, or `None` if unset.
+    pub fn get_memory_budget(&self) -> Option<usize> {
+        *self.memory_budget.read().expect("memory_budget read")
+    }
+
     /// Creates and returns a new [`Stage`] renderer.
     pub fn new_stage(&self) -> Stage {
         Stage::new(self)
diff --git a/crates/renderling/src/draw/cpu.rs b/crates/renderling/src/draw/cpu.rs
index 2f840589..3819e1b5 100644
--- a/crates/renderling/src/draw/cpu.rs
+++ b/crates/renderling/src/draw/cpu.rs
@@ -7,10 +7,12 @@ use craballoc::{
 use crabslab::Id;
 
 use crate::{
+    camera::shader::CameraDescriptor,
     context::Context,
     cull::{ComputeCulling, CullingError},
     primitive::{shader::PrimitiveDescriptor, Primitive},
     texture::Texture,
+    transform::shader::TransformDescriptor,
 };
 
 use super::DrawIndirectArgs;
@@ -254,39 +256,91 @@ impl DrawCalls {
     }
 
     /// Draw into the given `RenderPass` by directly calling each draw.
-    pub fn draw_direct(&self, render_pass: &mut wgpu::RenderPass) {
+    ///
+    /// When `frustum_cull` is `Some`, each primitive with a positive
+    /// bounds radius is tested against that camera's view on the CPU and
+    /// skipped if outside; all others are drawn. This is the fallback for
+    /// GPUs without `MULTI_DRAW_INDIRECT` support (e.g. Raspberry Pi).
+    pub fn draw_direct(
+        &self,
+        render_pass: &mut wgpu::RenderPass,
+        frustum_cull: Option<&CameraDescriptor>,
+    ) {
         if self.renderlets.is_empty() {
             log::warn!("no internal renderlets, nothing to draw");
         }
+        let mut drawn = 0u32;
+        let mut culled = 0u32;
         for ir in self.renderlets.iter() {
-            // UNWRAP: panic on purpose
             let desc = ir.descriptor.get();
+
+            // CPU-side frustum culling
+            if let Some(camera) = frustum_cull {
+                if desc.bounds.radius > 0.0 {
+                    // Read the transform from the Primitive's
+                    // CPU-side handle, falling back to identity.
+                    let transform = ir
+                        .transform
+                        .lock()
+                        .expect("transform lock")
+                        .as_ref()
+                        .map(|t| t.descriptor.get())
+                        .unwrap_or(TransformDescriptor::default());
+                    let (inside, _) =
+                        desc.bounds.is_inside_camera_view(camera, transform);
+                    if !inside {
+                        culled += 1;
+                        continue;
+                    }
+                }
+            }
+
             let vertex_range = 0..desc.get_vertex_count();
             let id = ir.descriptor.id();
             let instance_range = id.inner()..id.inner() + 1;
             render_pass.draw(vertex_range, instance_range);
+            drawn += 1;
+        }
+        if culled > 0 {
+            log::trace!(
+                "CPU frustum culling: drawn {drawn}, culled {culled}"
+            );
         }
     }
 
     /// Draw into the given `RenderPass`.
     ///
-    /// This method draws using the indirect draw buffer, if possible, otherwise
-    /// it falls back to `draw_direct`.
-    pub fn draw(&self, render_pass: &mut wgpu::RenderPass) {
+    /// This method draws using the indirect draw buffer, if possible,
+    /// otherwise it falls back to `draw_direct` with optional CPU-side
+    /// frustum culling.
+    pub fn draw(
+        &self,
+        render_pass: &mut wgpu::RenderPass,
+        frustum_cull: Option<&CameraDescriptor>,
+    ) {
         let num_draw_calls = self.draw_count();
         if num_draw_calls > 0 {
             if let Some(indirect) = &self.drawing_strategy.indirect {
-                log::trace!("drawing {num_draw_calls} renderlets using indirect");
+                log::trace!(
+                    "drawing {num_draw_calls} renderlets using indirect"
+                );
                 if let Some(indirect_buffer) = indirect.slab.get_buffer() {
-                    render_pass.multi_draw_indirect(&indirect_buffer, 0, num_draw_calls as u32);
+                    render_pass.multi_draw_indirect(
+                        &indirect_buffer,
+                        0,
+                        num_draw_calls as u32,
+                    );
                 } else {
                     log::warn!(
-                        "could not get the indirect buffer - was `DrawCall::upkeep` called?"
+                        "could not get the indirect buffer - \
+                         was `DrawCall::upkeep` called?"
                     );
                 }
             } else {
-                log::trace!("drawing {num_draw_calls} renderlets using direct");
-                self.draw_direct(render_pass);
+                log::trace!(
+                    "drawing {num_draw_calls} renderlets using direct"
+                );
+                self.draw_direct(render_pass, frustum_cull);
             }
         } else {
             log::warn!("zero draw calls");
diff --git a/crates/renderling/src/geometry/cpu.rs b/crates/renderling/src/geometry/cpu.rs
index 29861dbf..d3b1ff54 100644
--- a/crates/renderling/src/geometry/cpu.rs
+++ b/crates/renderling/src/geometry/cpu.rs
@@ -10,7 +10,7 @@ use crabslab::{Array, Id};
 use glam::{Mat4, UVec2, Vec4};
 
 use crate::{
-    camera::Camera,
+    camera::{shader::CameraDescriptor, Camera},
     geometry::{
         shader::{GeometryDescriptor, SkinDescriptor},
         MorphTarget, Vertex,
@@ -260,7 +260,6 @@ impl MorphTargets {
         }
     }
     /// Returns a pointer to the underlying morph targets data on the GPU.
-    ///
     pub fn array(&self) -> Array<Array<MorphTarget>> {
         self.arrays.array()
     }
@@ -369,6 +368,16 @@ impl Geometry {
         &self.descriptor
     }
 
+    /// Snapshot the descriptor of the camera currently set on this geometry.
+    ///
+    /// Returns `None` when no camera is set.
+    pub fn camera_descriptor(&self) -> Option<CameraDescriptor> {
+        // The guard is released when this function returns.
+        let guard = self.camera.lock().expect("geometry camera lock");
+        let current = guard.as_ref()?;
+        Some(current.descriptor())
+    }
+
     /// Returns the vertices of a white unit cube.
     pub fn default_vertices(&self) -> &Vertices {
         &self.default_vertices
diff --git a/crates/renderling/src/internal/cpu.rs b/crates/renderling/src/internal/cpu.rs
index c0566de0..c4eff2e3 100644
--- a/crates/renderling/src/internal/cpu.rs
+++ b/crates/renderling/src/internal/cpu.rs
@@ -42,30 +42,83 @@ pub async fn adapter(
 }
 
 /// Create a new [`wgpu::Device`].
+///
+/// Requests only the features and limits that renderling actually needs,
+/// intersected with what the adapter supports.
 pub async fn device(
     adapter: &wgpu::Adapter,
 ) -> Result<(wgpu::Device, wgpu::Queue), wgpu::RequestDeviceError> {
     let wanted_features = wgpu::Features::INDIRECT_FIRST_INSTANCE
         | wgpu::Features::MULTI_DRAW_INDIRECT
-        //// when debugging rust-gpu shader miscompilation it's nice to have this
+        //// when debugging rust-gpu shader miscompilation it's nice to have
+        //// this
         //| wgpu::Features::SPIRV_SHADER_PASSTHROUGH
-        // this one is a funny requirement, it seems it is needed if using storage buffers in
-        // vertex shaders, even if those shaders are read-only
+        // this one is a funny requirement, it seems it is needed if using
+        // storage buffers in vertex shaders, even if those shaders are
+        // read-only
         | wgpu::Features::VERTEX_WRITABLE_STORAGE
         | wgpu::Features::CLEAR_TEXTURE;
     let supported_features = adapter.features();
     let required_features = wanted_features.intersection(supported_features);
     let unsupported_features = wanted_features.difference(supported_features);
     if !unsupported_features.is_empty() {
-        log::error!("requested but unsupported features: {unsupported_features:#?}");
-        log::warn!("requested and supported features: {supported_features:#?}");
+        log::error!(
+            "requested but unsupported features: \
+             {unsupported_features:#?}"
+        );
+        log::warn!(
+            "requested and supported features: {supported_features:#?}"
+        );
     }
-    let limits = adapter.limits();
-    log::info!("adapter limits: {limits:#?}");
+    let adapter_limits = adapter.limits();
+    log::info!("adapter limits: {adapter_limits:#?}");
+
+    // Cap several limits below the adapter's maxima so drivers (e.g. V3D on
+    // RPi) need not reserve resources up to their maximum reported limits.
+    // NOTE(review): fields from `downlevel_defaults()` are not clamped to the adapter.
+    let required_limits = wgpu::Limits {
+        max_texture_dimension_2d: adapter_limits.max_texture_dimension_2d,
+        max_texture_dimension_1d: adapter_limits.max_texture_dimension_1d,
+        max_texture_array_layers: adapter_limits.max_texture_array_layers,
+        max_storage_buffers_per_shader_stage: adapter_limits
+            .max_storage_buffers_per_shader_stage
+            .min(8),
+        max_storage_buffer_binding_size: adapter_limits
+            .max_storage_buffer_binding_size,
+        max_uniform_buffer_binding_size: adapter_limits
+            .max_uniform_buffer_binding_size
+            .min(65536),
+        max_bind_groups: adapter_limits.max_bind_groups.min(4),
+        max_bindings_per_bind_group: adapter_limits
+            .max_bindings_per_bind_group
+            .min(640),
+        max_color_attachments: adapter_limits.max_color_attachments.min(4),
+        max_buffer_size: adapter_limits.max_buffer_size,
+        max_vertex_buffers: adapter_limits.max_vertex_buffers.min(8),
+        max_vertex_attributes: adapter_limits.max_vertex_attributes.min(16),
+        max_vertex_buffer_array_stride: adapter_limits
+            .max_vertex_buffer_array_stride
+            .min(2048),
+        max_inter_stage_shader_components: adapter_limits
+            .max_inter_stage_shader_components,
+        max_compute_workgroups_per_dimension: adapter_limits
+            .max_compute_workgroups_per_dimension,
+        max_compute_workgroup_size_x: adapter_limits
+            .max_compute_workgroup_size_x,
+        max_compute_workgroup_size_y: adapter_limits
+            .max_compute_workgroup_size_y,
+        max_compute_workgroup_size_z: adapter_limits
+            .max_compute_workgroup_size_z,
+        max_compute_invocations_per_workgroup: adapter_limits
+            .max_compute_invocations_per_workgroup,
+        max_compute_workgroup_storage_size: adapter_limits
+            .max_compute_workgroup_storage_size,
+        ..wgpu::Limits::downlevel_defaults()
+    };
     adapter
         .request_device(&wgpu::DeviceDescriptor {
             required_features,
-            required_limits: adapter.limits(),
+            required_limits,
             label: None,
             memory_hints: wgpu::MemoryHints::default(),
             trace: wgpu::Trace::Off,
diff --git a/crates/renderling/src/light/cpu.rs b/crates/renderling/src/light/cpu.rs
index aa539681..bfe550ab 100644
--- a/crates/renderling/src/light/cpu.rs
+++ b/crates/renderling/src/light/cpu.rs
@@ -764,6 +764,9 @@ impl LightingBindGroupLayoutEntries {
 
 impl Lighting {
     /// Create the atlas used to store all shadow maps.
+    ///
+    /// The atlas is created at `size`; when packing fails it may auto-grow
+    /// up to twice `size` in width, height and layers (layer cap is >= 2).
     fn create_shadow_map_atlas(
         light_slab: &SlabAllocator<WgpuRuntime>,
         size: wgpu::Extent3d,
@@ -771,6 +774,11 @@ impl Lighting {
         let usage = wgpu::TextureUsages::RENDER_ATTACHMENT
             | wgpu::TextureUsages::TEXTURE_BINDING
             | wgpu::TextureUsages::COPY_SRC;
+        let max_size = wgpu::Extent3d {
+            width: size.width * 2,
+            height: size.height * 2,
+            depth_or_array_layers: (size.depth_or_array_layers * 2).max(2),
+        };
         Atlas::new(
             light_slab,
             size,
@@ -778,6 +786,7 @@ impl Lighting {
             Some("shadow-map-atlas"),
             Some(usage),
         )
+        .with_max_size(max_size)
     }
 
     /// Create a new [`Lighting`] manager.
diff --git a/crates/renderling/src/linkage/light_tiling_compute_tile_min_and_max_depth_multisampled.rs b/crates/renderling/src/linkage/light_tiling_compute_tile_min_and_max_depth_multisampled.rs
index 9c1af43d..8fa577ac 100644
--- a/crates/renderling/src/linkage/light_tiling_compute_tile_min_and_max_depth_multisampled.rs
+++ b/crates/renderling/src/linkage/light_tiling_compute_tile_min_and_max_depth_multisampled.rs
@@ -6,7 +6,10 @@ mod target {
     pub const ENTRY_POINT: &str =
         "light::shader::light_tiling_compute_tile_min_and_max_depth_multisampled";
     pub fn descriptor() -> wgpu::ShaderModuleDescriptor<'static> {
-        wgpu :: include_spirv ! ("../../shaders/light-shader-light_tiling_compute_tile_min_and_max_depth_multisampled.spv")
+        wgpu::include_spirv!(
+            "../../shaders/light-shader-light_tiling_compute_tile_min_and_max_depth_multisampled.\
+             spv"
+        )
     }
     pub fn linkage(device: &wgpu::Device) -> super::ShaderLinkage {
         log::debug!(
@@ -24,7 +27,10 @@ mod target {
     pub const ENTRY_POINT: &str =
         "lightshaderlight_tiling_compute_tile_min_and_max_depth_multisampled";
     pub fn descriptor() -> wgpu::ShaderModuleDescriptor<'static> {
-        wgpu :: include_wgsl ! ("../../shaders/light-shader-light_tiling_compute_tile_min_and_max_depth_multisampled.wgsl")
+        wgpu::include_wgsl!(
+            "../../shaders/light-shader-light_tiling_compute_tile_min_and_max_depth_multisampled.\
+             wgsl"
+        )
     }
     pub fn linkage(device: &wgpu::Device) -> super::ShaderLinkage {
         log::debug!(
diff --git a/crates/renderling/src/material/cpu.rs b/crates/renderling/src/material/cpu.rs
index 3f864532..ec9cdc4e 100644
--- a/crates/renderling/src/material/cpu.rs
+++ b/crates/renderling/src/material/cpu.rs
@@ -31,16 +31,26 @@ impl AsRef<WgpuRuntime> for Materials {
 }
 
 impl Materials {
-    /// Creates a new `Materials` instance with the specified runtime and atlas
-    /// size.
+    /// Creates a new `Materials` instance with the specified runtime and
+    /// atlas size.
+    ///
+    /// The atlas is created at `atlas_size` and can auto-grow by up to
+    /// 2x in each dimension if packing fails.
     ///
     /// # Arguments
     ///
     /// * `runtime` - A reference to the WgpuRuntime.
-    /// * `atlas_size` - The size of the atlas texture.
+    /// * `atlas_size` - The initial size of the atlas texture, before any auto-growth.
     pub fn new(runtime: impl AsRef<WgpuRuntime>, atlas_size: wgpu::Extent3d) -> Self {
         let slab = SlabAllocator::new(runtime, "materials", wgpu::BufferUsages::empty());
-        let atlas = Atlas::new(&slab, atlas_size, None, Some("materials-atlas"), None);
+        // Allow the atlas to auto-grow up to 2x its configured size.
+        let max_size = wgpu::Extent3d {
+            width: atlas_size.width * 2,
+            height: atlas_size.height * 2,
+            depth_or_array_layers: (atlas_size.depth_or_array_layers * 2).max(2),
+        };
+        let atlas = Atlas::new(&slab, atlas_size, None, Some("materials-atlas"), None)
+            .with_max_size(max_size);
         let default_material = Material {
             descriptor: slab.new_value(Default::default()),
             albedo_texture: Default::default(),
diff --git a/crates/renderling/src/stage/cpu.rs b/crates/renderling/src/stage/cpu.rs
index a285b78a..168a9522 100644
--- a/crates/renderling/src/stage/cpu.rs
+++ b/crates/renderling/src/stage/cpu.rs
@@ -308,7 +308,15 @@ impl StageRendering<'_> {
 
             render_pass.set_pipeline(self.pipeline);
             render_pass.set_bind_group(0, Some(primitive_bind_group.as_ref()), &[]);
-            draw_calls.draw(&mut render_pass);
+            // For the direct-draw fallback (no MULTI_DRAW_INDIRECT),
+            // provide the camera for CPU-side frustum culling.
+            let geo_desc = self.stage.geometry.descriptor().get();
+            let frustum_cull = if geo_desc.perform_frustum_culling {
+                self.stage.geometry.camera_descriptor()
+            } else {
+                None
+            };
+            draw_calls.draw(&mut render_pass, frustum_cull.as_ref());
 
             let has_skybox = self.stage.has_skybox.load(Ordering::Relaxed);
             if has_skybox {
@@ -428,6 +436,14 @@ pub struct Stage {
 
     pub(crate) has_bloom: Arc<AtomicBool>,
     pub(crate) has_debug_overlay: Arc<AtomicBool>,
+    /// When `true` the stage renders to `Rgba16Float`, runs bloom, and
+    /// tonemaps to the output. When `false` (LDR) it renders in
+    /// `surface_format`, skipping bloom and tonemapping entirely.
+    pub(crate) use_hdr: Arc<AtomicBool>,
+    /// The format of the final output surface (e.g. `Rgba8UnormSrgb`).
+    pub(crate) surface_format: wgpu::TextureFormat,
+    /// Shared reference to the context's memory budget.
+    pub(crate) memory_budget: Arc<RwLock<Option<usize>>>,
 
     pub(crate) stage_slab_buffer: Arc<RwLock<SlabBuffer<wgpu::Buffer>>>,
 
@@ -634,6 +650,8 @@ impl Stage {
             .expect("textures_bindgroup lock")
             .take();
 
+        self.log_budget_warning();
+
         Ok(frames)
     }
 
@@ -797,12 +815,13 @@ impl Stage {
     ) -> (Arc<SkyboxRenderPipeline>, Arc<wgpu::BindGroup>) {
         let msaa_sample_count = self.msaa_sample_count.load(Ordering::Relaxed);
+        let render_fmt = self.render_format();
         // UNWRAP: safe because we're only ever called from the render thread.
         let mut pipeline_guard = self.skybox_pipeline.write().expect("skybox_pipeline write");
         let pipeline = if let Some(pipeline) = pipeline_guard.as_mut() {
             if pipeline.msaa_sample_count() != msaa_sample_count {
                 *pipeline = Arc::new(crate::skybox::create_skybox_render_pipeline(
                     self.device(),
-                    Texture::HDR_TEXTURE_FORMAT,
+                    render_fmt,
                     Some(msaa_sample_count),
                 ));
             }
@@ -810,7 +829,7 @@ impl Stage {
         } else {
             let pipeline = Arc::new(crate::skybox::create_skybox_render_pipeline(
                 self.device(),
-                Texture::HDR_TEXTURE_FORMAT,
+                render_fmt,
                 Some(msaa_sample_count),
             ));
             *pipeline_guard = Some(pipeline.clone());
@@ -953,7 +972,7 @@ impl Stage {
         &self.brdf_lut
     }
 
-    /// Sum the byte size of all used GPU memory.
+    /// Sum the byte size of all used GPU slab-buffer memory.
     ///
     /// Adds together the byte size of all underlying slab buffers.
     ///
@@ -977,6 +996,89 @@ impl Stage {
         4 * num_u32s
     }
 
+    /// Estimate the byte size of all GPU textures owned by this stage.
+    ///
+    /// Includes the render target, depth buffer, MSAA target, bloom
+    /// textures, material atlas, shadow map atlas, BRDF LUT, and IBL
+    /// cubemaps. Sizes are derived from dimensions and assumed bytes per
+    /// texel rather than queried from the GPU, and every step saturates at
+    /// `usize::MAX` so large atlases cannot overflow a 32-bit `usize`.
+    pub fn used_gpu_texture_byte_size(&self) -> usize {
+        // Saturating byte size of `layers` images of `w` x `h` texels at `bpp` bytes each.
+        fn image_bytes(w: usize, h: usize, layers: usize, bpp: usize) -> usize {
+            w.saturating_mul(h).saturating_mul(layers).saturating_mul(bpp)
+        }
+
+        let size = self.get_size();
+        let (w, h) = (size.x as usize, size.y as usize);
+        let msaa = self.get_msaa_sample_count() as usize;
+        let use_hdr = self.use_hdr.load(Ordering::Relaxed);
+        // Bytes per pixel for the render-target format.
+        let render_bpp: usize = if use_hdr { 8 } else { 4 };
+
+        // 1. Render texture (hdr_texture)
+        let mut total = image_bytes(w, h, 1, render_bpp);
+        // 2. Depth texture
+        total = total.saturating_add(image_bytes(w, h, 1, 4));
+        // 3. MSAA render target and MSAA depth (only when MSAA > 1)
+        if msaa > 1 {
+            total = total.saturating_add(image_bytes(w, h, msaa, render_bpp));
+            total = total.saturating_add(image_bytes(w, h, msaa, 4));
+        }
+        // 4. Bloom textures (only in HDR mode)
+        if use_hdr {
+            // Mix texture: full resolution, Rgba16Float
+            total = total.saturating_add(image_bytes(w, h, 1, 8));
+            // Mip chain textures: one per halving of the smaller dimension
+            let mips = (w.min(h) as u32).max(1).ilog2() as usize;
+            for i in 1..=mips {
+                total = total.saturating_add(image_bytes(w >> i, h >> i, 1, 8));
+            }
+        }
+        // 5. Material atlas (Rgba8Unorm, 4 bpp)
+        let atlas_size = self.materials.atlas().get_size();
+        let atlas_layers = atlas_size.depth_or_array_layers as usize;
+        let (atlas_w, atlas_h) = (atlas_size.width as usize, atlas_size.height as usize);
+        total = total.saturating_add(image_bytes(atlas_w, atlas_h, atlas_layers, 4));
+        // 6. Shadow map atlas (R32Float, 4 bpp)
+        let shadow_size = self.lighting.shadow_map_atlas.get_size();
+        let shadow_layers = shadow_size.depth_or_array_layers as usize;
+        let (shadow_w, shadow_h) = (shadow_size.width as usize, shadow_size.height as usize);
+        total = total.saturating_add(image_bytes(shadow_w, shadow_h, shadow_layers, 4));
+        // 7. BRDF LUT (Rg16Float, 512x512, 4 bpp)
+        total = total.saturating_add(image_bytes(512, 512, 1, 4));
+        // 8. IBL irradiance cubemap (Rgba16Float, 32x32x6 faces)
+        total = total.saturating_add(image_bytes(32, 32, 6, 8));
+        // 9. IBL prefiltered environment cubemap (Rgba16Float, 128x128 down to 8x8, 5
+        //    mip levels, 6 faces)
+        for m in 0..5u32 {
+            let s = (128 >> m) as usize;
+            total = total.saturating_add(image_bytes(s, s, 6, 8));
+        }
+
+        total
+    }
+
+    /// Total estimated GPU memory usage (buffers + textures), saturating at `usize::MAX`.
+    pub fn used_gpu_total_byte_size(&self) -> usize {
+        self.used_gpu_buffer_byte_size().saturating_add(self.used_gpu_texture_byte_size())
+    }
+
+    /// Warn when estimated GPU memory usage exceeds the configured budget.
+    ///
+    /// Does nothing if no budget has been set.
+    pub fn log_budget_warning(&self) {
+        // Copy the budget out so the lock is released before usage is estimated.
+        let budget = *self.memory_budget.read().expect("memory_budget read");
+        let Some(budget) = budget else { return };
+        let used = self.used_gpu_total_byte_size();
+        if used > budget {
+            const MIB: f64 = 1024.0 * 1024.0;
+            let (used_mb, budget_mb) = (used as f64 / MIB, budget as f64 / MIB);
+            log::warn!("GPU memory usage ({used_mb:.1} MB) exceeds budget ({budget_mb:.1} MB)");
+        }
+    }
+
     pub fn hdr_texture(&self) -> impl Deref<Target = crate::texture::Texture> + '_ {
         self.hdr_texture.read().expect("hdr_texture read")
     }
@@ -1172,12 +1274,31 @@ impl Stage {
         })
     }
 
+    /// Texture format that the main render pass draws into.
+    ///
+    /// This is [`Texture::HDR_TEXTURE_FORMAT`] while HDR is enabled and the
+    /// stage's `surface_format` otherwise (LDR mode).
+    pub fn render_format(&self) -> wgpu::TextureFormat {
+        let hdr_enabled = self.use_hdr.load(Ordering::Relaxed);
+        if !hdr_enabled {
+            return self.surface_format;
+        }
+        Texture::HDR_TEXTURE_FORMAT
+    }
+
     /// Create a new stage.
     pub fn new(ctx: &crate::context::Context) -> Self {
         let runtime = ctx.runtime();
         let device = &runtime.device;
         let resolution @ UVec2 { x: w, y: h } = ctx.get_size();
         let stage_config = *ctx.stage_config.read().expect("stage_config read");
+        let use_hdr = stage_config.default_hdr;
+        let surface_format = ctx.get_render_target().format().add_srgb_suffix();
+        let render_format = if use_hdr {
+            Texture::HDR_TEXTURE_FORMAT
+        } else {
+            surface_format
+        };
         let geometry = Geometry::new(
             ctx,
             resolution,
@@ -1188,30 +1309,27 @@ impl Stage {
         );
         let materials = Materials::new(runtime, stage_config.atlas_size);
         let multisample_count = 1;
-        let hdr_texture = Arc::new(RwLock::new(Texture::create_hdr_texture(
+        let hdr_texture = Arc::new(RwLock::new(Texture::create_render_texture(
             device,
             w,
             h,
             multisample_count,
+            render_format,
         )));
         let depth_texture =
             Texture::create_depth_texture(device, w, h, multisample_count, Some("stage-depth"));
         let msaa_render_target = Default::default();
         // UNWRAP: safe because no other references at this point (created above^)
         let bloom = Bloom::new(ctx, &hdr_texture.read().expect("hdr_texture read"));
-        let tonemapping = Tonemapping::new(
-            runtime,
-            ctx.get_render_target().format().add_srgb_suffix(),
-            &bloom.get_mix_texture(),
-        );
-        let stage_pipeline = Self::create_primitive_pipeline(
-            device,
-            wgpu::TextureFormat::Rgba16Float,
-            multisample_count,
-        );
+        let tonemapping = Tonemapping::new(runtime, surface_format, &bloom.get_mix_texture());
+        let stage_pipeline =
+            Self::create_primitive_pipeline(device, render_format, multisample_count);
         let geometry_buffer = geometry.slab_allocator().commit();
         let lighting = Lighting::new(stage_config.shadow_map_atlas_size, &geometry);
 
+        // In LDR mode, bloom is always off regardless of the profile default.
+        let has_bloom = use_hdr && stage_config.default_bloom;
+
         let brdf_lut = BrdfLut::new(runtime);
         let skybox = Skybox::empty(runtime);
         let ibl = Ibl::new(runtime, &skybox);
@@ -1241,7 +1359,7 @@ impl Stage {
             brdf_lut,
             bloom,
             tonemapping,
-            has_bloom: AtomicBool::from(true).into(),
+            has_bloom: AtomicBool::from(has_bloom).into(),
             textures_bindgroup: Default::default(),
             debug_overlay: DebugOverlay::new(device, ctx.get_render_target().format()),
             has_debug_overlay: Arc::new(false.into()),
@@ -1251,6 +1369,9 @@ impl Stage {
             clear_color_attachments: Arc::new(true.into()),
             clear_depth_attachments: Arc::new(true.into()),
             background_color: Arc::new(RwLock::new(wgpu::Color::TRANSPARENT)),
+            use_hdr: Arc::new(AtomicBool::new(use_hdr)),
+            surface_format,
+            memory_budget: ctx.memory_budget.clone(),
         }
     }
 
@@ -1297,11 +1418,8 @@ impl Stage {
         *self
             .primitive_pipeline
             .write()
-            .expect("primitive_pipeline write") = Self::create_primitive_pipeline(
-            self.device(),
-            wgpu::TextureFormat::Rgba16Float,
-            multisample_count,
-        );
+            .expect("primitive_pipeline write") =
+            Self::create_primitive_pipeline(self.device(), self.render_format(), multisample_count);
         let size = self.get_size();
         // UNWRAP: POP
         *self.depth_texture.write().expect("depth_texture write") = Texture::create_depth_texture(
@@ -1476,7 +1594,8 @@ impl Stage {
         self.geometry
             .descriptor()
             .modify(|cfg| cfg.resolution = size);
-        let hdr_texture = Texture::create_hdr_texture(self.device(), size.x, size.y, 1);
+        let hdr_texture =
+            Texture::create_render_texture(self.device(), size.x, size.y, 1, self.render_format());
         let sample_count = self.msaa_sample_count.load(Ordering::Relaxed);
         if let Some(msaa_view) = self
             .msaa_render_target
@@ -1562,6 +1681,93 @@ impl Stage {
         self
     }
 
+    /// Enable or disable HDR rendering.
+    ///
+    /// When `true` (default on High/Medium profiles) the stage renders to
+    /// `Rgba16Float`, runs bloom, and tonemaps to the output surface.
+    /// When `false` (default on Low profile) the stage renders directly in
+    /// the surface format (LDR), skipping bloom and tonemapping entirely.
+    /// This halves render-target bandwidth and is recommended for
+    /// low-power GPUs like the Raspberry Pi 5.
+    ///
+    /// Setting the current value again is a no-op. A real change recreates
+    /// the primitive pipeline and render textures; switching to LDR also
+    /// turns bloom off, and this method does not re-enable it for HDR.
+    pub fn set_hdr(&self, hdr: bool) {
+        let prev = self.use_hdr.swap(hdr, Ordering::Relaxed);
+        if prev == hdr {
+            return;
+        }
+        let format = if hdr {
+            Texture::HDR_TEXTURE_FORMAT
+        } else {
+            self.surface_format
+        };
+        log::info!("switching HDR mode to {hdr} (render format: {format:?})");
+
+        let size = self.get_size();
+        let sample_count = self.msaa_sample_count.load(Ordering::Relaxed);
+
+        // Recreate the primitive pipeline with the new format and current sample count
+        *self
+            .primitive_pipeline
+            .write()
+            .expect("primitive_pipeline write") =
+            Self::create_primitive_pipeline(self.device(), format, sample_count);
+
+        // Recreate the single-sample render texture in the new format
+        let render_texture =
+            Texture::create_render_texture(self.device(), size.x, size.y, 1, format);
+
+        // Rebuild the MSAA target in the new format, but only if one already exists
+        if let Some(msaa_view) = self
+            .msaa_render_target
+            .write()
+            .expect("msaa_render_target write")
+            .as_mut()
+        {
+            *msaa_view =
+                create_msaa_textureview(self.device(), size.x, size.y, format, sample_count);
+        }
+
+        // Point bloom and tonemapping at the new texture, then store it on the stage
+        self.bloom.set_hdr_texture(self.runtime(), &render_texture);
+        self.tonemapping
+            .set_hdr_texture(self.device(), &render_texture);
+        *self.hdr_texture.write().expect("hdr_texture write") = render_texture;
+
+        // If switching to LDR, disable bloom (nothing here restores it for HDR)
+        if !hdr {
+            self.has_bloom.store(false, Ordering::Relaxed);
+        }
+
+        // Drop the skybox pipeline and both cached bind groups (format changed)
+        *self.skybox_pipeline.write().expect("skybox_pipeline write") = None;
+        let _ = self
+            .skybox_bindgroup
+            .lock()
+            .expect("skybox_bindgroup lock")
+            .take();
+        let _ = self
+            .textures_bindgroup
+            .lock()
+            .expect("textures_bindgroup lock")
+            .take();
+    }
+
+    /// Builder-style variant of [`Stage::set_hdr`]: applies `hdr`, then
+    /// returns the stage so calls can be chained. See [`Stage::set_hdr`]
+    /// for what changing the mode does.
+    pub fn with_hdr(self, hdr: bool) -> Self {
+        self.set_hdr(hdr);
+        self
+    }
+
+    /// Returns `true` when the stage is in HDR mode and `false` when it is in LDR mode.
+    pub fn get_hdr(&self) -> bool {
+        self.use_hdr.load(Ordering::Relaxed)
+    }
+
     /// Adds a primitive to the internal list of primitives to be drawn each
     /// frame.
     ///
@@ -1614,6 +1820,8 @@ impl Stage {
 
     /// Render the staged scene into the given view.
     pub fn render(&self, view: &wgpu::TextureView) {
+        let use_hdr = self.use_hdr.load(Ordering::Relaxed);
+
         // UNWRAP: POP
         let background_color = *self.background_color.read().expect("background_color read");
         // UNWRAP: POP
@@ -1622,7 +1830,6 @@ impl Stage {
             .read()
             .expect("msaa_render_target read");
         let clear_colors = self.clear_color_attachments.load(Ordering::Relaxed);
-        let hdr_texture = self.hdr_texture.read().expect("hdr_texture read");
 
         let mk_ops = |store| wgpu::Operations {
             load: if clear_colors {
@@ -1632,17 +1839,24 @@ impl Stage {
             },
             store,
         };
+
+        // In LDR mode the main pass writes directly to the output view,
+        // skipping bloom and tonemapping entirely. In HDR mode it writes
+        // to the intermediate HDR render texture as before.
+        let hdr_texture = self.hdr_texture.read().expect("hdr_texture read");
+        let color_target_view = if use_hdr { &hdr_texture.view } else { view };
+
         let render_pass_color_attachment = if let Some(msaa_view) = msaa_target.as_ref() {
             wgpu::RenderPassColorAttachment {
                 ops: mk_ops(wgpu::StoreOp::Discard),
                 view: msaa_view,
-                resolve_target: Some(&hdr_texture.view),
+                resolve_target: Some(color_target_view),
                 depth_slice: None,
             }
         } else {
             wgpu::RenderPassColorAttachment {
                 ops: mk_ops(wgpu::StoreOp::Store),
-                view: &hdr_texture.view,
+                view: color_target_view,
                 resolve_target: None,
                 depth_slice: None,
             }
@@ -1674,41 +1888,45 @@ impl Stage {
         }
         .run();
 
-        // then render bloom
-        if self.has_bloom.load(Ordering::Relaxed) {
-            self.bloom.bloom(self.device(), self.queue());
-        } else {
-            // copy the input hdr texture to the bloom mix texture
-            let mut encoder =
-                self.device()
-                    .create_command_encoder(&wgpu::CommandEncoderDescriptor {
-                        label: Some("no bloom copy"),
-                    });
-            let bloom_mix_texture = self.bloom.get_mix_texture();
-            encoder.copy_texture_to_texture(
-                wgpu::TexelCopyTextureInfo {
-                    texture: &self.hdr_texture.read().expect("hdr_texture read").texture,
-                    mip_level: 0,
-                    origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
-                    aspect: wgpu::TextureAspect::All,
-                },
-                wgpu::TexelCopyTextureInfo {
-                    texture: &bloom_mix_texture.texture,
-                    mip_level: 0,
-                    origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
-                    aspect: wgpu::TextureAspect::All,
-                },
-                wgpu::Extent3d {
-                    width: bloom_mix_texture.width(),
-                    height: bloom_mix_texture.height(),
-                    depth_or_array_layers: 1,
-                },
-            );
-            self.queue().submit(std::iter::once(encoder.finish()));
-        }
+        if use_hdr {
+            // HDR path: bloom -> tonemapping -> output view
+            if self.has_bloom.load(Ordering::Relaxed) {
+                self.bloom.bloom(self.device(), self.queue());
+            } else {
+                // copy the input hdr texture to the bloom mix texture
+                let mut encoder =
+                    self.device()
+                        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+                            label: Some("no bloom copy"),
+                        });
+                let bloom_mix_texture = self.bloom.get_mix_texture();
+                encoder.copy_texture_to_texture(
+                    wgpu::TexelCopyTextureInfo {
+                        texture: &self.hdr_texture.read().expect("hdr_texture read").texture,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    wgpu::TexelCopyTextureInfo {
+                        texture: &bloom_mix_texture.texture,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    wgpu::Extent3d {
+                        width: bloom_mix_texture.width(),
+                        height: bloom_mix_texture.height(),
+                        depth_or_array_layers: 1,
+                    },
+                );
+                self.queue().submit(std::iter::once(encoder.finish()));
+            }
 
-        // then render tonemapping
-        self.tonemapping.render(self.device(), self.queue(), view);
+            // then render tonemapping
+            self.tonemapping.render(self.device(), self.queue(), view);
+        }
+        // In LDR mode the main pass already wrote to `view`, so there is
+        // nothing more to do — bloom and tonemapping are skipped entirely.
 
         // then render the debug overlay
         if self.has_debug_overlay.load(Ordering::Relaxed) {
diff --git a/crates/renderling/src/texture.rs b/crates/renderling/src/texture.rs
index a3014ebf..02ce96bf 100644
--- a/crates/renderling/src/texture.rs
+++ b/crates/renderling/src/texture.rs
@@ -678,16 +678,36 @@ impl Texture {
         height: u32,
         multisample_count: u32,
     ) -> Texture {
-        // * The hdr texture is what we render to in most cases
-        // * we also read from it to calculate bloom
-        // * we also write the bloom mix result back to it
-        // * we also read the texture in tests
+        Self::create_render_texture(
+            device,
+            width,
+            height,
+            multisample_count,
+            Self::HDR_TEXTURE_FORMAT,
+        )
+    }
+
+    /// Create a render texture with the specified format.
+    ///
+    /// In HDR mode this is `Rgba16Float`; in LDR mode it can be the
+    /// surface format (e.g. `Rgba8UnormSrgb`), halving bandwidth.
+    pub fn create_render_texture(
+        device: &wgpu::Device,
+        width: u32,
+        height: u32,
+        multisample_count: u32,
+        format: wgpu::TextureFormat,
+    ) -> Texture {
+        // * The render texture is what we render to in most cases
+        // * In HDR mode we also read from it to calculate bloom
+        // * In HDR mode we also write the bloom mix result back to it
+        // * We also read the texture in tests
         let usage = wgpu::TextureUsages::RENDER_ATTACHMENT
             | wgpu::TextureUsages::TEXTURE_BINDING
             | wgpu::TextureUsages::COPY_DST
             | wgpu::TextureUsages::COPY_SRC;
         let texture = Arc::new(device.create_texture(&wgpu::TextureDescriptor {
-            label: Some("hdr"),
+            label: Some("render"),
             size: wgpu::Extent3d {
                 width,
                 height,
@@ -696,7 +716,7 @@ impl Texture {
             mip_level_count: 1,
             sample_count: multisample_count,
             dimension: wgpu::TextureDimension::D2,
-            format: Self::HDR_TEXTURE_FORMAT,
+            format,
             usage,
             view_formats: &[],
         }));
diff --git a/crates/renderling/src/ui/cpu.rs b/crates/renderling/src/ui/cpu.rs
index 4e224382..f3cddbd8 100644
--- a/crates/renderling/src/ui/cpu.rs
+++ b/crates/renderling/src/ui/cpu.rs
@@ -143,12 +143,13 @@ pub struct Ui {
 impl Ui {
     pub fn new(ctx: &Context) -> Self {
         let UVec2 { x, y } = ctx.get_size();
+        let msaa = ctx.get_default_msaa_sample_count();
         let stage = ctx
             .new_stage()
             .with_background_color(Vec4::ONE)
             .with_lighting(false)
             .with_bloom(false)
-            .with_msaa_sample_count(4)
+            .with_msaa_sample_count(msaa)
             .with_frustum_culling(false);
         let (proj, view) = crate::camera::default_ortho2d(x as f32, y as f32);
         let camera = stage.new_camera().with_projection_and_view(proj, view);