Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions data/darktableconfig.xml.in
Original file line number Diff line number Diff line change
Expand Up @@ -3558,11 +3558,25 @@
<longdescription>radius of structuring element for morphological open+close cleanup. removes small protrusions and fills small holes in the mask. 0 = disabled, 1-3 = typical values.</longdescription>
</dtconfig>
<dtconfig>
<name>plugins/darkroom/masks/object/edge_refine</name>
<type min="0.0" max="0.5">float</type>
<default>0.2</default>
<shortdescription>AI mask edge refinement</shortdescription>
<longdescription>strength of edge-aware threshold boost. near strong image edges the binarization threshold is raised, snapping the mask boundary to object edges. 0 = disabled, 0.1-0.3 = typical values.</longdescription>
<name>plugins/darkroom/masks/object/guided_radius</name>
<type min="0" max="20">int</type>
<default>5</default>
<shortdescription>AI mask guided filter radius</shortdescription>
<longdescription>radius of the guided filter used to snap the mask boundary to image edges. larger values produce smoother boundaries but lose fine detail. 0 = disabled, 3-8 = typical values.</longdescription>
</dtconfig>
<dtconfig>
<name>plugins/darkroom/masks/object/guided_eps</name>
<type min="0.001" max="1.0">float</type>
<default>0.01</default>
<shortdescription>AI mask guided filter edge sensitivity</shortdescription>
<longdescription>edge sensitivity for the guided filter. smaller values preserve finer edges. 0.001 = very sharp edges, 0.1 = soft edges, 1.0 = almost no edge preservation.</longdescription>
</dtconfig>
<dtconfig>
<name>plugins/darkroom/masks/object/render_size</name>
<type min="1024">int</type>
<default>1024</default>
<shortdescription>AI mask render resolution</shortdescription>
<longdescription>target resolution (longest side in pixels) for rendering the image before AI mask encoding. higher values improve edge accuracy but increase processing time. the AI encoder always works at 1024px internally.</longdescription>
</dtconfig>
<dtconfig>
<name>plugins/darkroom/masks/object/brush_size</name>
Expand Down
68 changes: 45 additions & 23 deletions src/common/ai/segmentation.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,14 +557,20 @@ void dt_seg_warmup_decoder(dt_seg_context_t *ctx)
{
int64_t iou_shape[2] = {1, nm};
int64_t lr_shape[4] = {1, nm, pm_dim, pm_dim};
const int dec_outputs = dt_ai_get_output_count(ctx->decoder);

outputs[0] = (dt_ai_tensor_t){
.data = masks, .type = DT_AI_FLOAT, .shape = masks_shape, .ndim = 4};
outputs[1] = (dt_ai_tensor_t){
.data = iou_buf, .type = DT_AI_FLOAT, .shape = iou_shape, .ndim = 2};
outputs[2] = (dt_ai_tensor_t){
.data = low_res, .type = DT_AI_FLOAT, .shape = lr_shape, .ndim = 4};
n_out = 3;
n_out = 2;
// low_res_masks output is optional (absent in 256x256 decoders)
if(dec_outputs >= 3)
{
outputs[2] = (dt_ai_tensor_t){
.data = low_res, .type = DT_AI_FLOAT, .shape = lr_shape, .ndim = 4};
n_out = 3;
}
}
else
{
Expand Down Expand Up @@ -825,27 +831,33 @@ float *dt_seg_compute_mask(dt_seg_context_t *ctx,

if(is_sam)
{
// SAM: 3 outputs -- masks [1,N,H,W], iou [1,N], low_res [1,N,pm_dim,pm_dim]
const size_t low_res_per = (size_t)pm_dim * pm_dim;
low_res = g_try_malloc((size_t)nm * low_res_per * sizeof(float));
if(!low_res)
{
g_free(point_coords);
g_free(point_labels);
g_free(masks);
return NULL;
}

// SAM: masks [1,N,H,W] + iou [1,N], optionally low_res [1,N,pm,pm]
int64_t iou_shape[2] = {1, nm};
int64_t low_res_shape[4] = {1, nm, pm_dim, pm_dim};
const int dec_out_count = dt_ai_get_output_count(ctx->decoder);

dec_outputs[0] = (dt_ai_tensor_t){
.data = masks, .type = DT_AI_FLOAT, .shape = masks_shape, .ndim = 4};
dec_outputs[1] = (dt_ai_tensor_t){
.data = iou_pred, .type = DT_AI_FLOAT, .shape = iou_shape, .ndim = 2};
dec_outputs[2] = (dt_ai_tensor_t){
.data = low_res, .type = DT_AI_FLOAT, .shape = low_res_shape, .ndim = 4};
n_dec_out = 3;
n_dec_out = 2;

// low_res_masks output is optional (absent in 256x256 decoders)
if(dec_out_count >= 3)
{
const size_t low_res_per = (size_t)pm_dim * pm_dim;
low_res = g_try_malloc((size_t)nm * low_res_per * sizeof(float));
if(!low_res)
{
g_free(point_coords);
g_free(point_labels);
g_free(masks);
return NULL;
}
int64_t low_res_shape[4] = {1, nm, pm_dim, pm_dim};
dec_outputs[2] = (dt_ai_tensor_t){
.data = low_res, .type = DT_AI_FLOAT, .shape = low_res_shape, .ndim = 4};
n_dec_out = 3;
}
}
else
{
Expand Down Expand Up @@ -897,11 +909,21 @@ float *dt_seg_compute_mask(dt_seg_context_t *ctx,
"[segmentation] mask computed (%.3fs), best=%d/%d IoU=%.3f",
dec_elapsed, best, nm, iou_pred[best]);

// cache the best low-res mask for iterative refinement
const size_t low_res_per = (size_t)pm_dim * pm_dim;
memcpy(ctx->prev_mask, low_res + (size_t)best * low_res_per,
low_res_per * sizeof(float));
g_free(low_res);
// cache the best mask for iterative refinement
if(low_res)
{
// use dedicated low_res output (1024x1024 decoder)
const size_t low_res_per = (size_t)pm_dim * pm_dim;
memcpy(ctx->prev_mask, low_res + (size_t)best * low_res_per,
low_res_per * sizeof(float));
g_free(low_res);
}
else
{
// masks output is already at prev_mask resolution (256x256 decoder)
memcpy(ctx->prev_mask, masks + (size_t)best * per_mask,
per_mask * sizeof(float));
}
}
else
{
Expand Down
121 changes: 57 additions & 64 deletions src/develop/masks/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "common/ai_models.h"
#include "common/colorspaces.h"
#include "common/debug.h"
#include "common/guided_filter.h"
#include "common/mipmap_cache.h"
#include "common/ras2vect.h"
#include "control/conf.h"
Expand All @@ -40,16 +41,21 @@
#define CONF_OBJECT_THRESHOLD_KEY "plugins/darkroom/masks/object/threshold"
#define CONF_OBJECT_REFINE_KEY "plugins/darkroom/masks/object/refine_passes"
#define CONF_OBJECT_MORPH_KEY "plugins/darkroom/masks/object/morph_radius"
#define CONF_OBJECT_EDGE_REFINE_KEY "plugins/darkroom/masks/object/edge_refine"
#define CONF_OBJECT_GUIDED_RADIUS_KEY "plugins/darkroom/masks/object/guided_radius"
#define CONF_OBJECT_GUIDED_EPS_KEY "plugins/darkroom/masks/object/guided_eps"
#define CONF_OBJECT_CLEANUP_KEY "plugins/darkroom/masks/object/cleanup"
#define CONF_OBJECT_SMOOTHING_KEY "plugins/darkroom/masks/object/smoothing"
#define CONF_OBJECT_FEATHER_KEY "plugins/darkroom/masks/object/feather"
#define CONF_OBJECT_PERSIST_KEY "plugins/darkroom/masks/object/persist_model"
#define CONF_OBJECT_PATH_PREVIEW_KEY "plugins/darkroom/masks/object/path_preview"

// target resolution for segmentation encoding (longest side in pixels),
// matches the encoder input size (1024) -- rendering higher just to
// downscale in preprocessing wastes pipeline time with no quality gain
#define SEG_ENCODE_TARGET 1024
// default render target (longest side in pixels).
// the SAM encoder internally downscales to 1024 so encoding quality
// is the same, but higher render resolution gives the guided filter
// and vectorizer more detail for edge refinement.
// configurable via plugins/darkroom/masks/object/render_size
#define SEG_RENDER_DEFAULT 1024
#define CONF_OBJECT_RENDER_SIZE_KEY "plugins/darkroom/masks/object/render_size"

// --- per-session segmentation state (stored in gui->scratchpad) ---

Expand Down Expand Up @@ -309,8 +315,11 @@ static gpointer _encode_thread_func(gpointer data)
dt_dev_pixelpipe_get_dimensions(&pipe, &dev, pipe.iwidth, pipe.iheight,
&pipe.processed_width, &pipe.processed_height);

const double scale = fmin((double)SEG_ENCODE_TARGET / (double)pipe.processed_width,
(double)SEG_ENCODE_TARGET / (double)pipe.processed_height);
const int render_target = dt_conf_key_exists(CONF_OBJECT_RENDER_SIZE_KEY)
? MAX(dt_conf_get_int(CONF_OBJECT_RENDER_SIZE_KEY), 1024)
: SEG_RENDER_DEFAULT;
const double scale = fmin((double)render_target / (double)pipe.processed_width,
(double)render_target / (double)pipe.processed_height);
const double final_scale = fmin(scale, 1.0); // don't upscale
const int out_w = (int)(final_scale * pipe.processed_width);
const int out_h = (int)(final_scale * pipe.processed_height);
Expand Down Expand Up @@ -599,73 +608,51 @@ static void _morph_open_close(float *mask, int w, int h, float threshold, int ra
g_free(tmp);
}

// edge-aware threshold refinement: near strong image edges the binarization
// threshold is raised by up to edge_boost, snapping the mask boundary to
// actual object contours - uses Scharr gradient of the stored RGB image
static void _edge_refine_threshold(float *mask, int mw, int mh,
const uint8_t *rgb, int rgb_w, int rgb_h,
float base_threshold, float edge_boost)
// edge-aware mask refinement using guided filter: smooths the mask in
// flat regions while preserving sharp transitions at image edges.
// the stored RGB image is used as the guide
static void _guided_filter_refine(float *mask,
const int mw,
const int mh,
const uint8_t *rgb,
const int rgb_w,
const int rgb_h,
const int radius,
const float sqrt_eps)
{
if(edge_boost <= 0.0f || !rgb || rgb_w < 3 || rgb_h < 3)
if(!rgb || rgb_w < 3 || rgb_h < 3)
return;
if(mw != rgb_w || mh != rgb_h)
return;

const size_t npix = (size_t)mw * mh;

// step 1: convert uint8 RGB to float luminance (Rec.601)
float *lum = g_try_malloc(npix * sizeof(float));
if(!lum) return;
// convert uint8 RGB to float RGBA guide (guided_filter expects 4ch)
float *guide = dt_alloc_align_float(npix * 4);
if(!guide) return;

for(size_t i = 0; i < npix; i++)
lum[i] = (0.299f * (float)rgb[i * 3]
+ 0.587f * (float)rgb[i * 3 + 1]
+ 0.114f * (float)rgb[i * 3 + 2]) / 255.0f;

// step 2: compute Scharr gradient magnitude, track max for normalization
float *grad = g_try_malloc(npix * sizeof(float));
if(!grad)
{
g_free(lum);
return;
guide[i * 4 + 0] = (float)rgb[i * 3 + 0] / 255.0f;
guide[i * 4 + 1] = (float)rgb[i * 3 + 1] / 255.0f;
guide[i * 4 + 2] = (float)rgb[i * 3 + 2] / 255.0f;
guide[i * 4 + 3] = 0.0f;
}

float grad_max = 0.0f;

for(int y = 0; y < mh; y++)
// run guided filter: smooths mask but preserves edges from the guide
float *mask_bak = dt_alloc_align_float(npix);
if(!mask_bak)
{
for(int x = 0; x < mw; x++)
{
float g = 0.0f;
if(y >= 1 && y < mh - 1 && x >= 1 && x < mw - 1)
{
const float *p = &lum[y * mw + x];
const float gx = (47.0f / 255.0f) * (p[-mw - 1] - p[-mw + 1]
+ p[mw - 1] - p[mw + 1])
+ (162.0f / 255.0f) * (p[-1] - p[1]);
const float gy = (47.0f / 255.0f) * (p[-mw - 1] - p[mw - 1]
+ p[-mw + 1] - p[mw + 1])
+ (162.0f / 255.0f) * (p[-mw] - p[mw]);
g = sqrtf(gx * gx + gy * gy);
}
grad[y * mw + x] = g;
if(g > grad_max) grad_max = g;
}
dt_free_align(guide);
return;
}

g_free(lum);
memcpy(mask_bak, mask, npix * sizeof(float));
guided_filter(guide, mask_bak, mask, mw, mh, 4,
radius, sqrt_eps, 1.0f, 0.0f, 1.0f);

// step 3: normalize and apply spatially-varying threshold
const float inv_max = (grad_max > 1e-6f) ? 1.0f / grad_max : 0.0f;

for(size_t i = 0; i < npix; i++)
{
const float g_norm = grad[i] * inv_max;
const float effective_thresh = base_threshold + edge_boost * g_norm;
mask[i] = (mask[i] > effective_thresh) ? 1.0f : 0.0f;
}

g_free(grad);
dt_free_align(mask_bak);
dt_free_align(guide);
}

// run the decoder with accumulated points and update the cached mask
Expand Down Expand Up @@ -741,12 +728,13 @@ static void _run_decoder(dt_masks_form_gui_t *gui)
seed_y = CLAMP(seed_y, 0, mh - 1);
const float threshold = CLAMP(dt_conf_get_float(CONF_OBJECT_THRESHOLD_KEY), 0.3f, 0.9f);

// edge-aware threshold refinement: snap mask boundary to image edges
const float edge_boost = CLAMP(dt_conf_get_float(CONF_OBJECT_EDGE_REFINE_KEY), 0.0f, 0.5f);
if(edge_boost > 0.0f && d->encode_rgb)
_edge_refine_threshold(mask, mw, mh,
d->encode_rgb, d->encode_rgb_w, d->encode_rgb_h,
threshold, edge_boost);
// guided filter edge refinement: snap mask boundary to image edges
const int gf_radius = CLAMP(dt_conf_get_int(CONF_OBJECT_GUIDED_RADIUS_KEY), 0, 20);
const float gf_eps = CLAMP(dt_conf_get_float(CONF_OBJECT_GUIDED_EPS_KEY), 0.001f, 1.0f);
if(gf_radius > 0 && d->encode_rgb)
_guided_filter_refine(mask, mw, mh,
d->encode_rgb, d->encode_rgb_w, d->encode_rgb_h,
gf_radius, sqrtf(gf_eps));

_keep_seed_component(mask, mw, mh, threshold, seed_x, seed_y);

Expand All @@ -770,6 +758,11 @@ static void _update_preview(_object_data_t *d)
if(!d->mask || d->mask_w <= 0 || d->mask_h <= 0)
return;

// skip vectorization when path preview is disabled
if(dt_conf_key_exists(CONF_OBJECT_PATH_PREVIEW_KEY)
&& !dt_conf_get_bool(CONF_OBJECT_PATH_PREVIEW_KEY))
return;

const size_t n = (size_t)d->mask_w * d->mask_h;
float *inv_mask = g_try_malloc(n * sizeof(float));
if(!inv_mask) return;
Expand Down
Loading