diff --git a/pkg/inference/backends/llamacpp/llamacpp.go b/pkg/inference/backends/llamacpp/llamacpp.go
index 3bccff740..f5ffef771 100644
--- a/pkg/inference/backends/llamacpp/llamacpp.go
+++ b/pkg/inference/backends/llamacpp/llamacpp.go
@@ -181,10 +181,10 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, _ string, mode
 		if draftPath != "" {
 			args = append(args, "--model-draft", draftPath)
 			if config.Speculative.NumTokens > 0 {
-				args = append(args, "--draft-max", strconv.Itoa(config.Speculative.NumTokens))
+				args = append(args, "--spec-draft-n-max", strconv.Itoa(config.Speculative.NumTokens))
 			}
 			if config.Speculative.MinAcceptanceRate > 0 {
-				args = append(args, "--draft-p-min", strconv.FormatFloat(config.Speculative.MinAcceptanceRate, 'f', 2, 64))
+				args = append(args, "--spec-draft-p-min", strconv.FormatFloat(config.Speculative.MinAcceptanceRate, 'f', 2, 64))
 			}
 		}
 	}
diff --git a/pkg/inference/runtime_flags_allowlist.go b/pkg/inference/runtime_flags_allowlist.go
index 3b47a43ac..3b7d40ea2 100644
--- a/pkg/inference/runtime_flags_allowlist.go
+++ b/pkg/inference/runtime_flags_allowlist.go
@@ -114,14 +114,46 @@ var LlamaCppAllowedFlags = map[string]bool{
 	"--spm-infill":       true,
 
 	// Speculative decoding (safe flags only - no file paths)
-	"--draft": true, "--draft-n": true, "--draft-max": true,
-	"--draft-min": true, "--draft-n-min": true,
-	"--draft-p-min": true,
-	"-cd":           true, "--ctx-size-draft": true,
-	"-devd": true, "--device-draft": true,
-	"-ngld": true, "--gpu-layers-draft": true, "--n-gpu-layers-draft": true,
-	"-td": true, "--threads-draft": true,
-	"-tbd": true, "--threads-batch-draft": true,
+	// Flags prefixed with --spec-draft-* are the canonical llama.cpp v1.5+ names.
+	// Short aliases (e.g. -td) and legacy unprefixed variants (e.g. --threads-draft) are kept for backward
+	// compatibility with older llama.cpp versions, except --draft and --draft-n, which are no longer allowed.
+	"--spec-draft-threads": true, "-td": true, "--threads-draft": true,
+	"--spec-draft-threads-batch": true, "-tbd": true, "--threads-batch-draft": true,
+	"--spec-draft-cpu-mask": true, "-Cd": true, "--cpu-mask-draft": true,
+	"--spec-draft-cpu-range": true, "-Crd": true, "--cpu-range-draft": true,
+	"--spec-draft-cpu-strict": true, "--cpu-strict-draft": true,
+	"--spec-draft-prio": true, "--prio-draft": true,
+	"--spec-draft-poll": true, "--poll-draft": true,
+	"--spec-draft-cpu-mask-batch": true, "-Cbd": true, "--cpu-mask-batch-draft": true,
+	"--spec-draft-cpu-strict-batch": true, "--cpu-strict-batch-draft": true,
+	"--spec-draft-prio-batch": true, "--prio-batch-draft": true,
+	"--spec-draft-poll-batch": true, "--poll-batch-draft": true,
+	"--spec-draft-override-tensor": true, "-otd": true, "--override-tensor-draft": true,
+	"--spec-draft-cpu-moe": true, "-cmoed": true, "--cpu-moe-draft": true,
+	"--spec-draft-n-cpu-moe": true, "--spec-draft-ncmoe": true, "-ncmoed": true, "--n-cpu-moe-draft": true,
+	"--spec-draft-n-max": true, "--draft-n-max": true, "--draft-max": true,
+	"--spec-draft-n-min": true, "--draft-n-min": true, "--draft-min": true,
+	"--spec-draft-p-split": true, "--draft-p-split": true,
+	"--spec-draft-p-min": true, "--draft-p-min": true,
+	"--spec-draft-backend-sampling": true, "--no-spec-draft-backend-sampling": true,
+	"--spec-draft-device": true, "-devd": true, "--device-draft": true,
+	"--spec-draft-ngl": true, "-ngld": true, "--gpu-layers-draft": true, "--n-gpu-layers-draft": true,
+	"--spec-type": true,
+	"--spec-ngram-mod-n-min": true,
+	"--spec-ngram-mod-n-max": true,
+	"--spec-ngram-mod-n-match": true,
+	"--spec-ngram-simple-size-n": true,
+	"--spec-ngram-simple-size-m": true,
+	"--spec-ngram-simple-min-hits": true,
+	"--spec-ngram-map-k-size-n": true,
+	"--spec-ngram-map-k-size-m": true,
+	"--spec-ngram-map-k-min-hits": true,
+	"--spec-ngram-map-k4v-size-n": true,
+	"--spec-ngram-map-k4v-size-m": true,
+	"--spec-ngram-map-k4v-min-hits": true,
+	"--spec-draft-type-k": true, "-ctkd": true, "--cache-type-k-draft": true,
+	"--spec-draft-type-v": true, "-ctvd": true, "--cache-type-v-draft": true,
+	"-cd": true, "--ctx-size-draft": true,
 
 	// LoRA (safe flags only - no file paths)
 	"--lora-init-without-apply": true,
diff --git a/pkg/inference/runtime_flags_allowlist_test.go b/pkg/inference/runtime_flags_allowlist_test.go
index 6384ea433..a73ec3890 100644
--- a/pkg/inference/runtime_flags_allowlist_test.go
+++ b/pkg/inference/runtime_flags_allowlist_test.go
@@ -162,9 +162,16 @@ func TestLlamaCppAllowedFlags_Categories(t *testing.T) {
 			"--metrics", "--no-metrics", "--jinja", "--no-jinja",
 		},
 		"speculative": {
-			"--draft", "--draft-max", "--draft-min",
+			"--spec-draft-n-max", "--draft-n-max", "--draft-max", "--spec-draft-n-min", "--draft-n-min", "--draft-min",
+			"--spec-draft-p-min", "--draft-p-min",
+			"--spec-draft-p-split", "--draft-p-split",
+			"--spec-draft-threads", "-td", "--threads-draft",
+			"--spec-draft-ngl", "-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft",
+			"--spec-draft-device", "-devd", "--device-draft",
+			"--spec-type",
 			"-cd", "--ctx-size-draft",
-			"-ngld", "--gpu-layers-draft",
+			"--spec-draft-type-k", "-ctkd", "--cache-type-k-draft",
+			"--spec-draft-type-v", "-ctvd", "--cache-type-v-draft",
 		},
 	}