diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 0ec9bcf589f7..fcab568fb442 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -1828,6 +1828,98 @@ jobs:
             dockerfile: "./backend/Dockerfile.llama-cpp"
             context: "./"
             ubuntu-version: '2404'
+          # llama-cpp-tq: TurboQuant fork of llama.cpp, built through the shared llama-cpp Dockerfile
+          - build-type: ''
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-cpu-llama-cpp-tq'
+            runs-on: 'bigger-runner'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "8"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp-tq'
+            runs-on: 'bigger-runner'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp-tq'
+            runs-on: 'ubuntu-latest'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "13"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-cuda-13-arm64-llama-cpp-tq'
+            runs-on: 'ubuntu-24.04-arm'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'hipblas'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-rocm-hipblas-llama-cpp-tq'
+            runs-on: 'ubuntu-latest'
+            base-image: "rocm/dev-ubuntu-24.04:6.4.4"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "0"
+            platforms: 'linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-nvidia-l4t-arm64-llama-cpp-tq'
+            runs-on: 'ubuntu-24.04-arm'
+            base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2204'
+          - build-type: 'vulkan'
+            cuda-major-version: ""
+            cuda-minor-version: ""
+            platforms: 'linux/amd64,linux/arm64'
+            tag-latest: 'auto'
+            tag-suffix: '-gpu-vulkan-llama-cpp-tq'
+            runs-on: 'bigger-runner'
+            base-image: "ubuntu:24.04"
+            skip-drivers: 'false'
+            backend: "llama-cpp-tq"
+            dockerfile: "./backend/Dockerfile.llama-cpp"
+            context: "./"
+            ubuntu-version: '2404'
           # Stablediffusion-ggml
           - build-type: ''
             cuda-major-version: ""
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
index 49e489beb00b..bd36e6646d7f 100644
--- a/.github/workflows/bump_deps.yaml
+++ b/.github/workflows/bump_deps.yaml
@@ -14,6 +14,11 @@ jobs:
             variable: "LLAMA_VERSION"
             branch: "master"
             file: "backend/cpp/llama-cpp/Makefile"
+          - repository: "TheTom/llama-cpp-turboquant"
+            variable: "LLAMA_VERSION"
+            branch: "master"
+            file: "backend/cpp/llama-cpp-tq/Makefile"
+            branch_suffix: "-tq"  # appended to the PR branch name; this entry reuses the LLAMA_VERSION variable name
           - repository: "ggml-org/whisper.cpp"
             variable: "WHISPER_CPP_VERSION"
             branch: "master"
@@ -60,7 +65,7 @@ jobs:
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
           title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
-          branch: "update/${{ matrix.variable }}"
+          branch: "update/${{ matrix.variable }}${{ matrix.branch_suffix }}"
           body: ${{ steps.bump.outputs.message }}
           signoff: true
 
diff --git a/.gitignore b/.gitignore
index 25252eada349..11aca693bd32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,9 @@ prepare-sources
 /backend/cpp/llama-cpp/llama.cpp
 /backend/cpp/llama-*
 !backend/cpp/llama-cpp
+# Forks get a tracked directory of their own; their upstream checkout stays ignored
+!backend/cpp/llama-cpp-tq
+/backend/cpp/llama-cpp-tq/llama.cpp
 /backends
 /backend-images
 /result.yaml
diff --git a/Makefile b/Makefile
index 61bceb7e36fe..f3e6573a48e5 100644
--- a/Makefile
+++ b/Makefile
@@ -544,8 +544,9 @@ backend-images:
 	mkdir -p backend-images
 
 # Backend metadata: BACKEND_NAME | DOCKERFILE_TYPE | BUILD_CONTEXT | PROGRESS_FLAG | NEEDS_BACKEND_ARG
-# llama-cpp is special - uses llama-cpp Dockerfile and doesn't need BACKEND arg
+# llama-cpp and its forks share the llama-cpp Dockerfile; only forks pass the BACKEND arg (last field)
 BACKEND_LLAMA_CPP = llama-cpp|llama-cpp|.|false|false
+BACKEND_LLAMA_CPP_TQ = llama-cpp-tq|llama-cpp|.|false|true
 
 # Golang backends
 BACKEND_PIPER = piper|golang|.|false|true
@@ -609,6 +610,7 @@ endef
 
 # Generate all docker-build targets
 $(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP)))
+$(eval $(call generate-docker-build-target,$(BACKEND_LLAMA_CPP_TQ)))
 $(eval $(call generate-docker-build-target,$(BACKEND_PIPER)))
 $(eval $(call generate-docker-build-target,$(BACKEND_LOCAL_STORE)))
 $(eval $(call generate-docker-build-target,$(BACKEND_HUGGINGFACE)))
diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp
index 3930d04d4aba..110b9bf04a5f 100644
--- a/backend/Dockerfile.llama-cpp
+++ b/backend/Dockerfile.llama-cpp
@@ -58,7 +58,9 @@ ARG CUDA_DOCKER_ARCH
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 ARG CMAKE_ARGS
 ENV CMAKE_ARGS=${CMAKE_ARGS}
-ARG BACKEND=rerankers
+ARG BACKEND=llama-cpp
+ARG LLAMA_BACKEND_DIR=${BACKEND}
+ENV LLAMA_BACKEND_DIR=${LLAMA_BACKEND_DIR}
 ARG BUILD_TYPE
 ENV BUILD_TYPE=${BUILD_TYPE}
 ARG CUDA_MAJOR_VERSION
@@ -255,32 +257,27 @@ if [[ -n "${CUDA_DOCKER_ARCH:-}" ]]; then
   CUDA_ARCH_ESC="${CUDA_DOCKER_ARCH//;/\\;}"
   export CMAKE_ARGS="${CMAKE_ARGS:-} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH_ESC}"
   echo "CMAKE_ARGS(env) = ${CMAKE_ARGS}"
-  rm -rf /LocalAI/backend/cpp/llama-cpp-*-build
+  rm -rf /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}-*-build
 fi
 
+cd /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}
+
 if [ "${TARGETARCH}" = "arm64" ] || [ "${BUILD_TYPE}" = "hipblas" ]; then
-  cd /LocalAI/backend/cpp/llama-cpp
-  make llama-cpp-fallback
-  make llama-cpp-grpc
-  make llama-cpp-rpc-server
+  make ARCH=aarch64 build-variants  # ARCH=aarch64 selects ARM64_VARIANTS (no AVX builds); also used for hipblas on amd64
 else
-  cd /LocalAI/backend/cpp/llama-cpp
-  make llama-cpp-avx
-  make llama-cpp-avx2
-  make llama-cpp-avx512
-  make llama-cpp-fallback
-  make llama-cpp-grpc
-  make llama-cpp-rpc-server
+  make build-variants
 fi
 EOT
 
 
 # Copy libraries using a script to handle architecture differences
-RUN make -BC /LocalAI/backend/cpp/llama-cpp package
+RUN make -BC /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR} package
 
 
 FROM scratch
 
+ARG BACKEND=llama-cpp
+ARG LLAMA_BACKEND_DIR=${BACKEND}
 
 # Copy all available binaries (the build process only creates the appropriate ones for the target architecture)
-COPY --from=builder /LocalAI/backend/cpp/llama-cpp/package/. ./
+COPY --from=builder /LocalAI/backend/cpp/${LLAMA_BACKEND_DIR}/package/. ./
diff --git a/backend/cpp/llama-cpp-tq/Makefile b/backend/cpp/llama-cpp-tq/Makefile
new file mode 100644
index 000000000000..e2cb41471254
--- /dev/null
+++ b/backend/cpp/llama-cpp-tq/Makefile
@@ -0,0 +1,10 @@
+# Thin wrapper that builds the TurboQuant fork with the shared llama-cpp build logic.
+# The ?= defaults below are set before the include so they take precedence over
+# the ?= defaults in ../llama-cpp/Makefile.
+LLAMA_VERSION?=master
+LLAMA_REPO?=https://github.com/TheTom/llama-cpp-turboquant
+BACKEND_NAME?=llama-cpp-tq
+# Directory holding the shared build scripts (prepare.sh, package.sh)
+SHARED_DIR?=$(CURDIR)/../llama-cpp
+
+include ../llama-cpp/Makefile
diff --git a/backend/cpp/llama-cpp/CMakeLists.txt b/backend/cpp/llama-cpp/CMakeLists.txt
index 598461975532..d4965a2feb97 100644
--- a/backend/cpp/llama-cpp/CMakeLists.txt
+++ b/backend/cpp/llama-cpp/CMakeLists.txt
@@ -59,6 +59,11 @@ add_library(hw_grpc_proto
 
 add_executable(${TARGET} grpc-server.cpp json.hpp httplib.h)
 
+# Enable autoparser support if llama.cpp ships the header (in common/ upstream; absent in some forks)
+if(EXISTS "${CMAKE_SOURCE_DIR}/common/chat-auto-parser.h" OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/chat-auto-parser.h")
+    target_compile_definitions(${TARGET} PRIVATE HAS_AUTOPARSER)
+endif()
+
 target_include_directories(${TARGET} PRIVATE ../llava)
 target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR})
 
diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile
index 9cbf69125e2d..67ad03aff1bc 100644
--- a/backend/cpp/llama-cpp/Makefile
+++ b/backend/cpp/llama-cpp/Makefile
@@ -1,6 +1,10 @@
 
 LLAMA_VERSION?=a1cfb645307edc61a89e41557f290f441043d3c2
 LLAMA_REPO?=https://github.com/ggerganov/llama.cpp
+BACKEND_NAME?=llama-cpp
+SHARED_DIR?=$(CURDIR)
+GRPC_SERVER_DIR?=tools/grpc-server
+SERVER_SOURCE_DIR?=tools/server
 
 CMAKE_ARGS?=
 BUILD_TYPE?=
@@ -67,6 +71,17 @@ ifeq ($(BUILD_TYPE),sycl_f32)
 		-DCMAKE_CXX_FLAGS="-fsycl"
 endif
 
+# Variant targets built by build-variants for each architecture (forks may override either list)
+X86_64_VARIANTS ?= llama-cpp-avx llama-cpp-avx2 llama-cpp-avx512 llama-cpp-fallback llama-cpp-grpc llama-cpp-rpc-server
+ARM64_VARIANTS ?= llama-cpp-fallback llama-cpp-grpc llama-cpp-rpc-server
+
+build-variants:
+ifeq ($(ARCH),aarch64)
+	@for v in $(ARM64_VARIANTS); do $(MAKE) $$v || exit 1; done
+else
+	@for v in $(X86_64_VARIANTS); do $(MAKE) $$v || exit 1; done
+endif
+
 INSTALLED_PACKAGES=$(CURDIR)/../grpc/installed_packages
 INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
 ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
@@ -90,42 +105,42 @@ else
 endif
 
 llama-cpp-avx2: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build purge
 	$(info ${GREEN}I llama-cpp build info:avx2${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx2-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx2-build/grpc-server llama-cpp-avx2
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="$(BACKEND_NAME)-avx2-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx2-build/grpc-server llama-cpp-avx2
 
 llama-cpp-avx512: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build purge
 	$(info ${GREEN}I llama-cpp build info:avx512${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-cpp-avx512-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx512-build/grpc-server llama-cpp-avx512
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="$(BACKEND_NAME)-avx512-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx512-build/grpc-server llama-cpp-avx512
 
 llama-cpp-avx: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build purge
 	$(info ${GREEN}I llama-cpp build info:avx${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="$(BACKEND_NAME)-avx-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-avx-build/grpc-server llama-cpp-avx
 
 llama-cpp-fallback: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build purge
 	$(info ${GREEN}I llama-cpp build info:fallback${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="$(BACKEND_NAME)-fallback-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-fallback-build/grpc-server llama-cpp-fallback
 
 llama-cpp-grpc: llama.cpp
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build
-	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build purge
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME) $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build
+	$(MAKE) -C $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build purge
 	$(info ${GREEN}I llama-cpp build info:grpc${RESET})
-	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server
-	cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc
+	CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="$(BACKEND_NAME)-grpc-build" build-llama-cpp-grpc-server
+	cp -rfv $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build/grpc-server llama-cpp-grpc
 
 llama-cpp-rpc-server: llama-cpp-grpc
-	cp -rf $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
+	cp -rf $(CURRENT_MAKEFILE_DIR)/../$(BACKEND_NAME)-grpc-build/llama.cpp/build/bin/rpc-server llama-cpp-rpc-server
 
 llama.cpp:
 	mkdir -p llama.cpp
@@ -133,30 +148,30 @@ llama.cpp:
 	git init && \
 	git remote add origin $(LLAMA_REPO)  && \
 	git fetch origin && \
-	git checkout -b build $(LLAMA_VERSION) && \
+	(git checkout -b build $(LLAMA_VERSION) || git checkout -b build origin/$(LLAMA_VERSION)) && \
 	git submodule update --init --recursive --depth 1 --single-branch
 
-llama.cpp/tools/grpc-server: llama.cpp
-	mkdir -p llama.cpp/tools/grpc-server
-	bash prepare.sh
+llama.cpp/$(GRPC_SERVER_DIR): llama.cpp
+	mkdir -p llama.cpp/$(GRPC_SERVER_DIR)
+	SHARED_DIR=$(SHARED_DIR) SERVER_SOURCE_DIR=$(SERVER_SOURCE_DIR) GRPC_SERVER_DIR=$(GRPC_SERVER_DIR) bash $(SHARED_DIR)/prepare.sh
 
 rebuild:
-	bash prepare.sh
+	SHARED_DIR=$(SHARED_DIR) SERVER_SOURCE_DIR=$(SERVER_SOURCE_DIR) GRPC_SERVER_DIR=$(GRPC_SERVER_DIR) bash $(SHARED_DIR)/prepare.sh
 	rm -rf grpc-server
 	$(MAKE) grpc-server
 
 package:
-	bash package.sh
+	bash $(SHARED_DIR)/package.sh
 
 purge:
 	rm -rf llama.cpp/build
-	rm -rf llama.cpp/tools/grpc-server
+	rm -rf llama.cpp/$(GRPC_SERVER_DIR)
 	rm -rf grpc-server
 
 clean: purge
 	rm -rf llama.cpp
 
-grpc-server: llama.cpp llama.cpp/tools/grpc-server
+grpc-server: llama.cpp llama.cpp/$(GRPC_SERVER_DIR)
 	@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
 ifneq (,$(findstring sycl,$(BUILD_TYPE)))
 	+bash -c "source $(ONEAPI_VARS); \
diff --git a/backend/cpp/llama-cpp/grpc-server.cpp b/backend/cpp/llama-cpp/grpc-server.cpp
index d9d5a5ca4a82..8f4383d1e97d 100644
--- a/backend/cpp/llama-cpp/grpc-server.cpp
+++ b/backend/cpp/llama-cpp/grpc-server.cpp
@@ -17,7 +17,9 @@
 #include "backend.pb.h"
 #include "backend.grpc.pb.h"
 #include "common.h"
+#ifdef HAS_AUTOPARSER
 #include "chat-auto-parser.h"
+#endif
 #include <getopt.h>
 #include <grpcpp/ext/proto_server_reflection_plugin.h>
 #include <grpcpp/grpcpp.h>
@@ -2665,6 +2667,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
         
         response->set_rendered_template(rendered_template);
 
+#ifdef HAS_AUTOPARSER
         // Run differential template analysis to detect tool format markers
         if (params_base.use_jinja) {
             try {
@@ -2770,6 +2773,7 @@ class BackendServiceImpl final : public backend::Backend::Service {
                 SRV_WRN("ModelMetadata: failed to run autoparser analysis: %s\n", e.what());
             }
         }
+#endif
 
         return grpc::Status::OK;
     }
diff --git a/backend/cpp/llama-cpp/package.sh b/backend/cpp/llama-cpp/package.sh
index d1897e6bed5a..efa760255e30 100755
--- a/backend/cpp/llama-cpp/package.sh
+++ b/backend/cpp/llama-cpp/package.sh
@@ -5,14 +5,21 @@
 
 set -e
 
-CURDIR=$(dirname "$(realpath $0)")
-REPO_ROOT="${CURDIR}/../../.."
+# Use working directory (not script location) so forks that share this script work correctly
+CURDIR=$(pwd)
+SCRIPT_DIR=$(dirname "$(realpath $0)")
+REPO_ROOT="${SCRIPT_DIR}/../../.."
 
 # Create lib directory
 mkdir -p $CURDIR/package/lib
 
 cp -avrf $CURDIR/llama-cpp-* $CURDIR/package/
-cp -rfv $CURDIR/run.sh $CURDIR/package/
+# Copy run.sh — prefer local copy, fall back to shared dir (script location)
+if [ -f "$CURDIR/run.sh" ]; then
+    cp -rfv $CURDIR/run.sh $CURDIR/package/
+else
+    cp -rfv $SCRIPT_DIR/run.sh $CURDIR/package/
+fi
 
 # Detect architecture and copy appropriate libraries
 if [ -f "/lib64/ld-linux-x86-64.so.2" ]; then
diff --git a/backend/cpp/llama-cpp/prepare.sh b/backend/cpp/llama-cpp/prepare.sh
index f9b7e3dd2651..d3652fe8f535 100644
--- a/backend/cpp/llama-cpp/prepare.sh
+++ b/backend/cpp/llama-cpp/prepare.sh
@@ -1,31 +1,43 @@
 #!/bin/bash
 
-## Patches
+SHARED_DIR="${SHARED_DIR:-.}"
+SERVER_SOURCE_DIR="${SERVER_SOURCE_DIR:-tools/server}"
+GRPC_SERVER_DIR="${GRPC_SERVER_DIR:-tools/grpc-server}"
 
 ## Apply patches from the `patches` directory
 if [ -d "patches" ]; then
     for patch in $(ls patches); do
         echo "Applying patch $patch"
         patch -d llama.cpp/ -p1 < patches/$patch
-    done 
+    done
 fi
 
 set -e
 
-for file in $(ls llama.cpp/tools/server/); do
-    cp -rfv llama.cpp/tools/server/$file llama.cpp/tools/grpc-server/
+# Copy server source files into grpc-server build directory
+for file in $(ls llama.cpp/${SERVER_SOURCE_DIR}/); do
+    cp -rfv llama.cpp/${SERVER_SOURCE_DIR}/$file llama.cpp/${GRPC_SERVER_DIR}/
 done
 
-cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
-cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/tools/grpc-server/
-cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/tools/grpc-server/
+# Copy build files — prefer local overrides, fall back to SHARED_DIR
+for f in CMakeLists.txt grpc-server.cpp; do
+    if [ -f "$f" ]; then
+        cp -r "$f" llama.cpp/${GRPC_SERVER_DIR}/
+    else
+        cp -r "$SHARED_DIR/$f" llama.cpp/${GRPC_SERVER_DIR}/
+    fi
+done
+
+cp -rfv llama.cpp/vendor/nlohmann/json.hpp llama.cpp/${GRPC_SERVER_DIR}/
+cp -rfv llama.cpp/vendor/cpp-httplib/httplib.h llama.cpp/${GRPC_SERVER_DIR}/
+
+# Register the gRPC server directory (basename of GRPC_SERVER_DIR) in its parent CMakeLists.txt, once
+PARENT_CMAKELISTS="llama.cpp/$(dirname "${GRPC_SERVER_DIR}")/CMakeLists.txt"
 
 set +e
-if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
+if grep -qF "add_subdirectory($(basename "${GRPC_SERVER_DIR}"))" "$PARENT_CMAKELISTS"; then
     echo "grpc-server already added"
 else
-    echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
+    echo "add_subdirectory($(basename "${GRPC_SERVER_DIR}"))" >> "$PARENT_CMAKELISTS"
 fi
 set -e
-
diff --git a/backend/index.yaml b/backend/index.yaml
index d94cb70be2f9..1335ef0fd369 100644
--- a/backend/index.yaml
+++ b/backend/index.yaml
@@ -29,6 +29,33 @@
     nvidia-cuda-12: "cuda12-llama-cpp"
     nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp"
     nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp"
+- &llamacpp_tq
+  name: "llama-cpp-tq"
+  alias: "llama-cpp-tq"
+  license: mit
+  description: |
+    TurboQuant llama.cpp fork - quantization research
+  urls:
+    - https://github.com/TheTom/llama-cpp-turboquant
+  tags:
+    - text-to-text
+    - LLM
+    - CPU
+    - GPU
+    - Metal
+    - CUDA
+    - HIP
+  capabilities:
+    # No intel/metal keys: CI publishes no such llama-cpp-tq images, so those hosts use "default"
+    default: "cpu-llama-cpp-tq"
+    nvidia: "cuda12-llama-cpp-tq"
+    amd: "rocm-llama-cpp-tq"
+    vulkan: "vulkan-llama-cpp-tq"
+    nvidia-l4t: "nvidia-l4t-arm64-llama-cpp-tq"
+    nvidia-cuda-13: "cuda13-llama-cpp-tq"
+    nvidia-cuda-12: "cuda12-llama-cpp-tq"
+    nvidia-l4t-cuda-12: "nvidia-l4t-arm64-llama-cpp-tq"
+    nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-llama-cpp-tq"
 - &whispercpp
   name: "whisper"
   alias: "whisper"
@@ -1252,6 +1279,42 @@
   uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-llama-cpp"
   mirrors:
     - localai/localai-backends:master-gpu-nvidia-cuda-13-llama-cpp
+# llama-cpp-tq (TurboQuant) concrete backends: one entry per image built in .github/workflows/backend.yml
+- !!merge <<: *llamacpp_tq
+  name: "cpu-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-cpu-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "cuda12-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-12-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "cuda13-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-nvidia-cuda-13-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "rocm-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-rocm-hipblas-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "vulkan-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-vulkan-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-gpu-vulkan-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "nvidia-l4t-arm64-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-arm64-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-arm64-llama-cpp-tq
+- !!merge <<: *llamacpp_tq
+  name: "cuda13-nvidia-l4t-arm64-llama-cpp-tq"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-tq"
+  mirrors:
+    - localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-llama-cpp-tq
 ## whisper
 - !!merge <<: *whispercpp
   name: "nvidia-l4t-arm64-whisper"