diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb89465..45afbc5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,10 +1,6 @@
 cmake_minimum_required(VERSION 3.14)
 project(s2cpp LANGUAGES C CXX)
 
-if (WIN32)
-    add_definitions(-DWIN32_LEAN_AND_MEAN)
-endif()
-
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -15,6 +11,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 option(S2_VULKAN  "Build with Vulkan backend"  OFF)
 option(S2_CUDA    "Build with CUDA backend"    OFF)
 option(S2_METAL   "Build with Metal backend"   OFF)
+option(S2_BUILD_SHARED_LIBRARIES "Also build S2 as a shared library (s2_dll) and a static library (s2_lib)." OFF)
 
 # ---------------------------------------------------------------------------
 # GGML (git submodule — not modified)
@@ -38,6 +35,7 @@ add_subdirectory(ggml)
 # s2 executable
 # ---------------------------------------------------------------------------
 set(S2_SOURCES
+    src/s2_export_api.cpp
     src/s2_audio.cpp
     src/s2_tokenizer.cpp
     src/s2_sampler.cpp
@@ -46,36 +44,50 @@ set(S2_SOURCES
     src/s2_prompt.cpp
     src/s2_generate.cpp
     src/s2_pipeline.cpp
-    src/s2_server.cpp
     src/main.cpp
 )
-
-add_executable(s2 ${S2_SOURCES})
-
-target_include_directories(s2 PRIVATE
-    ${CMAKE_CURRENT_SOURCE_DIR}/include
-    ${CMAKE_CURRENT_SOURCE_DIR}/third_party
-    ${CMAKE_CURRENT_SOURCE_DIR}/ggml/include
-    ${CMAKE_CURRENT_SOURCE_DIR}/ggml/src
-)
-
-target_link_libraries(s2 PRIVATE ggml)
-
-if(S2_VULKAN)
-    target_compile_definitions(s2 PRIVATE GGML_USE_VULKAN)
-endif()
-if(S2_CUDA)
-    target_compile_definitions(s2 PRIVATE GGML_USE_CUDA)
-endif()
-
-# Platform-specific
-if(UNIX AND NOT APPLE)
-    target_link_libraries(s2 PRIVATE pthread m)
-endif()
-
-if(WIN32)
-    target_link_libraries(s2 PRIVATE ws2_32 wsock32)
-endif()
-
-# Install
-install(TARGETS s2 RUNTIME DESTINATION bin)
+
+# The s2 executable is always built. S2_BUILD_SHARED_LIBRARIES additionally builds
+# the same sources as a shared library (s2_dll) and a static library (s2_lib).
+add_executable(s2 ${S2_SOURCES})
+set(S2_TARGETS s2)
+if(S2_BUILD_SHARED_LIBRARIES)
+    add_library(s2_dll SHARED ${S2_SOURCES})
+    add_library(s2_lib STATIC ${S2_SOURCES})
+    # S2_LIBRARY is defined for the library targets only (read by include/s2_config.h).
+    target_compile_definitions(s2_dll PRIVATE S2_LIBRARY)
+    target_compile_definitions(s2_lib PRIVATE S2_LIBRARY)
+    list(APPEND S2_TARGETS s2_dll s2_lib)
+endif()
+
+# Apply the common build settings to every target instead of repeating them per target.
+foreach(s2_target IN LISTS S2_TARGETS)
+    target_include_directories(${s2_target} PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/include
+        ${CMAKE_CURRENT_SOURCE_DIR}/third_party
+        ${CMAKE_CURRENT_SOURCE_DIR}/ggml/include
+        ${CMAKE_CURRENT_SOURCE_DIR}/ggml/src          # for ggml-common.h etc.
+    )
+
+    target_link_libraries(${s2_target} PRIVATE ggml)
+
+    if(S2_VULKAN)
+        target_compile_definitions(${s2_target} PRIVATE GGML_USE_VULKAN)
+    endif()
+    if(S2_CUDA)
+        target_compile_definitions(${s2_target} PRIVATE GGML_USE_CUDA)
+    endif()
+
+    # Platform-specific
+    if(UNIX AND NOT APPLE)
+        target_link_libraries(${s2_target} PRIVATE pthread m)
+    endif()
+endforeach()
+
+# Install: executables and DLLs are RUNTIME artifacts, shared objects are LIBRARY
+# artifacts, static and import libraries are ARCHIVE artifacts.
+install(TARGETS ${S2_TARGETS}
+    RUNTIME DESTINATION bin
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib
+)
diff --git a/include/s2_codec.h b/include/s2_codec.h
index a8bb480..c48e480 100644
--- a/include/s2_codec.h
+++ b/include/s2_codec.h
@@ -32,7 +32,7 @@ class AudioCodec {
     // Decode VQ codes to mono float32 audio.
     // codes: (num_codebooks, n_frames) flattened row-major.
     bool decode(const int32_t * codes, int32_t n_frames, int32_t n_threads,
-                std::vector<float> & audio_out);
+                std::vector<float> & audio_out, int32_t* audio_n_frames_out);
 
     int32_t sample_rate()     const { return sample_rate_; }
     int32_t hop_length()      const { return hop_length_; }
diff --git a/include/s2_config.h b/include/s2_config.h
new file mode 100644
index 0000000..6ec5c8d
--- /dev/null
+++ b/include/s2_config.h
@@ -0,0 +1,20 @@
+#pragma once
+// Build configuration shared by the S2 sources. S2_LIBRARY is defined when compiling the library targets.
+// The flag below is `static`, so every translation unit gets its own copy; it gates informational logging.
+#ifdef S2_LIBRARY
+static bool SuppressNonEssentialVerbosity = true;
+#else
+static bool SuppressNonEssentialVerbosity = false;
+#endif
+// S2_Export marks the C entry points that have to be visible outside the library.
+#if defined(_WIN32) || defined(__CYGWIN__)
+#ifdef S2_LIBRARY
+#define S2_Export __declspec(dllexport)
+#else
+#define S2_Export
+#endif
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define S2_Export __attribute__((visibility("default")))
+#else
+#define S2_Export
+#endif
\ No newline at end of file
diff --git a/include/s2_export_api.h b/include/s2_export_api.h
new file mode 100644
index 0000000..4064226
--- /dev/null
+++ b/include/s2_export_api.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include "s2_audio.h"
+#include "s2_codec.h"
+#include "s2_generate.h"
+#include "s2_model.h"
+#include "s2_tokenizer.h"
+#include "s2_pipeline.h"
+#include "s2_prompt.h"
+#include "s2_config.h"
+
+extern "C" // C-linkage entry points. Every Alloc* result is created with new and must be freed with the matching Release*.
+{ // InitializeS2Pipeline copies the three components in and returns 1, or 0 if the pipeline was already initialized.
+	S2_Export s2::Pipeline* AllocS2Pipeline();
+	S2_Export void ReleaseS2Pipeline(s2::Pipeline* Pipeline);
+	S2_Export void SyncS2TokenizerConfigFromS2Model(s2::SlowARModel* Model, s2::Tokenizer* Tokenizer);
+	S2_Export int InitializeS2Pipeline(s2::Pipeline* Pipeline, s2::Tokenizer* Tokenizer, s2::SlowARModel* Model, s2::AudioCodec* AudioCodec);
+	// GenerateParams: a negative argument leaves the corresponding field at its current value.
+	S2_Export s2::GenerateParams* AllocS2GenerateParams();
+	S2_Export void ReleaseS2GenerateParams(s2::GenerateParams* GenerateParams);
+	S2_Export int InitializeS2GenerateParams(s2::GenerateParams* GenerateParams, int32_t max_new_tokens = -1, float temperature = -1, float top_p = -1, int32_t top_k = -1, int32_t min_tokens_before_end = -1, int32_t n_threads = -1, int verbose = -1);
+	// Model: InitializeS2Model forwards its arguments to SlowARModel::load() and returns its result.
+	S2_Export s2::SlowARModel* AllocS2Model();
+	S2_Export void ReleaseS2Model(s2::SlowARModel* Model);
+	S2_Export int InitializeS2Model(s2::SlowARModel* Model, const char* gguf_path, int32_t gpu_device, int32_t backend_type);
+	// Tokenizer: InitializeS2Tokenizer forwards the path to Tokenizer::load() and returns its result.
+	S2_Export s2::Tokenizer* AllocS2Tokenizer();
+	S2_Export void ReleaseS2Tokenizer(s2::Tokenizer* Tokenizer);
+	S2_Export int InitializeS2Tokenizer(s2::Tokenizer* Tokenizer, const char* path);
+	// Audio codec: InitializeS2AudioCodec forwards its arguments to AudioCodec::load() and returns its result.
+	S2_Export s2::AudioCodec* AllocS2AudioCodec();
+	S2_Export void ReleaseS2AudioCodec(s2::AudioCodec* AudioCodec);
+	S2_Export int InitializeS2AudioCodec(s2::AudioCodec* AudioCodec, const char* gguf_path, int32_t gpu_device, int32_t backend_type);
+	// Reference prompt codes: encoded only when the vector is empty and a path is given; returns 1, -1 (encode failed) or -2 (load failed).
+	S2_Export std::vector<int32_t>* AllocS2AudioPromptCodes();
+	S2_Export void ReleaseS2AudioPromptCodes(std::vector<int32_t>* AudioPromptCodes);
+	S2_Export int InitializeAudioPromptCodes(s2::Pipeline* Pipeline, int32_t ThreadCount, const char* ReferenceAudioPath, std::vector<int32_t>* AudioPromptCodes, int* TPrompt);
+	// Audio buffer: an InitialSize greater than zero pre-sizes the vector; otherwise it starts empty.
+	S2_Export std::vector<float>* AllocS2AudioBuffer(int InitialSize);
+	S2_Export void ReleaseS2AudioBuffer(std::vector<float>* AudioBuffer);
+	S2_Export float* GetS2AudioBufferDataPointer(std::vector<float>* AudioBuffer);
+	// Synthesis: returns 0 if the pipeline is not initialized, -3..-6 on failure (KV cache, no frames, decode, save), else 1 or the -1/-2 prompt warning.
+	S2_Export int S2Synthesize(s2::Pipeline* Pipeline, const s2::GenerateParams* GenerateParams, std::vector<float>* AudioBuffer, std::vector<int32_t>* ReferenceAudioPromptCodes, int32_t* ReferenceAudioTPrompt, const char* ReferenceAudioPath, const char* ReferenceAudioTranscript, const char* TextToInfer, const char* OutputAudioPath, int32_t* AudioBufferOutputLength);
+}
\ No newline at end of file
diff --git a/include/s2_pipeline.h b/include/s2_pipeline.h
index 9e1bb31..ed43efd 100644
--- a/include/s2_pipeline.h
+++ b/include/s2_pipeline.h
@@ -36,9 +36,9 @@ class Pipeline {
     bool synthesize(const PipelineParams & params);
 
     bool synthesize_to_memory(const PipelineParams & params, void** ref_audio_buffer, size_t* ref_audio_size, void** wav_buffer, size_t* wav_size);
-    bool synthesize_raw(const PipelineParams & params, AudioData & ref_audio, std::vector<float> & audio_out);
+    bool synthesize_raw(const PipelineParams & params, AudioData & ref_audio, std::vector<float> & audio_out, int32_t* audio_out_length);
 
-private:
+//private:
     Tokenizer   tokenizer_;
     SlowARModel model_;
     AudioCodec  codec_;
diff --git a/src/s2_codec.cpp b/src/s2_codec.cpp
index 1713631..530866c 100755
--- a/src/s2_codec.cpp
+++ b/src/s2_codec.cpp
@@ -1028,7 +1028,7 @@ bool AudioCodec::encode(const float * audio, int32_t n_samples, int32_t n_thread
 // ---------------------------------------------------------------------------
 
 bool AudioCodec::decode(const int32_t * codes, int32_t n_frames, int32_t n_threads,
-                         std::vector<float> & audio_out) {
+                         std::vector<float> & audio_out, int32_t* audio_n_frames_out) {
     if (n_frames <= 0) return false;
 
     // Step 1: dequantize VQ codes to stage vector (n_frames, quantizer_input_dim)
@@ -1132,10 +1132,12 @@ bool AudioCodec::decode(const int32_t * codes, int32_t n_frames, int32_t n_threa
 
         // audio_t is (1, T) or (C, T) — we expect (1, T), take total elements
         const int32_t n_samples = static_cast<int32_t>(ggml_nelements(audio_t));
-        audio_out.resize(n_samples);
+        if(audio_out.size() < n_samples) { audio_out.resize(n_samples); } //Allows for pre-allocated audio buffers that don't need to resized each time.
         ggml_backend_tensor_get(audio_t, audio_out.data(), 0, n_samples * sizeof(float));
         ggml_gallocr_free(allocr);
         ggml_free(ctx);
+
+        *audio_n_frames_out = n_samples;
     }
     return true;
 }
diff --git a/src/s2_export_api.cpp b/src/s2_export_api.cpp
new file mode 100644
index 0000000..082a7d3
--- /dev/null
+++ b/src/s2_export_api.cpp
@@ -0,0 +1,202 @@
+#include "../include/s2_export_api.h"
+
+// Allocates a new s2::Pipeline on the heap; the caller frees it with ReleaseS2Pipeline().
+s2::Pipeline* AllocS2Pipeline() {
+	return new s2::Pipeline();
+}
+// Deletes a pipeline returned by AllocS2Pipeline(); a null pointer is accepted and ignored.
+void ReleaseS2Pipeline(s2::Pipeline* Pipeline) {
+	delete Pipeline;
+}
+void SyncS2TokenizerConfigFromS2Model(s2::SlowARModel* Model, s2::Tokenizer* Tokenizer) { // copies each positive model hparam into the tokenizer config
+	if (Model == nullptr || Tokenizer == nullptr) { return; } // nothing to read from or write to
+	const s2::ModelHParams & hp = Model->hparams();
+	s2::TokenizerConfig & tc    = Tokenizer->config();
+	if (hp.semantic_begin_id > 0) tc.semantic_begin_id = hp.semantic_begin_id;
+	if (hp.semantic_end_id   > 0) tc.semantic_end_id   = hp.semantic_end_id;
+	if (hp.num_codebooks     > 0) tc.num_codebooks     = hp.num_codebooks;
+	if (hp.codebook_size     > 0) tc.codebook_size     = hp.codebook_size;
+	if (hp.vocab_size        > 0) tc.vocab_size        = hp.vocab_size;
+}
+// Copies the given tokenizer, model and codec into a not-yet-initialized pipeline and marks it initialized.
+// Returns 1 on success and 0 when any argument is null or the pipeline was already initialized.
+int InitializeS2Pipeline(s2::Pipeline* Pipeline, s2::Tokenizer* Tokenizer, s2::SlowARModel* Model, s2::AudioCodec* AudioCodec) {
+	if (Pipeline == nullptr || Tokenizer == nullptr || Model == nullptr || AudioCodec == nullptr) { return false; }
+	if (Pipeline->initialized_) { return false; }
+	// NOTE(review): these are copy-assignments; confirm the source objects may safely be released afterwards.
+	Pipeline->tokenizer_ = *Tokenizer;
+	Pipeline->model_ = *Model;
+	Pipeline->codec_ = *AudioCodec;
+	Pipeline->initialized_ = true;
+	return true;
+}
+
+// Allocates a new s2::GenerateParams on the heap; the caller frees it with ReleaseS2GenerateParams().
+s2::GenerateParams* AllocS2GenerateParams() {
+	return new s2::GenerateParams();
+}
+// Deletes parameters returned by AllocS2GenerateParams(); a null pointer is accepted and ignored.
+void ReleaseS2GenerateParams(s2::GenerateParams* GenerateParams) {
+	delete GenerateParams;
+}
+int InitializeS2GenerateParams(s2::GenerateParams* GenerateParams, int32_t max_new_tokens, float temperature, float top_p, int32_t top_k, int32_t min_tokens_before_end, int32_t n_threads, int verbose) {
+	if (GenerateParams == nullptr) { return false; } // returns 0 on null; a negative argument below keeps the field's current value
+	if (max_new_tokens >= 0) { GenerateParams->max_new_tokens = max_new_tokens; }
+	if (temperature >= 0) { GenerateParams->temperature = temperature; }
+	if (top_p >= 0) { GenerateParams->top_p = top_p; }
+	if (top_k >= 0) { GenerateParams->top_k = top_k; }
+	if (min_tokens_before_end >= 0) { GenerateParams->min_tokens_before_end = min_tokens_before_end; }
+	if (n_threads >= 0) { GenerateParams->n_threads = n_threads; }
+	if (verbose >= 0) { GenerateParams->verbose = verbose; }
+	return true;
+}
+
+// Allocates a new s2::SlowARModel on the heap; the caller frees it with ReleaseS2Model().
+s2::SlowARModel* AllocS2Model() {
+	return new s2::SlowARModel();
+}
+// Deletes a model returned by AllocS2Model(); a null pointer is accepted and ignored.
+void ReleaseS2Model(s2::SlowARModel* Model) {
+	delete Model;
+}
+// Loads the model from a GGUF file via SlowARModel::load(); returns 0 on a null argument or load failure, 1 on success.
+int InitializeS2Model(s2::SlowARModel* Model, const char* gguf_path, int32_t gpu_device, int32_t backend_type) {
+	return (Model != nullptr && gguf_path != nullptr) ? Model->load(std::string(gguf_path), gpu_device, backend_type) : false;
+}
+
+// Allocates a new s2::Tokenizer on the heap; the caller frees it with ReleaseS2Tokenizer().
+s2::Tokenizer* AllocS2Tokenizer() {
+	return new s2::Tokenizer();
+}
+// Deletes a tokenizer returned by AllocS2Tokenizer(); a null pointer is accepted and ignored.
+void ReleaseS2Tokenizer(s2::Tokenizer* Tokenizer) {
+	delete Tokenizer;
+}
+// Loads the tokenizer from `path`; returns 0 when an argument is null, otherwise the result of Tokenizer::load() as an int.
+int InitializeS2Tokenizer(s2::Tokenizer* Tokenizer, const char* path) {
+	return (Tokenizer != nullptr && path != nullptr) ? Tokenizer->load(std::string(path)) : 0;
+}
+
+// Allocates a new s2::AudioCodec on the heap; the caller frees it with ReleaseS2AudioCodec().
+s2::AudioCodec* AllocS2AudioCodec() {
+	return new s2::AudioCodec();
+}
+// Deletes a codec returned by AllocS2AudioCodec(); a null pointer is accepted and ignored.
+void ReleaseS2AudioCodec(s2::AudioCodec* AudioCodec) {
+	delete AudioCodec;
+}
+// Loads the codec from a GGUF file; returns 0 when an argument is null, otherwise the result of AudioCodec::load() as an int.
+int InitializeS2AudioCodec(s2::AudioCodec* AudioCodec, const char* gguf_path, int32_t gpu_device, int32_t backend_type) {
+	return (AudioCodec != nullptr && gguf_path != nullptr) ? AudioCodec->load(std::string(gguf_path), gpu_device, backend_type) : 0;
+}
+
+// Allocates an empty code vector on the heap; the caller frees it with ReleaseS2AudioPromptCodes().
+std::vector<int32_t>* AllocS2AudioPromptCodes() {
+	return new std::vector<int32_t>();
+}
+// Deletes a vector returned by AllocS2AudioPromptCodes(); a null pointer is accepted and ignored.
+void ReleaseS2AudioPromptCodes(std::vector<int32_t>* AudioPromptCodes) {
+	delete AudioPromptCodes;
+}
+// Encodes the reference audio file into prompt codes unless the code vector is already populated.
+// Returns 1 when there was nothing to do or encoding succeeded, 0 on a null Pipeline/AudioPromptCodes/TPrompt,
+// -1 when encode() failed (codes cleared, *TPrompt reset to 0) and -2 when the audio file could not be loaded.
+int InitializeAudioPromptCodes(s2::Pipeline* Pipeline, int32_t ThreadCount, const char* ReferenceAudioPath, std::vector<int32_t>* AudioPromptCodes, int* TPrompt)
+{
+	if (Pipeline == NULL || AudioPromptCodes == NULL || TPrompt == NULL) { return 0; }
+	// Already-populated codes or no reference path: nothing to encode.
+	if (!AudioPromptCodes->empty() || ReferenceAudioPath == NULL) { return 1; }
+
+	s2::AudioData ref_audio;
+	if (!load_audio(std::string(ReferenceAudioPath), ref_audio, Pipeline->codec_.sample_rate())) {
+		return -2; //Pipeline warning: load_audio failed, running without reference audio.
+	}
+	if (!Pipeline->codec_.encode(ref_audio.samples.data(), (int32_t)ref_audio.samples.size(),
+		ThreadCount, *AudioPromptCodes, *TPrompt)) {
+		AudioPromptCodes->clear();
+		*TPrompt = 0;
+		return -1; //Pipeline warning: encode failed, running without reference audio.
+	}
+	return 1;
+}
+
+// Allocates a sample buffer holding InitialSize zero-valued floats (empty when InitialSize <= 0); free it with ReleaseS2AudioBuffer().
+std::vector<float>* AllocS2AudioBuffer(int InitialSize) {
+	return new std::vector<float>(InitialSize > 0 ? static_cast<size_t>(InitialSize) : 0u);
+}
+// Deletes a buffer returned by AllocS2AudioBuffer(); a null pointer is accepted and ignored.
+void ReleaseS2AudioBuffer(std::vector<float>* AudioBuffer) {
+	delete AudioBuffer;
+}
+// Returns the buffer's contiguous sample storage, or a null pointer when no buffer is given.
+float* GetS2AudioBufferDataPointer(std::vector<float>* AudioBuffer) {
+	return AudioBuffer != nullptr ? AudioBuffer->data() : nullptr;
+}
+
+// Runs one synthesis on an initialized pipeline: reference prompt -> prompt tensor -> generation -> decode -> optional save.
+// Returns 0 if Pipeline/GenerateParams/TextToInfer is null or the pipeline is not initialized; -3 (KV cache), -4 (no frames),
+// -5 (decode) or -6 (save) on a hard failure; otherwise 1, or -1/-2 when the reference audio failed to encode/load and
+// synthesis ran without it. *AudioBufferOutputLength (optional) receives the sample count reported by decode().
+int S2Synthesize(s2::Pipeline* Pipeline, const s2::GenerateParams* GenerateParams, std::vector<float>* AudioBuffer, std::vector<int32_t>* ReferenceAudioPromptCodes, int32_t* ReferenceAudioTPrompt, const char* ReferenceAudioPath, const char* ReferenceAudioTranscript, const char* TextToInfer, const char* OutputAudioPath, int32_t* AudioBufferOutputLength)
+{
+	if (Pipeline == NULL || GenerateParams == NULL || TextToInfer == NULL) { return false; }
+	if (!Pipeline->initialized_) { return false; }
+	const int32_t num_codebooks = Pipeline->model_.hparams().num_codebooks;
+
+	// 1. Audio Prompt Loading
+	// The prompt cache is optional: without both out-parameters, encode into locals so both pointers are always valid below.
+	std::vector<int32_t> UncachedPromptCodes; int32_t UncachedTPrompt = 0;
+	if (ReferenceAudioPromptCodes == NULL || ReferenceAudioTPrompt == NULL)
+	{
+		ReferenceAudioPromptCodes = &UncachedPromptCodes;
+		ReferenceAudioTPrompt = &UncachedTPrompt;
+	}
+	// encode() returns codes in row-major (num_codebooks, T_prompt) format,
+	// matching the layout expected by build_prompt() (prompt_codes[c*T+t]).
+	const int ReturnCode = InitializeAudioPromptCodes(Pipeline, GenerateParams->n_threads, ReferenceAudioPath,
+		ReferenceAudioPromptCodes, ReferenceAudioTPrompt);
+
+	// 2. Build Prompt Tensor
+	// build_prompt expects prompt_codes as (num_codebooks, T_prompt) row-major,
+	// which is exactly the format produced by encode() above.
+	// A null transcript is passed on as an empty string; std::string must not be constructed from a null pointer.
+	s2::PromptTensor prompt = s2::build_prompt(
+		Pipeline->tokenizer_, std::string(TextToInfer), std::string(ReferenceAudioTranscript != NULL ? ReferenceAudioTranscript : ""),
+		ReferenceAudioPromptCodes->empty() ? nullptr : ReferenceAudioPromptCodes->data(),
+		num_codebooks, *ReferenceAudioTPrompt);
+
+	// 3. Setup KV Cache
+	int32_t max_seq_len = prompt.cols + GenerateParams->max_new_tokens;
+	if (!Pipeline->model_.init_kv_cache(max_seq_len)) {
+		return -3; //Pipeline error: init_kv_cache failed.
+	}
+
+	// 4. Generate
+	// generate() returns GenerateResult.codes in row-major (num_codebooks, n_frames).
+	s2::GenerateResult res = s2::generate(Pipeline->model_, Pipeline->tokenizer_.config(), prompt, *GenerateParams);
+	if (res.n_frames == 0) {
+		return -4; //Pipeline error: generation produced no frames.
+	}
+
+	// 5. Decode
+	// codec_.decode() receives codes in row-major (num_codebooks, n_frames),
+	// which matches GenerateResult.codes layout.
+	std::vector<float> OwnedAudio; // receives the samples when the caller passes no AudioBuffer; freed automatically
+	std::vector<float>* audio_out = AudioBuffer == NULL ? &OwnedAudio : AudioBuffer; int32_t audio_n_frames_out = 0;
+	if (!Pipeline->codec_.decode(res.codes.data(), res.n_frames, GenerateParams->n_threads, *audio_out, &audio_n_frames_out)) {
+		return -5; //Pipeline error: decode failed.
+	}
+	if(AudioBufferOutputLength != NULL) { *AudioBufferOutputLength = audio_n_frames_out; }
+
+	// 6. Save (optional)
+	if(OutputAudioPath != NULL)
+	{
+		// decode() grows the buffer but never shrinks it, so a larger pre-allocated buffer keeps old samples past the valid length.
+		const bool HasStaleTail = audio_n_frames_out > 0 && static_cast<size_t>(audio_n_frames_out) < audio_out->size();
+		std::vector<float> ValidAudio;
+		if (HasStaleTail) { ValidAudio.assign(audio_out->begin(), audio_out->begin() + audio_n_frames_out); }
+		if (!s2::save_audio(std::string(OutputAudioPath), HasStaleTail ? ValidAudio : *audio_out, Pipeline->codec_.sample_rate())) {
+			return -6; //Pipeline error: save_audio failed to write OutputAudioPath.
+		}
+	}
+	return ReturnCode;
+}
\ No newline at end of file
diff --git a/src/s2_generate.cpp b/src/s2_generate.cpp
index b96183b..05d7d7a 100755
--- a/src/s2_generate.cpp
+++ b/src/s2_generate.cpp
@@ -1,3 +1,4 @@
+#include "../include/s2_config.h"
 #include "../include/s2_generate.h"
 #include <iostream>
 #include <limits>
@@ -47,7 +48,7 @@ GenerateResult generate(
 
     StepResult state;
     if (params.verbose) {
-        std::cout << "[Generate] Prefilling " << prompt.cols << " tokens..." << std::endl;
+        if(!SuppressNonEssentialVerbosity) { std::cout << "[Generate] Prefilling " << prompt.cols << " tokens..." << std::endl; }
     }
     if (!model.prefill(prompt_tm, prompt.cols, params.n_threads, state)) {
         std::cerr << "[Generate] Prefill failed." << std::endl;
@@ -93,7 +94,7 @@ GenerateResult generate(
     const float ras_high_top_p    = 0.9f;
 
     if (params.verbose) {
-        std::cout << "[Generate] Generating (max " << params.max_new_tokens << " tokens)..." << std::endl;
+        if(!SuppressNonEssentialVerbosity) { std::cout << "[Generate] Generating (max " << params.max_new_tokens << " tokens)..." << std::endl; }
     }
 
     int32_t step = 0;
@@ -170,7 +171,7 @@ GenerateResult generate(
 
         step++;
         if (params.verbose && step % 50 == 0) {
-            std::cout << "\r[Generate] " << step << " / " << params.max_new_tokens << " tokens..." << std::flush;
+            if(!SuppressNonEssentialVerbosity) { std::cout << "\r[Generate] " << step << " / " << params.max_new_tokens << " tokens..." << std::flush; }
         }
 
         // Apply semantic mask and sample next main token
@@ -179,8 +180,10 @@ GenerateResult generate(
     }
 
     if (params.verbose) {
+        if(!SuppressNonEssentialVerbosity) { 
         std::cout << std::endl;
         std::cout << "[Generate] Done: " << out.n_frames << " frames generated." << std::endl;
+        }
     }
 
     // Compact codes from (num_cb, max_tokens) stride to (num_cb, n_frames) row-major
diff --git a/src/s2_model.cpp b/src/s2_model.cpp
index 350716f..51f5c8d 100755
--- a/src/s2_model.cpp
+++ b/src/s2_model.cpp
@@ -1,3 +1,4 @@
+#include "../include/s2_config.h"
 #include "../include/s2_model.h"
 #include <iostream>
 #include <vector>
@@ -101,7 +102,7 @@ bool SlowARModel::load(const std::string & gguf_path, int32_t gpu_device, int32_
         if (!backend_ && backend_type == 0) {
             backend_ = ggml_backend_vk_init(static_cast<size_t>(gpu_device));
             if (!backend_) {
-                std::cerr << "[Model] Vulkan init failed, falling back to CPU." << std::endl;
+                if(!SuppressNonEssentialVerbosity) { std::cerr << "[Model] Vulkan init failed, falling back to CPU." << std::endl; }
             }
         }
 #endif
@@ -109,12 +110,13 @@ bool SlowARModel::load(const std::string & gguf_path, int32_t gpu_device, int32_
         if (!backend_ && backend_type == 1) {
             backend_ = ggml_backend_cuda_init(static_cast<size_t>(gpu_device));
             if (!backend_) {
-                std::cerr << "[Model] Cuda init failed, falling back to CPU." << std::endl;
+                if(!SuppressNonEssentialVerbosity) { std::cerr << "[Model] Cuda init failed, falling back to CPU." << std::endl; }
             }
         }
 #endif
-        if (!backend_) {
-            std::cerr << "[Model] NPU not compiled, falling back to CPU." << std::endl;
+        if (!backend_)
+        {
+            if(!SuppressNonEssentialVerbosity) { std::cerr << "[Model] NPU not compiled, falling back to CPU." << std::endl; }
         }
     }
     if (!backend_) {
@@ -135,28 +137,28 @@ bool SlowARModel::load(const std::string & gguf_path, int32_t gpu_device, int32_
         return false;
     }
 
-    std::cout << "[Model] Reading metadata from " << gguf_path << std::endl;
+    if(!SuppressNonEssentialVerbosity) { std::cout << "[Model] Reading metadata from " << gguf_path << std::endl; }
 
     // Helpers to read GGUF metadata
     auto get_u32 = [&](const char * key, uint32_t def) -> uint32_t {
         int id = gguf_find_key(ctx_gguf, key);
-        if (id < 0) { std::cerr << "[GGUF] missing key: " << key << " (using default " << def << ")\n"; return def; }
+        if (id < 0) { if(!SuppressNonEssentialVerbosity) { std::cerr << "[GGUF] missing key: " << key << " (using default " << def << ")\n"; } return def; }
         uint32_t v = gguf_get_val_u32(ctx_gguf, id);
-        std::cout << "[GGUF] " << key << " = " << v << "\n";
+        if(!SuppressNonEssentialVerbosity) { std::cout << "[GGUF] " << key << " = " << v << "\n"; }
         return v;
     };
     auto get_f32 = [&](const char * key, float def) -> float {
         int id = gguf_find_key(ctx_gguf, key);
-        if (id < 0) { std::cerr << "[GGUF] missing key: " << key << " (using default " << def << ")\n"; return def; }
+        if (id < 0) { if(!SuppressNonEssentialVerbosity) { std::cerr << "[GGUF] missing key: " << key << " (using default " << def << ")\n"; } return def; }
         float v = gguf_get_val_f32(ctx_gguf, id);
-        std::cout << "[GGUF] " << key << " = " << v << "\n";
+        if(!SuppressNonEssentialVerbosity) { std::cout << "[GGUF] " << key << " = " << v << "\n"; }
         return v;
     };
     auto get_bool = [&](const char * key, bool def) -> bool {
         int id = gguf_find_key(ctx_gguf, key);
-        if (id < 0) { std::cerr << "[GGUF] missing key: " << key << " (using default " << (def?"true":"false") << ")\n"; return def; }
+        if (id < 0) { if(!SuppressNonEssentialVerbosity) { std::cerr << "[GGUF] missing key: " << key << " (using default " << (def?"true":"false") << ")\n"; } return def; }
         bool v = gguf_get_val_bool(ctx_gguf, id);
-        std::cout << "[GGUF] " << key << " = " << (v?"true":"false") << "\n";
+        if(!SuppressNonEssentialVerbosity) { std::cout << "[GGUF] " << key << " = " << (v?"true":"false") << "\n"; }
         return v;
     };
 
@@ -170,7 +172,7 @@ bool SlowARModel::load(const std::string & gguf_path, int32_t gpu_device, int32_
             std::string arch = gguf_get_val_str(ctx_gguf, arch_id);
             arch_prefix = arch + ".";
             hparams_.has_fast_decoder = (arch == "fish-speech");
-            std::cout << "[Model] Architecture: " << arch << std::endl;
+            if(!SuppressNonEssentialVerbosity) { std::cout << "[Model] Architecture: " << arch << std::endl; }
         }
     }
 
@@ -209,11 +211,13 @@ bool SlowARModel::load(const std::string & gguf_path, int32_t gpu_device, int32_
         hparams_.fast_has_project_in   = get_bool("fish_speech.fast_project_in", false);
     }
 
+    if(!SuppressNonEssentialVerbosity) { 
     std::cout << "[Model] Layers: " << hparams_.block_count
               << ", Dim: " << hparams_.embedding_length
               << ", Vocab: " << hparams_.vocab_size
               << ", head_count: " << hparams_.head_count
               << ", has_fast_decoder: " << hparams_.has_fast_decoder << std::endl;
+    }
 
     // ---------------------------------------------------------------------------
     // Load tensor pointers (metadata only — data loaded below)
@@ -341,7 +345,7 @@ bool SlowARModel::load(const std::string & gguf_path, int32_t gpu_device, int32_
     }
 #endif
 
-    std::cout << "[Model] Weights loaded. Total tensors: " << n_tensors << std::endl;
+    if(!SuppressNonEssentialVerbosity) { std::cout << "[Model] Weights loaded. Total tensors: " << n_tensors << std::endl; }
 
     gguf_free(ctx_gguf);
     return true;
diff --git a/src/s2_pipeline.cpp b/src/s2_pipeline.cpp
index 70e015d..b007b74 100644
--- a/src/s2_pipeline.cpp
+++ b/src/s2_pipeline.cpp
@@ -64,8 +64,8 @@ bool Pipeline::synthesize(const PipelineParams & params) {
             safe_print_error_ln("Pipeline warning: load_audio failed, running without reference audio.");
         }
     }
-
-    if (!this->synthesize_raw(params, ref_audio, audio_out)) {
+    int32_t AudioOutFrames = 0;
+    if (!this->synthesize_raw(params, ref_audio, audio_out, &AudioOutFrames)) {
         safe_print_error_ln("Pipeline error: synthesis failed.");
         return false;
     }
@@ -105,7 +105,8 @@ bool Pipeline::synthesize_to_memory(const PipelineParams & params, void** ref_au
         }
     }
 
-    if (!this->synthesize_raw(params, ref_audio, audio_out)) {
+    int32_t AudioOutFrames = 0;
+    if (!this->synthesize_raw(params, ref_audio, audio_out, &AudioOutFrames)) {
         safe_print_error_ln("Pipeline error: synthesis failed.");
         return false;
     }
@@ -132,7 +133,7 @@ bool Pipeline::synthesize_to_memory(const PipelineParams & params, void** ref_au
     return true;
 }
 
-bool Pipeline::synthesize_raw(const PipelineParams & params, AudioData & ref_audio, std::vector<float>& audio_out) {
+bool Pipeline::synthesize_raw(const PipelineParams & params, AudioData & ref_audio, std::vector<float>& audio_out, int32_t* audio_out_length) {
     std::lock_guard<std::mutex> lock(synthesize_mutex_);
 
     if (!initialized_) {
@@ -177,7 +178,7 @@ bool Pipeline::synthesize_raw(const PipelineParams & params, AudioData & ref_aud
         return false;
     }
 
-    if (!codec_.decode(res.codes.data(), res.n_frames, params.gen.n_threads, audio_out)) {
+    if (!codec_.decode(res.codes.data(), res.n_frames, params.gen.n_threads, audio_out, audio_out_length)) {
         safe_print_error_ln("Pipeline error: decode failed.");
         return false;
     }