Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 68 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# Project declaration and global language settings.
cmake_minimum_required(VERSION 3.14)
project(s2cpp LANGUAGES C CXX)

# WIN32_LEAN_AND_MEAN is defined for every target created after this point
# (this directory and its subdirectories). add_compile_definitions() is the
# modern spelling of add_definitions(-D...) and takes the bare macro name.
if(WIN32)
  add_compile_definitions(WIN32_LEAN_AND_MEAN)
endif()

# C++17 is mandatory (no silent fallback to an older standard), and all
# objects are built position-independent.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
Expand All @@ -15,6 +11,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
# Optional GPU backends; all are off by default.
option(S2_VULKAN "Build with Vulkan backend" OFF)
option(S2_CUDA "Build with CUDA backend" OFF)
option(S2_METAL "Build with Metal backend" OFF)

# When ON, the sources are additionally built as a shared library (s2_dll)
# and a static library (s2_lib); the s2 executable is built either way.
option(S2_BUILD_SHARED_LIBRARIES "Also build S2 as a shared library (s2_dll) and a static library (s2_lib)" OFF)

# ---------------------------------------------------------------------------
# GGML (git submodule — not modified)
Expand All @@ -38,6 +35,7 @@ add_subdirectory(ggml)
# s2 executable
# ---------------------------------------------------------------------------
set(S2_SOURCES
src/s2_export_api.cpp
src/s2_audio.cpp
src/s2_tokenizer.cpp
src/s2_sampler.cpp
Expand All @@ -46,36 +44,98 @@ set(S2_SOURCES
src/s2_prompt.cpp
src/s2_generate.cpp
src/s2_pipeline.cpp
src/s2_server.cpp
src/main.cpp
)

# ---------------------------------------------------------------------------
# Targets
#
# The s2 executable is always built. With S2_BUILD_SHARED_LIBRARIES=ON the
# same source list is also built as a shared library (s2_dll) and a static
# library (s2_lib), both compiled with S2_LIBRARY defined.
#
# NOTE(review): S2_SOURCES contains src/main.cpp, so both libraries also
# compile the program entry point -- confirm that this is intended.
# ---------------------------------------------------------------------------
add_executable(s2 ${S2_SOURCES})
set(s2_targets s2)

if(S2_BUILD_SHARED_LIBRARIES)
  add_library(s2_dll SHARED ${S2_SOURCES})
  add_library(s2_lib STATIC ${S2_SOURCES})
  list(APPEND s2_targets s2_dll s2_lib)

  # Only the library flavours are compiled with S2_LIBRARY.
  target_compile_definitions(s2_dll PRIVATE S2_LIBRARY)
  target_compile_definitions(s2_lib PRIVATE S2_LIBRARY)
endif()

# Provides the Threads::Threads imported target used below.
if(UNIX AND NOT APPLE)
  find_package(Threads REQUIRED)
endif()

# Every target gets the same include paths, dependencies and backend defines,
# so they are configured in one loop instead of once per target.
foreach(s2_target IN LISTS s2_targets)
  target_include_directories(${s2_target} PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/include
    ${CMAKE_CURRENT_SOURCE_DIR}/third_party
    ${CMAKE_CURRENT_SOURCE_DIR}/ggml/include
    ${CMAKE_CURRENT_SOURCE_DIR}/ggml/src # for ggml-common.h etc.
  )

  target_link_libraries(${s2_target} PRIVATE ggml)

  if(S2_VULKAN)
    target_compile_definitions(${s2_target} PRIVATE GGML_USE_VULKAN)
  endif()
  if(S2_CUDA)
    target_compile_definitions(${s2_target} PRIVATE GGML_USE_CUDA)
  endif()

  # Platform-specific system libraries.
  if(WIN32)
    target_link_libraries(${s2_target} PRIVATE ws2_32 wsock32)
  elseif(UNIX AND NOT APPLE)
    target_link_libraries(${s2_target} PRIVATE Threads::Threads m)
  endif()
endforeach()

# Install. Destinations are spelled out per artifact kind: executables and
# DLLs are RUNTIME, shared objects are LIBRARY, static and import libraries
# are ARCHIVE.
install(TARGETS ${s2_targets}
  RUNTIME DESTINATION bin
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
)









2 changes: 1 addition & 1 deletion include/s2_codec.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class AudioCodec {
// Decode VQ codes to mono float32 audio.
// codes: (num_codebooks, n_frames) flattened row-major.
bool decode(const int32_t * codes, int32_t n_frames, int32_t n_threads,
std::vector<float> & audio_out);
std::vector<float> & audio_out, int32_t* audio_n_frames_out);

int32_t sample_rate() const { return sample_rate_; }
int32_t hop_length() const { return hop_length_; }
Expand Down
20 changes: 20 additions & 0 deletions include/s2_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#pragma once

// Build configuration shared by the public S2 headers.
//
//   S2_Export
//       Decoration for functions exported from the S2 library.
//       Windows/Cygwin: __declspec(dllexport) when S2_LIBRARY is defined,
//       empty otherwise. Elsewhere: default visibility on GCC-compatible
//       compilers (version 4 or newer), empty otherwise.
//
//   SuppressNonEssentialVerbosity
//       true when compiled with S2_LIBRARY defined, false otherwise.
//       Declared on every platform so code that reads it compiles outside
//       Windows too. Being `static` in a header, each translation unit gets
//       its own copy.

#if defined(_WIN32) || defined(__CYGWIN__)
#  ifdef S2_LIBRARY
#    define S2_Export __declspec(dllexport)
#  else
#    define S2_Export
#  endif
#elif defined(__GNUC__) && __GNUC__ >= 4
#  define S2_Export __attribute__((visibility("default")))
#else
#  define S2_Export
#endif

// [[maybe_unused]] keeps translation units that include this header without
// reading the flag free of unused-variable warnings.
#ifdef S2_LIBRARY
[[maybe_unused]] static bool SuppressNonEssentialVerbosity = true;
#else
[[maybe_unused]] static bool SuppressNonEssentialVerbosity = false;
#endif
44 changes: 44 additions & 0 deletions include/s2_export_api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#pragma once

// C-linkage entry points of the S2 library.
//
// Every object is passed as a raw pointer. Each Alloc* function has a
// matching Release* function.
// NOTE(review): presumably a pointer returned by Alloc* must be handed back
// to the matching Release* exactly once -- confirm in src/s2_export_api.cpp.
// NOTE(review): the meaning of the `int` return values (success/failure
// convention) is defined by the implementation, which is not visible here.

#include <cstdint>  // int32_t
#include <vector>   // std::vector used in the buffer/prompt-code signatures

#include "s2_audio.h"
#include "s2_codec.h"
#include "s2_generate.h"
#include "s2_model.h"
#include "s2_tokenizer.h"
#include "s2_pipeline.h"
#include "s2_prompt.h"
#include "s2_config.h"

extern "C"
{
    // --- Pipeline ----------------------------------------------------------
    S2_Export s2::Pipeline* AllocS2Pipeline();
    S2_Export void ReleaseS2Pipeline(s2::Pipeline* Pipeline);
    S2_Export void SyncS2TokenizerConfigFromS2Model(s2::SlowARModel* Model,
                                                    s2::Tokenizer* Tokenizer);
    S2_Export int InitializeS2Pipeline(s2::Pipeline* Pipeline,
                                       s2::Tokenizer* Tokenizer,
                                       s2::SlowARModel* Model,
                                       s2::AudioCodec* AudioCodec);

    // --- Generation parameters ---------------------------------------------
    S2_Export s2::GenerateParams* AllocS2GenerateParams();
    S2_Export void ReleaseS2GenerateParams(s2::GenerateParams* GenerateParams);
    // NOTE(review): every tunable defaults to -1; presumably -1 means "leave
    // the existing value untouched" -- confirm against the implementation.
    S2_Export int InitializeS2GenerateParams(s2::GenerateParams* GenerateParams,
                                             int32_t max_new_tokens = -1,
                                             float temperature = -1,
                                             float top_p = -1,
                                             int32_t top_k = -1,
                                             int32_t min_tokens_before_end = -1,
                                             int32_t n_threads = -1,
                                             int verbose = -1);

    // --- Model -------------------------------------------------------------
    S2_Export s2::SlowARModel* AllocS2Model();
    S2_Export void ReleaseS2Model(s2::SlowARModel* Model);
    S2_Export int InitializeS2Model(s2::SlowARModel* Model,
                                    const char* gguf_path,
                                    int32_t gpu_device,
                                    int32_t backend_type);

    // --- Tokenizer ---------------------------------------------------------
    S2_Export s2::Tokenizer* AllocS2Tokenizer();
    S2_Export void ReleaseS2Tokenizer(s2::Tokenizer* Tokenizer);
    S2_Export int InitializeS2Tokenizer(s2::Tokenizer* Tokenizer, const char* path);

    // --- Audio codec -------------------------------------------------------
    S2_Export s2::AudioCodec* AllocS2AudioCodec();
    S2_Export void ReleaseS2AudioCodec(s2::AudioCodec* AudioCodec);
    S2_Export int InitializeS2AudioCodec(s2::AudioCodec* AudioCodec,
                                         const char* gguf_path,
                                         int32_t gpu_device,
                                         int32_t backend_type);

    // --- Reference-audio prompt codes --------------------------------------
    S2_Export std::vector<int32_t>* AllocS2AudioPromptCodes();
    S2_Export void ReleaseS2AudioPromptCodes(std::vector<int32_t>* AudioPromptCodes);
    S2_Export int InitializeAudioPromptCodes(s2::Pipeline* Pipeline,
                                             int32_t ThreadCount,
                                             const char* ReferenceAudioPath,
                                             std::vector<int32_t>* AudioPromptCodes,
                                             int* TPrompt);

    // --- Output audio buffer -----------------------------------------------
    S2_Export std::vector<float>* AllocS2AudioBuffer(int InitialSize);
    S2_Export void ReleaseS2AudioBuffer(std::vector<float>* AudioBuffer);
    S2_Export float* GetS2AudioBufferDataPointer(std::vector<float>* AudioBuffer);

    // --- Synthesis ---------------------------------------------------------
    // NOTE(review): AudioBufferOutputLength is presumably the number of valid
    // samples written to AudioBuffer (the buffer may be larger than the
    // output) -- confirm against the implementation.
    S2_Export int S2Synthesize(s2::Pipeline* Pipeline,
                               const s2::GenerateParams* GenerateParams,
                               std::vector<float>* AudioBuffer,
                               std::vector<int32_t>* ReferenceAudioPromptCodes,
                               int32_t* ReferenceAudioTPrompt,
                               const char* ReferenceAudioPath,
                               const char* ReferenceAudioTranscript,
                               const char* TextToInfer,
                               const char* OutputAudioPath,
                               int32_t* AudioBufferOutputLength);
}
4 changes: 2 additions & 2 deletions include/s2_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ class Pipeline {
bool synthesize(const PipelineParams & params);

bool synthesize_to_memory(const PipelineParams & params, void** ref_audio_buffer, size_t* ref_audio_size, void** wav_buffer, size_t* wav_size);
bool synthesize_raw(const PipelineParams & params, AudioData & ref_audio, std::vector<float> & audio_out);
bool synthesize_raw(const PipelineParams & params, AudioData & ref_audio, std::vector<float> & audio_out, int32_t* audio_out_length);

private:
//private:
Tokenizer tokenizer_;
SlowARModel model_;
AudioCodec codec_;
Expand Down
6 changes: 4 additions & 2 deletions src/s2_codec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1028,7 +1028,7 @@ bool AudioCodec::encode(const float * audio, int32_t n_samples, int32_t n_thread
// ---------------------------------------------------------------------------

bool AudioCodec::decode(const int32_t * codes, int32_t n_frames, int32_t n_threads,
std::vector<float> & audio_out) {
std::vector<float> & audio_out, int32_t* audio_n_frames_out) {
if (n_frames <= 0) return false;

// Step 1: dequantize VQ codes to stage vector (n_frames, quantizer_input_dim)
Expand Down Expand Up @@ -1132,10 +1132,12 @@ bool AudioCodec::decode(const int32_t * codes, int32_t n_frames, int32_t n_threa

// audio_t is (1, T) or (C, T) — we expect (1, T), take total elements
const int32_t n_samples = static_cast<int32_t>(ggml_nelements(audio_t));
audio_out.resize(n_samples);
if(audio_out.size() < n_samples) { audio_out.resize(n_samples); } //Allows for pre-allocated audio buffers that don't need to resized each time.
ggml_backend_tensor_get(audio_t, audio_out.data(), 0, n_samples * sizeof(float));
ggml_gallocr_free(allocr);
ggml_free(ctx);

*audio_n_frames_out = n_samples;
}
return true;
}
Expand Down
Loading