Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions LLama.Unittest/MtmdWeightsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public void BasicPropertyChecks()
Assert.True(_mtmdWeights.SupportsVision);
Assert.False(_mtmdWeights.UsesMRope);
Assert.True(_mtmdWeights.UsesNonCausalAttention);
Assert.Equal(-1, _mtmdWeights.AudioBitrate);
Assert.Equal(-1, _mtmdWeights.SampleRate);
}

[Fact,Trait("Category", "NoCI")]
Expand Down Expand Up @@ -143,8 +143,8 @@ public void TokenizeProvidesChunkMetadata()
Assert.True(_mtmdWeights.SupportsVision);
Assert.False(_mtmdWeights.SupportsAudio);

var audioBitrate = _mtmdWeights.AudioBitrate;
Assert.True(audioBitrate <= 0);
var audioSampleRate = _mtmdWeights.SampleRate;
Assert.True(audioSampleRate <= 0);
}
}
}
17 changes: 8 additions & 9 deletions LLama/Batched/BatchedExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ public sealed class BatchedExecutor
/// The <see cref="LLamaWeights"/> this executor is using
/// </summary>
public LLamaWeights Model { get; }


/// <summary>
/// The optional <see cref="MtmdWeights"/> this executor is using
/// </summary>
public MtmdWeights? ClipModel { get; }

/// <summary>
/// Get the number of tokens in the batch, waiting for <see cref="Infer"/> to be called
/// </summary>
Expand Down Expand Up @@ -79,21 +84,15 @@ public int BatchedTokenCount
/// </summary>
/// <param name="model">The model to use</param>
/// <param name="contextParams">Parameters to create a new context</param>
public BatchedExecutor(LLamaWeights model, IContextParams contextParams)
: this(model, contextParams, null)
{
}

public BatchedExecutor(LLamaWeights model, IContextParams contextParams, MtmdWeights? clipModel)
/// <param name="clipModel">Clip model to use for multimodal capabilities</param>
public BatchedExecutor(LLamaWeights model, IContextParams contextParams, MtmdWeights? clipModel = null)
{
Model = model;
Context = model.CreateContext(contextParams);
ClipModel = clipModel;
Epoch = 1;
}

public MtmdWeights? ClipModel { get; }

/// <summary>
/// Start a new <see cref="Conversation"/>
/// </summary>
Expand Down
5 changes: 1 addition & 4 deletions LLama/LLamaEmbedder.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using LLama.Abstractions;
using LLama.Exceptions;
using LLama.Native;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Logging;
using static System.Net.Mime.MediaTypeNames;

namespace LLama;

Expand Down Expand Up @@ -79,7 +76,7 @@ public async Task<IReadOnlyList<float[]>> GetEmbeddings(string input, Cancellati
Context.Dispose();

Context = _weights.CreateContext(_params, _logger);
NativeApi.llama_set_embeddings(Context.NativeHandle, true);
Context.NativeHandle.SetEmbeddings(true);

// Add all of the tokens to the batch
var tokens = Context.Tokenize(input, special: true);
Expand Down
6 changes: 1 addition & 5 deletions LLama/LLamaReranker.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using LLama.Abstractions;
using LLama.Exceptions;
using LLama.Native;
Expand Down Expand Up @@ -44,7 +40,7 @@ public LLamaReranker(LLamaWeights weights, IContextParams @params, ILogger? logg
if (@params.PoolingType != LLamaPoolingType.Rank)
throw new NotSupportedException("Computing rank score, PoolingType must be equal to LLamaPoolingType.Rank");
Context = weights.CreateContext(@params, logger);
NativeApi.llama_set_embeddings(Context.NativeHandle, true);
Context.NativeHandle.SetEmbeddings(true);
}

/// <inheritdoc />
Expand Down
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
</ItemGroup>

<PropertyGroup>
<BinaryReleaseId>ff4affb4c1aa7eb4_v3</BinaryReleaseId>
<BinaryReleaseId>3f7c29d318e317b6</BinaryReleaseId>
</PropertyGroup>

<PropertyGroup>
Expand Down
4 changes: 2 additions & 2 deletions LLama/MtmdWeights.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@
public int EvaluateChunks(SafeMtmdInputChunks chunks, SafeLLamaContextHandle llamaContext, ref int nPast, int seqId, int nBatch, bool logitsLast)
=> NativeHandle.EvaluateChunks(chunks, llamaContext, ref nPast, seqId, nBatch, logitsLast);

public int EvaluateChunk(IntPtr chunkPtr, SafeLLamaContextHandle llamaContext, ref int nPast, int seqId, int nBatch, bool logitsLast)

Check warning on line 113 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / Linux ARM64 CPU

Missing XML comment for publicly visible type or member 'MtmdWeights.EvaluateChunk(IntPtr, SafeLLamaContextHandle, ref int, int, int, bool)'

Check warning on line 113 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / Windows x64 CPU

Missing XML comment for publicly visible type or member 'MtmdWeights.EvaluateChunk(IntPtr, SafeLLamaContextHandle, ref int, int, int, bool)'

Check warning on line 113 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / macOS ARM64 Metal

Missing XML comment for publicly visible type or member 'MtmdWeights.EvaluateChunk(IntPtr, SafeLLamaContextHandle, ref int, int, int, bool)'

Check warning on line 113 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / Linux x64 CPU

Missing XML comment for publicly visible type or member 'MtmdWeights.EvaluateChunk(IntPtr, SafeLLamaContextHandle, ref int, int, int, bool)'
=> NativeHandle.EvaluateChunk(chunkPtr, llamaContext, ref nPast, seqId, nBatch, logitsLast);

public int DecodeImageChunk(IntPtr chunkPtr, SafeLLamaContextHandle llamaContext, IntPtr encodedEmbeddings, ref int nPast, int seqId, int nBatch)

Check warning on line 116 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / Linux ARM64 CPU

Missing XML comment for publicly visible type or member 'MtmdWeights.DecodeImageChunk(IntPtr, SafeLLamaContextHandle, IntPtr, ref int, int, int)'

Check warning on line 116 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / Windows x64 CPU

Missing XML comment for publicly visible type or member 'MtmdWeights.DecodeImageChunk(IntPtr, SafeLLamaContextHandle, IntPtr, ref int, int, int)'

Check warning on line 116 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / macOS ARM64 Metal

Missing XML comment for publicly visible type or member 'MtmdWeights.DecodeImageChunk(IntPtr, SafeLLamaContextHandle, IntPtr, ref int, int, int)'

Check warning on line 116 in LLama/MtmdWeights.cs

View workflow job for this annotation

GitHub Actions / Linux x64 CPU

Missing XML comment for publicly visible type or member 'MtmdWeights.DecodeImageChunk(IntPtr, SafeLLamaContextHandle, IntPtr, ref int, int, int)'
=> NativeHandle.DecodeImageChunk(chunkPtr, llamaContext, encodedEmbeddings, ref nPast, seqId, nBatch);

/// <summary>
Expand All @@ -137,9 +137,9 @@
public bool UsesMRope => NativeHandle.DecodeUseMRope();

/// <summary>
/// Gets the audio bitrate advertised by the model.
/// Gets the audio sample rate advertised by the model.
/// </summary>
public int AudioBitrate => NativeHandle.GetAudioBitrate();
public int SampleRate => NativeHandle.GetAudioSampleRate();

/// <inheritdoc />
public void Dispose() => NativeHandle.Dispose();
Expand Down
6 changes: 6 additions & 0 deletions LLama/Native/GPUSplitMode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,10 @@ public enum GPUSplitMode
/// split layers and KV across GPUs, use tensor parallelism if supported
/// </summary>
Row = 2,

// Undocumented in llama.h
/// <summary>
///
/// </summary>
Tensor = 3,
}
12 changes: 11 additions & 1 deletion LLama/Native/LLamaFtype.cs
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,17 @@ public enum LLamaFtype
/// except 1d tensors
/// </summary>
LLAMA_FTYPE_MOSTLY_MXFP4_MOE = 38,


/// <summary>
/// Except 1d tensors
/// </summary>
LLAMA_FTYPE_MOSTLY_NVFP4 = 39,

/// <summary>
/// Except 1d tensors
/// </summary>
LLAMA_FTYPE_MOSTLY_Q1_0 = 40,

/// <summary>
/// File type was not specified
/// </summary>
Expand Down
12 changes: 12 additions & 0 deletions LLama/Native/LLamaModelImatrixData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace LLama.Native;

// NOTE(review): kept commented out — this mirrors the native `llama_model_imatrix_data`
// struct that `LLamaModelQuantizeParams.imatrix` points to (see the `// LLamaModelImatrixData *`
// note on that field), but it is not currently wrapped/exposed by LLamaSharp.
// Confirm the native layout (char* / float* / nuint) and marshalling before enabling.
/* /// <summary>
///
/// </summary>
/// <remarks>llama_model_imatrix_data</remarks>
public unsafe struct LLamaModelImatrixData
{
    char* name;
    float* data;
    nuint size;
} */
18 changes: 14 additions & 4 deletions LLama/Native/LLamaModelQuantizeParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,25 +79,35 @@ public bool keep_split
}
private sbyte _keep_split;

/// <summary>
/// calculate and show the final quantization size without performing quantization
/// </summary>
public bool dry_run
{
get => Convert.ToBoolean(_dry_run);
set => _dry_run = Convert.ToSByte(value);
}
private sbyte _dry_run;

/// <summary>
/// pointer to importance matrix data
/// </summary>
public IntPtr imatrix;
public IntPtr imatrix; // LLamaModelImatrixData *

/// <summary>
/// pointer to vector containing overrides
/// </summary>
public IntPtr kv_overrides;
public IntPtr kv_overrides; // llama_model_kv_override *

/// <summary>
/// pointer to vector containing tensor types
/// </summary>
public IntPtr tensor_types;
public IntPtr tensor_types; // llama_model_tensor_override *

/// <summary>
/// Pointer to vector containing layer indices to prune
/// </summary>
public IntPtr prune_layers;
public IntPtr prune_layers; // int32 *

/// <summary>
/// Create a LLamaModelQuantizeParams with default values
Expand Down
9 changes: 9 additions & 0 deletions LLama/Native/LLamaModelTensorOverride.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace LLama.Native;

// Unsupported - we can't handle ggml_type since LlamaSharp doesn't wrap/expose ggml
/*
* struct llama_model_tensor_override {
const char * pattern;
enum ggml_type type; // GGMLType might work?
};
*/
16 changes: 14 additions & 2 deletions LLama/Native/LoraAdapter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,24 @@ internal LoraAdapter(SafeLlamaModelHandle model, string path, IntPtr nativePtr)
/// </summary>
public void Unload()
{
// Early exit if already unloaded
if (!Loaded)
return;

// If the model has been unloaded this handle will have been auto unloaded
if (Model.IsClosed)
{
Loaded = false;
return;
}

// Unload
Loaded = false;
llama_adapter_lora_free(Pointer);

// Manually free a LoRA adapter. loaded adapters will be free when the associated model is deleted
// Manually free a LoRA adapter. loaded adapters which have not been
// freed will be automatically freed when the associated model is deleted
[DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
[Obsolete("adapters are now freed together with the associated model")]
static extern void llama_adapter_lora_free(IntPtr adapter);
}
}
61 changes: 58 additions & 3 deletions LLama/Native/NativeApi.Mtmd.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,50 @@ internal struct mtmd_context_params
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_context_params_default", CallingConvention = CallingConvention.Cdecl)]
internal static extern mtmd_context_params mtmd_context_params_default();

/// <summary>
/// Whether we need to set a non-causal attention mask before llama_decode.
/// NOTE(review): the copied native comment reads "if chunk is nullptr, we assume the
/// default case where chunk is an image chunk", but this binding takes only a context
/// handle and no chunk argument — confirm this matches the native signature in the
/// pinned binary release.
/// </summary>
/// <param name="ctx">Handle to the mtmd model/context.</param>
/// <returns>True if a non-causal mask must be set before decoding.</returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_decode_use_non_causal", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_decode_use_non_causal(SafeMtmdModelHandle ctx);

/// <summary>
/// whether the current model use M-RoPE for llama_decode
/// </summary>
/// <param name="ctx"></param>
/// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_decode_use_mrope", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_decode_use_mrope(SafeMtmdModelHandle ctx);

/// <summary>
/// whether the current model supports vision input
/// </summary>
/// <param name="ctx"></param>
/// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_support_vision", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_support_vision(SafeMtmdModelHandle ctx);

/// <summary>
/// whether the current model supports audio input
/// </summary>
/// <param name="ctx"></param>
/// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_support_audio", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
internal static extern bool mtmd_support_audio(SafeMtmdModelHandle ctx);

[DllImport(mtmdLibraryName, EntryPoint = "mtmd_get_audio_bitrate", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_get_audio_bitrate(SafeMtmdModelHandle ctx);
/// <summary>
/// get audio sample rate in Hz, for example 16000 for Whisper
/// </summary>
/// <param name="ctx"></param>
/// <returns></returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_get_audio_sample_rate", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_get_audio_sample_rate(SafeMtmdModelHandle ctx);

// bitmap ------------------------------------------------------------

Expand Down Expand Up @@ -153,9 +179,11 @@ internal static unsafe void mtmd_bitmap_set_id(SafeMtmdEmbed bitmap, string? id)
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_n_tokens", CallingConvention = CallingConvention.Cdecl)]
internal static extern UIntPtr mtmd_image_tokens_get_n_tokens(IntPtr image_tokens);

[Obsolete("use mtmd_image_tokens_get_decoder_pos() instead")]
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_nx", CallingConvention = CallingConvention.Cdecl)]
internal static extern UIntPtr mtmd_image_tokens_get_nx(IntPtr image_tokens);

[Obsolete("use mtmd_image_tokens_get_decoder_pos() instead")]
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_ny", CallingConvention = CallingConvention.Cdecl)]
internal static extern UIntPtr mtmd_image_tokens_get_ny(IntPtr image_tokens);

Expand All @@ -165,10 +193,32 @@ internal static unsafe void mtmd_bitmap_set_id(SafeMtmdEmbed bitmap, string? id)
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_n_pos", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_image_tokens_get_n_pos(IntPtr image_tokens);

/// <summary>
/// Relative decoder position for a single embedding token, as returned by
/// <c>mtmd_image_tokens_get_decoder_pos</c>. Used by M-RoPE models; embedding 0
/// has position (0, 0, 0) and callers must adjust to the current absolute position.
/// Explicit layout mirrors the native struct: three consecutive 32-bit values.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
internal struct mtmd_decoder_pos
{
    /// <summary>Temporal position component.</summary>
    // Fields are `internal` (not the C# default of private) so callers inside the
    // assembly can actually read the marshalled result.
    [FieldOffset(0)]
    internal uint t;

    /// <summary>Horizontal (x) position component.</summary>
    [FieldOffset(4)]
    internal uint x;

    /// <summary>Vertical (y) position component.</summary>
    [FieldOffset(8)]
    internal uint y;
}

/// <summary>
/// get position for decoder attention, to be used by M-RoPE models
/// </summary>
/// <param name="image_tokens"></param>
/// <param name="i">i is the index of the embedding token, ranging from 0 to mtmd_image_tokens_get_n_tokens() - 1</param>
/// <returns>return relative position (for example, embedding 0 will have position (0, 0, 0); remember to adjust it to the current absolute position)</returns>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_image_tokens_get_decoder_pos", CallingConvention = CallingConvention.Cdecl)]
internal static extern mtmd_decoder_pos mtmd_image_tokens_get_decoder_pos(IntPtr image_tokens, nuint i);

// tokenize ----------------------------------------------------------

/// <summary>
/// Native text structure consumed by <see cref="mtmd_tokenize"/>.
/// Native text structure consumed by <see cref="NativeApi.mtmd_tokenize(LLama.Native.SafeMtmdModelHandle,System.IntPtr,in LLama.Native.NativeApi.mtmd_input_text_native,System.IntPtr[],System.UIntPtr)"/>.
/// </summary>
internal unsafe struct mtmd_input_text_native
{
Expand Down Expand Up @@ -259,6 +309,11 @@ internal static unsafe IntPtr mtmd_helper_bitmap_init_from_file(SafeMtmdModelHan
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_helper_get_n_pos", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_helper_get_n_pos(SafeMtmdInputChunks chunks);

/// <summary>
/// Helper to get the list of relative decoder positions corresponding to the
/// embedding tokens, to be used by M-RoPE models.
/// </summary>
/// <param name="image">Pointer to the native <c>mtmd_image_tokens</c>.</param>
/// <param name="out_pos">Pointer to a caller-allocated array of native <c>mtmd_decoder_pos</c>;
/// must have length == <c>mtmd_helper_get_n_tokens(image)</c>.</param>
[DllImport(mtmdLibraryName, EntryPoint = "mtmd_helper_image_get_decoder_pos", CallingConvention = CallingConvention.Cdecl)]
internal static extern void mtmd_helper_image_get_decoder_pos(IntPtr /* mtmd_image_tokens* */ image, IntPtr /* mtmd_decoder_pos* */ out_pos);

[DllImport(mtmdLibraryName, EntryPoint = "mtmd_helper_eval_chunks", CallingConvention = CallingConvention.Cdecl)]
internal static extern int mtmd_helper_eval_chunks(
SafeMtmdModelHandle ctx,
Expand Down
Loading
Loading