From 50e80e1adce6bd93e88250ca3dae0b6bd03b5b2b Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 12:44:04 +0200 Subject: [PATCH 1/7] feat: LFM2.5 text-embedding + ColBERT (MLX/XNNPACK) with prompts & MaxSim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the LFM2.5-Embedding-350M and LFM2.5-ColBERT-350M models, served from HuggingFace (MLX on iOS, XNNPACK on Android / iOS simulator). Text embeddings are unified into one runner and one hook: the native TextEmbeddings model returns the raw [numTokens, embeddingDim] matrix (numTokens === 1 for pooled models, the full sequence for multi-vector / late-interaction models like ColBERT), plus the input token ids. The TS layer reduces it — toVector() for the single-vector case, getTokenVectors() and maxSim() for late interaction. Models trained with asymmetric query/document prompts (LFM uses query:/ document:, ColBERT uses [Q] /[D] ) carry a "prompts" config; forward then requires a role argument ('query' | 'document') that auto-prepends the prompt. The role is type-enforced: required for prompted models, forbidden for plain ones. Also: tokenizer post_processor is now applied for text embeddings so the BOS special token is added (CLS-pooled models depend on it), and the text-to-image Encoder reads the new EmbeddingResult. Example app gains a semantic-search screen and a ColBERT late-interaction search screen demonstrating MaxSim. Authored with Claude. Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/text-embeddings/app/_layout.tsx | 8 + .../app/clip-embeddings/index.tsx | 3 +- apps/text-embeddings/app/colbert/index.tsx | 289 ++++++++++++ .../app/text-embeddings/index.tsx | 439 ++++++++++-------- .../common/rnexecutorch/TokenizerModule.cpp | 19 + .../common/rnexecutorch/TokenizerModule.h | 6 + .../host_objects/JsiConversions.h | 30 ++ .../rnexecutorch/models/embeddings/Types.h | 23 + .../models/embeddings/text/TextEmbeddings.cpp | 22 +- .../models/embeddings/text/TextEmbeddings.h | 8 +- .../models/text_to_image/Encoder.cpp | 7 +- .../src/constants/modelRegistry.ts | 58 +++ .../src/constants/modelUrls.ts | 15 + .../useTextEmbeddings.ts | 18 +- packages/react-native-executorch/src/index.ts | 1 + .../TextEmbeddingsModule.ts | 64 ++- .../src/types/textEmbeddings.ts | 129 +++-- .../src/utils/textEmbeddings.ts | 74 +++ 18 files changed, 938 insertions(+), 275 deletions(-) create mode 100644 apps/text-embeddings/app/colbert/index.tsx create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h create mode 100644 packages/react-native-executorch/src/utils/textEmbeddings.ts diff --git a/apps/text-embeddings/app/_layout.tsx b/apps/text-embeddings/app/_layout.tsx index bb8e1deeb8..57acb26eb2 100644 --- a/apps/text-embeddings/app/_layout.tsx +++ b/apps/text-embeddings/app/_layout.tsx @@ -109,6 +109,14 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} /> + ); diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx index 02a8a9c656..e0232d3440 100644 --- a/apps/text-embeddings/app/clip-embeddings/index.tsx +++ b/apps/text-embeddings/app/clip-embeddings/index.tsx @@ -16,6 +16,7 @@ import { models, useTextEmbeddings, useImageEmbeddings, + toVector, ImageEmbeddingsProps, } from 'react-native-executorch'; @@ -101,7 +102,7 @@ function ClipEmbeddingsScreen() { const txtStart = Date.now(); const scored: { label: string; similarity: number }[] = []; for (const label of labels) { - const textEmbedding = await textModel.forward(label); + const textEmbedding = toVector(await textModel.forward(label)); scored.push({ label, similarity: dotProduct(imageEmbedding, textEmbedding), diff --git a/apps/text-embeddings/app/colbert/index.tsx b/apps/text-embeddings/app/colbert/index.tsx new file mode 100644 index 0000000000..d686168f43 --- /dev/null +++ b/apps/text-embeddings/app/colbert/index.tsx @@ -0,0 +1,289 @@ +import { useEffect, useState } from 'react'; +import { + StyleSheet, + Text, + TextInput, + TouchableOpacity, + View, + SafeAreaView, + ScrollView, + KeyboardAvoidingView, + Platform, +} from 'react-native'; +import { Ionicons } from '@expo/vector-icons'; +import { useIsFocused } from 'expo-router'; +import { + models, + useTextEmbeddings, + maxSim, + EmbeddingResult, +} from 'react-native-executorch'; +import ColorPalette from '../../colors'; +import ErrorBanner from '../../components/ErrorBanner'; + +const colbertModel = models.text_embedding.lfm2_5_colbert_350m(); + +// The library auto-applies the model's [Q]/[D] prompts via forward(text, role). +// Late-interaction MaxSim is a shipped util; the document skiplist (punctuation +// token ids excluded from scoring) is the consumer's choice — these are the +// LFM2.5-ColBERT skiplist ids. +const SKIPLIST = [ + 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, + 535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601, + 602, 603, +]; + +const CORPUS: string[] = [ + 'The forecast says heavy showers this afternoon.', + "It's so sunny outside today!", + 'The home team scored in the final minute to win the match.', + 'Fans packed the stadium for the championship game.', + 'Simmer the tomatoes with garlic before adding the pasta.', + 'He whisked the eggs and folded in the melted chocolate.', + 'The new phone has a faster chip and a brighter screen.', + 'The flight to Tokyo was delayed by three hours.', + 'We hiked along the coast and camped near the cliffs.', +]; + +const EXAMPLE_QUERIES: string[] = [ + "What's the weather like?", + 'Who won the match?', + 'How do I cook dinner?', + 'Tell me about the latest technology', +]; + +type Ranked = { sentence: string; score: number }; + +export default function ColbertScreenWrapper() { + return useIsFocused() ? : null; +} + +function ColbertScreen() { + const model = useTextEmbeddings({ model: colbertModel }); + const [error, setError] = useState(null); + const [query, setQuery] = useState(''); + const [docEncs, setDocEncs] = useState< + { sentence: string; enc: EmbeddingResult }[] + >([]); + const [results, setResults] = useState([]); + const [indexing, setIndexing] = useState(false); + const [encodeTime, setEncodeTime] = useState(null); + + useEffect( + () => { + let cancelled = false; + const indexCorpus = async () => { + if (!model.isReady) return; + setIndexing(true); + setResults([]); + try { + const encs = []; + for (const sentence of CORPUS) { + const enc = await model.forward(sentence, 'document'); + if (cancelled) return; + encs.push({ sentence, enc }); + } + setDocEncs(encs); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } finally { + if (!cancelled) setIndexing(false); + } + }; + indexCorpus(); + return () => { + cancelled = true; + }; + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [model.isReady] + ); + + const runSearch = async (queryText: string = query) => { + const q = queryText.trim(); + if (!model.isReady || !q || docEncs.length === 0) return; + setQuery(queryText); + try { + const start = Date.now(); + const qEnc = await model.forward(q, 'query'); + setEncodeTime(Date.now() - start); + const ranked = docEncs + .map(({ sentence, enc }) => ({ + sentence, + score: maxSim(qEnc, enc, SKIPLIST), + })) + .sort((a, b) => b.score - a.score); + setResults(ranked); + } catch (e) { + setError(e instanceof Error ? e.message : String(e)); + } + }; + + const ready = model.isReady && !indexing && docEncs.length > 0; + const canSearch = ready && !!query.trim(); + + const statusText = model.error + ? `Error: ${model.error}` + : !model.isReady + ? `Loading model ${(model.downloadProgress * 100).toFixed(0)}%` + : indexing + ? 'Indexing corpus…' + : 'Ready'; + + return ( + + + + ColBERT Late-Interaction Search + {statusText} + setError(null)} /> + + + + Search the corpus ({CORPUS.length} sentences) + + + Per-token vectors scored with MaxSim. Tap an example or type a + query. + + + {EXAMPLE_QUERIES.map((q) => ( + runSearch(q)} + > + {q} + + ))} + + runSearch()} + returnKeyType="search" + /> + runSearch()} + style={[styles.button, !canSearch && styles.buttonDisabled]} + disabled={!canSearch} + > + + + {indexing ? 'Indexing…' : 'Search'} + + + {encodeTime !== null && ( + Query encoded in {encodeTime} ms + )} + + + {results.length > 0 && ( + + Results + {results.map((r, i) => ( + + + {r.sentence} + {r.score.toFixed(2)} + + + 0 ? r.score / results[0].score : 0) * 100 + )}%`, + }, + i === 0 && styles.barFillTop, + ]} + /> + + + ))} + + )} + + + + ); +} + +const styles = StyleSheet.create({ + container: { flex: 1, backgroundColor: '#F8FAFC' }, + flex: { flex: 1 }, + scroll: { padding: 20 }, + heading: { fontSize: 22, fontWeight: '500', marginBottom: 8, color: '#0F172A' }, + status: { fontSize: 14, color: '#64748B', marginBottom: 12 }, + card: { + backgroundColor: '#fff', + padding: 16, + borderRadius: 16, + borderColor: '#E2E8F0', + borderWidth: 2, + marginBottom: 20, + }, + sectionTitle: { fontSize: 16, fontWeight: '500', marginBottom: 8, color: '#1E293B' }, + hint: { fontSize: 13, color: '#64748B', marginBottom: 12, lineHeight: 18 }, + chipRow: { flexDirection: 'row', flexWrap: 'wrap', gap: 8, marginBottom: 12 }, + chip: { + backgroundColor: '#EEF2FF', + borderColor: '#C7D2FE', + borderWidth: 1, + borderRadius: 16, + paddingHorizontal: 12, + paddingVertical: 6, + }, + chipDisabled: { opacity: 0.4 }, + chipText: { fontSize: 13, color: 'navy' }, + input: { + backgroundColor: '#F1F5F9', + borderRadius: 10, + padding: 10, + marginBottom: 10, + fontSize: 16, + color: '#0F172A', + minHeight: 40, + }, + button: { + backgroundColor: 'navy', + borderRadius: 10, + paddingVertical: 12, + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'center', + }, + buttonDisabled: { backgroundColor: '#f0f0f0' }, + buttonText: { color: '#fff', fontWeight: '500', marginLeft: 6 }, + buttonTextDisabled: { color: 'gray' }, + stats: { fontSize: 13, color: '#64748B', marginTop: 8, textAlign: 'center' }, + resultRow: { marginBottom: 14 }, + resultHeader: { + flexDirection: 'row', + justifyContent: 'space-between', + marginBottom: 6, + gap: 8, + }, + resultText: { flex: 1, fontSize: 14, color: '#334155' }, + resultScore: { + fontSize: 14, + fontWeight: '600', + color: '#0F172A', + fontVariant: ['tabular-nums'], + }, + barTrack: { height: 8, borderRadius: 4, backgroundColor: '#E2E8F0', overflow: 'hidden' }, + barFill: { height: '100%', borderRadius: 4, backgroundColor: '#94A3B8' }, + barFillTop: { backgroundColor: 'navy' }, +}); diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index 88e39ce063..470094da02 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -15,10 +15,13 @@ import { ModelPicker } from '../../components/ModelPicker'; import { models, useTextEmbeddings, + toVector, TextEmbeddingsProps, } from 'react-native-executorch'; const textEmbedding = models.text_embedding; +// Single-vector (pooled) models: forward() returns the raw result; toVector() +// gives the single embedding. The multi-vector ColBERT model has its own screen. type TextEmbeddingModel = TextEmbeddingsProps['model']; const MODELS: { label: string; value: TextEmbeddingModel }[] = [ @@ -43,6 +46,42 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ label: 'Multilingual Paraphrase', value: textEmbedding.paraphrase_multilingual_minilm_l12_v2(), }, + { + label: 'LFM2.5 Embedding XNNPACK', + value: textEmbedding.lfm2_5_embedding_350m({ backend: 'xnnpack' }), + }, + { + label: 'LFM2.5 Embedding MLX', + value: textEmbedding.lfm2_5_embedding_350m({ backend: 'mlx' }), + }, +]; + +// A multi-topic corpus so semantic ranking is visible: a weather query should +// float the weather lines to the top and push sports/cooking/tech down, even +// with no shared keywords. +const CORPUS: string[] = [ + 'The forecast says heavy showers this afternoon.', + "It's so sunny outside today!", + 'A thick fog rolled in over the harbor at dawn.', + 'The home team scored in the final minute to win the match.', + 'She sprinted the last lap and broke the national record.', + 'Fans packed the stadium for the championship game.', + 'Simmer the tomatoes with garlic before adding the pasta.', + 'He whisked the eggs and folded in the melted chocolate.', + 'The new phone has a faster chip and a brighter screen.', + 'Our servers crashed under the sudden spike in traffic.', + 'The flight to Tokyo was delayed by three hours.', + 'We hiked along the coast and camped near the cliffs.', +]; + +// Tap-to-run example queries. Natural-language questions — how these models +// are trained to be queried — give the cleanest separation. +const EXAMPLE_QUERIES: string[] = [ + "What's the weather like?", + 'Who won the match?', + 'Tell me about the latest technology', + 'How do I cook dinner?', + 'Where did they travel?', ]; import { useIsFocused } from 'expo-router'; import { dotProduct } from '../../utils/math'; @@ -54,6 +93,8 @@ export default function TextEmbeddingsScreenWrapper() { return isFocused ? : null; } +type RankedResult = { sentence: string; similarity: number }; + function TextEmbeddingsScreen() { const [selectedModel, setSelectedModel] = useState( textEmbedding.all_minilm_l6_v2() @@ -61,88 +102,70 @@ function TextEmbeddingsScreen() { const model = useTextEmbeddings({ model: selectedModel }); const [error, setError] = useState(null); - const [inputSentence, setInputSentence] = useState(''); - const [sentencesWithEmbeddings, setSentencesWithEmbeddings] = useState< + const [query, setQuery] = useState(''); + const [corpusEmbeddings, setCorpusEmbeddings] = useState< { sentence: string; embedding: Float32Array }[] >([]); - const [topMatches, setTopMatches] = useState< - { sentence: string; similarity: number }[] - >([]); + const [results, setResults] = useState([]); const [embeddingTime, setEmbeddingTime] = useState(null); + const [indexing, setIndexing] = useState(false); + // Embed the whole corpus once the model is ready (re-runs on model change so + // prefixes / weights match the active model). useEffect( () => { - const computeEmbeddings = async () => { + let cancelled = false; + const indexCorpus = async () => { if (!model.isReady) return; - - const sentences = [ - 'The weather is lovely today.', - "It's so sunny outside!", - 'He drove to the stadium.', - ]; - + setIndexing(true); + setResults([]); try { - const embeddings = []; - for (const sentence of sentences) { - const embedding = await model.forward(sentence); - embeddings.push({ sentence, embedding }); + const embedded = []; + for (const sentence of CORPUS) { + // forward(_, 'document') auto-applies the model's document prompt + // (a no-op for models without one). + const embedding = toVector( + await model.forward(sentence, 'document') + ); + if (cancelled) return; + embedded.push({ sentence, embedding }); } - - setSentencesWithEmbeddings(embeddings); - } catch (e) { - setError(e instanceof Error ? e.message : String(e)); + setCorpusEmbeddings(embedded); + } catch { + // A transient "Model not loaded" can fire while the hook swaps + // models; the effect re-runs once the new model is ready. + } finally { + if (!cancelled) setIndexing(false); } }; - - computeEmbeddings(); + indexCorpus(); + return () => { + cancelled = true; + }; }, + // Re-index when the model becomes ready OR the selected model changes, so + // the corpus is embedded by the active model. The "Model not loaded" race + // is handled by the isReady gate plus clearing the corpus on switch; + // switching sets isReady false→true so the re-run sees the new model. // eslint-disable-next-line react-hooks/exhaustive-deps - [model.isReady] + [model.isReady, selectedModel] ); - const checkSimilarities = async () => { - if (!model.isReady || !inputSentence.trim()) return; - + const runSearch = async (queryText: string = query) => { + const q = queryText.trim(); + if (!model.isReady || !q || corpusEmbeddings.length === 0) return; + setQuery(queryText); try { const start = Date.now(); - const inputEmbedding = await model.forward(inputSentence); + const queryEmbedding = toVector(await model.forward(q, 'query')); setEmbeddingTime(Date.now() - start); - const matches = sentencesWithEmbeddings.map( - ({ sentence, embedding }) => ({ + const ranked = corpusEmbeddings + .map(({ sentence, embedding }) => ({ sentence, - similarity: dotProduct(inputEmbedding, embedding), - }) - ); - matches.sort((a, b) => b.similarity - a.similarity); - setTopMatches(matches.slice(0, 3)); - } catch (e) { - setError(e instanceof Error ? e.message : String(e)); - } - }; - - const addToSentences = async () => { - if (!model.isReady || !inputSentence.trim()) return; - - try { - const start = Date.now(); - const embedding = await model.forward(inputSentence); - setEmbeddingTime(Date.now() - start); - setSentencesWithEmbeddings((prev) => [ - ...prev, - { sentence: inputSentence, embedding }, - ]); - } catch (e) { - setError(e instanceof Error ? e.message : String(e)); - } - - setInputSentence(''); - setTopMatches([]); - }; - - const clearList = async () => { - if (!model.isReady) return; - try { - setSentencesWithEmbeddings([]); + similarity: dotProduct(queryEmbedding, embedding), + })) + .sort((a, b) => b.similarity - a.similarity); + setResults(ranked); } catch (e) { setError(e instanceof Error ? e.message : String(e)); } @@ -158,6 +181,11 @@ function TextEmbeddingsScreen() { return model.isGenerating ? 'Generating...' : 'Model is ready'; }; + // Chips/examples just need a ready, indexed model; the Search button also + // needs a non-empty typed query. + const ready = model.isReady && !indexing && corpusEmbeddings.length > 0; + const canSearch = ready && !!query.trim(); + return ( - Text Embeddings Playground + Semantic Search {getModelStatusText()} { setSelectedModel(m); - setSentencesWithEmbeddings([]); - setTopMatches([]); + setCorpusEmbeddings([]); + setResults([]); + setQuery(''); }} /> setError(null)} /> - List of Existing Sentences - {sentencesWithEmbeddings.map((item, index) => ( - - - {item.sentence} - - ))} - - - Try Your Sentence + + Search the corpus ({CORPUS.length} sentences) + + + Ranks every sentence by meaning. Ask a full question — tap an + example or type your own. + + + {EXAMPLE_QUERIES.map((q) => ( + runSearch(q)} + > + {q} + + ))} + runSearch()} + returnKeyType="search" /> - - runSearch()} + style={[ + styles.buttonPrimary, + !canSearch && styles.buttonDisabled, + ]} + disabled={!canSearch} + > + + - - - Find Similar - - - - - - - Add to List - - - - - - Clear List - - - - + {indexing ? 'Indexing corpus…' : 'Search'} + + {embeddingTime !== null && ( - Embedding time: {embeddingTime} ms + Query embedded in {embeddingTime} ms )} - {topMatches.length > 0 && ( - - Top Matches - {topMatches.map((item, index) => ( - - {item.sentence} ({item.similarity.toFixed(2)}) - - ))} - - )} + + {results.length > 0 && ( + + Results + {results.map((item, index) => ( + + ))} + + )} ); } +// One ranked result with a similarity bar. The bar is scaled relative to the +// top hit so the ranking is visually obvious; the raw cosine is shown too. +function ResultRow({ + sentence, + similarity, + best, + rank, +}: { + sentence: string; + similarity: number; + best: number; + rank: number; +}) { + const fraction = best > 0 ? Math.max(0, similarity / best) : 0; + return ( + + + {sentence} + {similarity.toFixed(2)} + + + + + + ); +} + const styles = StyleSheet.create({ container: { flex: 1, @@ -323,11 +349,68 @@ const styles = StyleSheet.create({ marginBottom: 12, color: '#1E293B', }, - sentenceText: { - fontSize: 14, + hint: { + fontSize: 13, + color: '#64748B', + marginBottom: 12, + lineHeight: 18, + }, + chipRow: { + flexDirection: 'row', + flexWrap: 'wrap', + gap: 8, + marginBottom: 12, + }, + chip: { + backgroundColor: '#EEF2FF', + borderColor: '#C7D2FE', + borderWidth: 1, + borderRadius: 16, + paddingHorizontal: 12, + paddingVertical: 6, + }, + chipDisabled: { + opacity: 0.4, + }, + chipText: { + fontSize: 13, + color: 'navy', + }, + resultRow: { + marginBottom: 14, + }, + resultHeader: { + flexDirection: 'row', + justifyContent: 'space-between', + alignItems: 'flex-start', marginBottom: 6, + gap: 8, + }, + resultText: { + flex: 1, + fontSize: 14, color: '#334155', }, + resultScore: { + fontSize: 14, + fontWeight: '600', + color: '#0F172A', + fontVariant: ['tabular-nums'], + }, + barTrack: { + height: 8, + borderRadius: 4, + backgroundColor: '#E2E8F0', + overflow: 'hidden', + }, + barFill: { + height: '100%', + borderRadius: 4, + backgroundColor: '#94A3B8', + }, + barFillTop: { + backgroundColor: 'navy', + }, input: { backgroundColor: '#F1F5F9', borderRadius: 10, @@ -338,17 +421,8 @@ const styles = StyleSheet.create({ minHeight: 40, textAlignVertical: 'top', }, - buttonContainer: { - width: '100%', - gap: 10, - }, - buttonGroup: { - flexDirection: 'row', - justifyContent: 'space-between', - gap: 10, - }, buttonPrimary: { - flex: 1, + width: '100%', backgroundColor: 'navy', padding: 12, borderRadius: 10, @@ -356,17 +430,6 @@ const styles = StyleSheet.create({ alignItems: 'center', justifyContent: 'center', }, - buttonSecondary: { - flex: 1, - backgroundColor: 'transparent', - borderWidth: 2, - borderColor: 'navy', - padding: 12, - borderRadius: 10, - flexDirection: 'row', - alignItems: 'center', - justifyContent: 'center', - }, buttonDisabled: { backgroundColor: '#f0f0f0', borderColor: '#d3d3d3', @@ -376,17 +439,9 @@ const styles = StyleSheet.create({ textAlign: 'center', fontWeight: '500', }, - buttonTextOutline: { - color: 'navy', - textAlign: 'center', - fontWeight: '500', - }, buttonTextDisabled: { color: 'gray', }, - topMatchesContainer: { - marginTop: 20, - }, statsText: { fontSize: 13, color: '#64748B', diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp index 76e0fb90c7..3315baa2dd 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp @@ -46,6 +46,25 @@ std::vector TokenizerModule::encode(std::string s) const { return encodeResult.get(); } +std::vector +TokenizerModule::encodeWithSpecialTokens(std::string s) const { + if (!tokenizer) { + THROW_NOT_LOADED_ERROR(); + } + + // Passing non-zero bos/eos makes HFTokenizer run the tokenizer.json + // post_processor with add_special_token=true (the underlying encode treats + // these as a flag, not a literal count, when a post_processor is defined). + auto encodeResult = tokenizer->encode(s, /*bos=*/1, /*eos=*/1); + if (!encodeResult.ok()) { + throw RnExecutorchError( + RnExecutorchErrorCode::TokenizerError, + "Unexpected issue occurred while encoding: " + + std::to_string(static_cast(encodeResult.error()))); + } + return encodeResult.get(); +} + std::string TokenizerModule::decode(std::vector vec, bool skipSpecialTokens) const { if (!tokenizer) { diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h index 3c90b25557..a511340af6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h @@ -13,6 +13,12 @@ class TokenizerModule { std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector encode(std::string s) const; + // Like encode, but applies the tokenizer.json post_processor (e.g. + // TemplateProcessing that prepends BOS). Needed by models whose pooling + // depends on the BOS/CLS token (e.g. CLS-pooled text embeddings). Not JS- + // bound; encode() keeps its single-arg signature for the JS API. + [[nodiscard("Registered non-void function")]] std::vector + encodeWithSpecialTokens(std::string s) const; [[nodiscard("Registered non-void function")]] std::string decode(std::vector vec, bool skipSpecialTokens) const; [[nodiscard("Registered non-void function")]] std::string diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index e4209b2f79..8e211f0028 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -707,6 +708,35 @@ getJsiValue(const models::style_transfer::PixelDataResult &result, return obj; } +// Text embedding output: a [numTokens, embeddingDim] fp32 matrix + input token +// ids. Pooled models give numTokens == 1; multi-vector give the full sequence. +// The TS layer reduces to a single vector or keeps the matrix per model config. +inline jsi::Value +getJsiValue(const models::embeddings::EmbeddingResult &result, + jsi::Runtime &runtime) { + jsi::Object obj(runtime); + + auto arrayBuffer = jsi::ArrayBuffer(runtime, result.dataPtr); + auto float32ArrayCtor = + runtime.global().getPropertyAsFunction(runtime, "Float32Array"); + auto float32Array = + float32ArrayCtor.callAsConstructor(runtime, arrayBuffer) + .getObject(runtime); + obj.setProperty(runtime, "dataPtr", float32Array); + + obj.setProperty(runtime, "numTokens", jsi::Value(result.numTokens)); + obj.setProperty(runtime, "embeddingDim", jsi::Value(result.embeddingDim)); + + auto idsArray = jsi::Array(runtime, result.tokenIds.size()); + for (size_t i = 0; i < result.tokenIds.size(); ++i) { + idsArray.setValueAtIndex( + runtime, i, jsi::Value(static_cast(result.tokenIds[i]))); + } + obj.setProperty(runtime, "tokenIds", idsArray); + + return obj; +} + inline jsi::Value getJsiValue( const rnexecutorch::models::semantic_segmentation::SegmentationResult &result, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h new file mode 100644 index 0000000000..f2de1e899a --- /dev/null +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include +#include +#include + +namespace rnexecutorch::models::embeddings { + +// Text embedding output as a [numTokens, embeddingDim] fp32 matrix. Pooled +// single-vector models output numTokens == 1 (the exported graph pools + L2- +// normalizes); multi-vector (late-interaction / ColBERT) models output +// numTokens == sequence length. The TS layer reduces to a single vector or +// keeps the per-token matrix based on the model's config. `tokenIds` are the +// input ids (used JS-side for late-interaction skiplist masking). +struct EmbeddingResult { + std::shared_ptr dataPtr; + int32_t numTokens; + int32_t embeddingDim; + std::vector tokenIds; +}; + +} // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index ba2c3243b2..d673f0ac87 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -16,7 +16,10 @@ TextEmbeddings::TextEmbeddings(const std::string &modelSource, std::make_unique(tokenizerSource, callInvoker)) {} TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) { - auto inputIds = tokenizer->encode(input); + // Apply the tokenizer's post_processor so declared special tokens (e.g. a + // BOS prepended via TemplateProcessing) are added. CLS-pooled embedding + // models read position 0, so a missing BOS corrupts the pooled vector. + auto inputIds = tokenizer->encodeWithSpecialTokens(input); // Tokenizers-cpp return tokens as int32, but text embedding models require // int64 as input std::vector inputIds64; @@ -40,8 +43,7 @@ void TextEmbeddings::unload() noexcept { BaseModel::unload(); } -std::shared_ptr -TextEmbeddings::generate(const std::string input) { +EmbeddingResult TextEmbeddings::generate(const std::string input) { std::scoped_lock lock(inference_mutex_); auto preprocessed = preprocess(input); @@ -58,7 +60,19 @@ TextEmbeddings::generate(const std::string input) { auto forwardResult = BaseModel::forward({tokenIds, attnMask}); CHECK_OK_OR_THROW_FORWARD_ERROR(forwardResult); - return BaseEmbeddings::postprocess(forwardResult); + // Output is [1, numTokens, embeddingDim] (numTokens == 1 for pooled models, + // == sequence length for multi-vector models). Return the raw matrix + the + // input ids; the TS layer reduces to a single vector or keeps the matrix. + auto out = forwardResult->at(0).toTensor(); + auto sizes = out.sizes(); + + EmbeddingResult result; + result.dataPtr = std::make_shared(out.const_data_ptr(), + out.nbytes()); + result.numTokens = static_cast(sizes[sizes.size() - 2]); + result.embeddingDim = static_cast(sizes[sizes.size() - 1]); + result.tokenIds = std::move(preprocessed.inputIds); + return result; } } // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h index 93d0988c04..cb6059b96e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace rnexecutorch { namespace models::embeddings { @@ -18,8 +19,11 @@ class TextEmbeddings final : public BaseEmbeddings { TextEmbeddings(const std::string &modelSource, const std::string &tokenizerSource, std::shared_ptr callInvoker); - [[nodiscard( - "Registered non-void function")]] std::shared_ptr + // Returns the raw [numTokens, embeddingDim] output. Pooled models give + // numTokens == 1; multi-vector (late-interaction) models give the full + // sequence. The TS layer reduces to a single vector or keeps the matrix + // based on the model's config. + [[nodiscard("Registered non-void function")]] EmbeddingResult generate(const std::string input); void unload() noexcept; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp index 68a9a9fef4..6abbccb9c6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp @@ -16,9 +16,12 @@ Encoder::Encoder(const std::string &tokenizerSource, encoderSource, tokenizerSource, callInvoker)) {} std::vector Encoder::generate(std::string input) { - std::shared_ptr embeddingsText = encoder->generate(input); + // TextEmbeddings returns the raw [numTokens, embeddingDim] matrix; this + // encoder pools/uses the flat fp32 buffer directly (dataPtr). + std::shared_ptr embeddingsText = + encoder->generate(input).dataPtr; std::shared_ptr embeddingsUncond = - encoder->generate(std::string(constants::kBosToken)); + encoder->generate(std::string(constants::kBosToken)).dataPtr; assert(embeddingsText->size() == embeddingsUncond->size()); size_t embeddingsSize = embeddingsText->size() / sizeof(float); diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index eb0c98dae7..cb06ccb308 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -198,6 +198,7 @@ function pair( return variant({ xnnpack: { base: baseC, quant: quantC } }); } + // TTS presets bundle model + voice + phonemizer in a single config; they // don't share the `{ modelName: string }` shape of the rest of the registry, // and have no quant/backend axis. Expose them as a plain `() => Config` @@ -260,6 +261,52 @@ const GEMMA4_E2B_MM_VARIANTS = { }, }; +// Asymmetric query/document prompts the LFM models are trained with. +// forward(text, role) auto-prepends these. +const LFM_EMBEDDING_PROMPTS = { query: 'query: ', document: 'document: ' }; +const LFM_COLBERT_PROMPTS = { query: '[Q] ', document: '[D] ' }; + +const LFM2_5_EMBEDDING_350M_VARIANTS = { + mlx: { + base: { + modelName: 'lfm2-5-embedding-350m' as const, + modelSource: M.LFM2_5_EMBEDDING_350M_MLX_MODEL, + tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER, + prompts: LFM_EMBEDDING_PROMPTS, + }, + }, + xnnpack: { + base: { + modelName: 'lfm2-5-embedding-350m' as const, + modelSource: M.LFM2_5_EMBEDDING_350M_XNNPACK_MODEL, + tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER, + prompts: LFM_EMBEDDING_PROMPTS, + }, + }, +}; + +// LFM2.5-ColBERT is a plain text-embedding model from the library's POV: it +// returns per-token vectors. Late-interaction scoring (MaxSim / skiplist) is +// the consumer's concern; the library only auto-applies the role prompts. +const LFM2_5_COLBERT_350M_VARIANTS = { + mlx: { + base: { + modelName: 'lfm2-5-colbert-350m' as const, + modelSource: M.LFM2_5_COLBERT_350M_MLX_MODEL, + tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, + prompts: LFM_COLBERT_PROMPTS, + }, + }, + xnnpack: { + base: { + modelName: 'lfm2-5-colbert-350m' as const, + modelSource: M.LFM2_5_COLBERT_350M_XNNPACK_MODEL, + tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, + prompts: LFM_COLBERT_PROMPTS, + }, + }, +}; + const EFFICIENTNET_V2_S_VARIANTS = { xnnpack: { base: { @@ -742,6 +789,17 @@ export const models = { M.PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_QUANTIZED ), clip_vit_base_patch32_text: base(M.CLIP_VIT_BASE_PATCH32_TEXT), + lfm2_5_embedding_350m: variant(LFM2_5_EMBEDDING_350M_VARIANTS, { + ios: 'mlx', + android: 'xnnpack', + }), + // ColBERT (late-interaction): forward() returns per-token vectors. Scoring + // (markers / MaxSim / skiplist) is the consumer's concern — see the + // colbert example screen for a reference implementation. + lfm2_5_colbert_350m: variant(LFM2_5_COLBERT_350M_VARIANTS, { + ios: 'mlx', + android: 'xnnpack', + }), }, image_embedding: { clip_vit_base_patch32_image: pair( diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 0e36f812ff..7c4b73483c 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -1197,6 +1197,21 @@ const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_QUANTIZED_MODEL = `${URL_PREFIX}-par const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_TOKENIZER = `${URL_PREFIX}-paraphrase-multilingual-MiniLM-L12-v2/${PREVIOUS_VERSION_TAG}/tokenizer.json`; const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/tokenizer.json`; +// LFM2.5-Embedding-350M: XNNPACK 8da4w (Android/CPU), MLX int4 bf16 (iOS GPU, +// physical device only). The exported graph bakes in CLS pooling + L2 norm. +// Requires the runner to add the BOS special token (CLS-pooled at index 0). +export const LFM2_5_EMBEDDING_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_embedding_350m_xnnpack_8da4w.pte`; +export const LFM2_5_EMBEDDING_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_embedding_350m_mlx_int4.pte`; +export const LFM2_5_EMBEDDING_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`; +// LFM2.5-ColBERT-350M: late-interaction multi-vector retriever (per-token +// [S,128]). Same bidirectional backbone as the embedding model + a Linear +// 1024->128 head. forward() returns per-token vectors; late-interaction +// scoring (MaxSim) is the consumer's concern (see the colbert example). +// NOTE: pinned to `resolve/main` for testing — the v0.9.0 tag does not exist +// on this repo yet. Switch to `${PREVIOUS_VERSION_TAG}` once the tag is cut. +export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`; +export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`; +export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/tokenizer.json`; /** * @category Models - Text Embeddings diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts index 31ee179925..b4679b4237 100644 --- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts +++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts @@ -1,20 +1,25 @@ import { TextEmbeddingsModule } from '../../modules/natural_language_processing/TextEmbeddingsModule'; import { useModuleFactory } from '../useModuleFactory'; import { + AnyTextEmbeddingsModel, + EmbeddingRole, + ForwardFn, TextEmbeddingsType, TextEmbeddingsProps, } from '../../types/textEmbeddings'; /** - * React hook for managing a Text Embeddings model instance. + * React hook for a Text Embeddings model. * @category Hooks - * @param TextEmbeddingsProps - Configuration object containing `model` source and optional `preventLoad` flag. - * @returns Ready to use Text Embeddings model. + * @param TextEmbeddingsProps - `model` source + optional `preventLoad`. + * @returns Ready to use embeddings model. `forward` returns the raw + * [numTokens, embeddingDim] result; use `toVector` for a single vector. + * Models with prompts require a `role` ('query' | 'document') on `forward`. */ -export const useTextEmbeddings = ({ +export const useTextEmbeddings = ({ model, preventLoad = false, -}: TextEmbeddingsProps): TextEmbeddingsType => { +}: TextEmbeddingsProps): TextEmbeddingsType => { const { error, isReady, isGenerating, downloadProgress, runForward } = useModuleFactory({ factory: (config, onProgress) => @@ -24,7 +29,8 @@ export const useTextEmbeddings = ({ preventLoad, }); - const forward = (input: string) => runForward((inst) => inst.forward(input)); + const forward = ((input: string, role?: EmbeddingRole) => + runForward((inst) => inst.forward(input, role))) as ForwardFn; return { error, isReady, isGenerating, downloadProgress, forward }; }; diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts index 1f190d41f5..34cdf97d8d 100644 --- a/packages/react-native-executorch/src/index.ts +++ b/packages/react-native-executorch/src/index.ts @@ -212,6 +212,7 @@ export * from './utils/ResourceFetcher'; export * from './utils/ResourceFetcherUtils'; export * from './utils/BaseResourceFetcherClass'; export * from './utils/llm'; +export * from './utils/textEmbeddings'; export * from './common/Logger'; export * from './utils/llms/context_strategy'; export * from './utils/segmentAnythingPrompts'; diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts index 27b0e59ceb..d9ab4f45da 100644 --- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts +++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts @@ -1,5 +1,11 @@ import { ResourceSource } from '../../types/common'; -import { TextEmbeddingsModelName } from '../../types/textEmbeddings'; +import { + AnyTextEmbeddingsModel, + EmbeddingPrompts, + EmbeddingResult, + EmbeddingRole, + TextEmbeddingsModelName, +} from '../../types/textEmbeddings'; import { ResourceFetcher } from '../../utils/ResourceFetcher'; import { BaseModule } from '../BaseModule'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; @@ -7,27 +13,28 @@ import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; import { Logger } from '../../common/Logger'; /** - * Module for generating text embeddings from input text. + * Module for text embeddings. Returns the raw [numTokens, embeddingDim] output + * for any model — pooled (numTokens === 1) or multi-vector. Scoring / pooling + * is the consumer's concern (see the `toVector` util for the single-vector + * common case). * @category Typescript API */ export class TextEmbeddingsModule extends BaseModule { - private constructor(nativeModule: unknown) { + private prompts?: EmbeddingPrompts; + + private constructor(nativeModule: unknown, prompts?: EmbeddingPrompts) { super(); this.nativeModule = nativeModule; + this.prompts = prompts; } /** * Creates a text embeddings instance for a built-in model. - * @param namedSources - An object specifying which built-in model to load and where to fetch it from. - * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1. - * @returns A Promise resolving to a `TextEmbeddingsModule` instance. + * @param namedSources - The model + tokenizer sources. + * @param onDownloadProgress - Optional download progress callback (0..1). */ static async fromModelName( - namedSources: { - modelName: TextEmbeddingsModelName; - modelSource: ResourceSource; - tokenizerSource: ResourceSource; - }, + namedSources: AnyTextEmbeddingsModel, onDownloadProgress: (progress: number) => void = () => {} ): Promise { try { @@ -41,7 +48,8 @@ export class TextEmbeddingsModule extends BaseModule { throw new RnExecutorchError(RnExecutorchErrorCode.DownloadInterrupted); } return new TextEmbeddingsModule( - await global.loadTextEmbeddings(modelPath, tokenizerPath) + await global.loadTextEmbeddings(modelPath, tokenizerPath), + namedSources.prompts ); } catch (error) { Logger.error('Load failed:', error); @@ -50,14 +58,9 @@ export class TextEmbeddingsModule extends BaseModule { } /** - * Creates a text embeddings instance with a user-provided model binary and tokenizer. - * Use this when working with a custom-exported model that is not one of the built-in presets. - * @remarks The native model contract for this method is not formally defined and may change - * between releases. Refer to the native source code for the current expected tensor interface. - * @param modelSource - A fetchable resource pointing to the model binary. - * @param tokenizerSource - A fetchable resource pointing to the tokenizer file. - * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1. - * @returns A Promise resolving to a `TextEmbeddingsModule` instance. + * Creates a text embeddings instance from a custom model binary + tokenizer. + * @remarks The native tensor contract is not formally guaranteed across + * releases. */ static fromCustomModel( modelSource: ResourceSource, @@ -75,13 +78,24 @@ export class TextEmbeddingsModule extends BaseModule { } /** - * Executes the model's forward pass to generate an embedding for the provided text. - * @param input - The text string to embed. - * @returns A Promise resolving to a `Float32Array` containing the embedding vector. + * Embed text. Returns the raw [numTokens, embeddingDim] result. + * @param input - The text to embed. + * @param role - Optional 'query' | 'document'; prepends the model's prompt + * for that role when configured (no-op otherwise). */ - async forward(input: string): Promise { + async forward( + input: string, + role?: EmbeddingRole + ): Promise { if (this.nativeModule == null) throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded); - return new Float32Array(await this.nativeModule.generate(input)); + const prefix = (role && this.prompts?.[role]) || ''; + const res = await this.nativeModule.generate(prefix + input); + return { + vectors: new Float32Array(res.dataPtr), + numTokens: res.numTokens, + embeddingDim: res.embeddingDim, + tokenIds: res.tokenIds, + }; } } diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index d9cd120e26..47e056794f 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -12,65 +12,108 @@ export type TextEmbeddingsModelName = | 'multi-qa-mpnet-base-dot-v1' | 'distiluse-base-multilingual-cased-v2-8da4w' | 'paraphrase-multilingual-minilm-l12-v2-quantized' - | 'clip-vit-base-patch32-text'; + | 'clip-vit-base-patch32-text' + | 'lfm2-5-embedding-350m' + | 'lfm2-5-colbert-350m'; + +/** + * Raw text embedding output: a [numTokens, embeddingDim] fp32 matrix (row- + * major) plus the input token ids. Single-vector (pooled) models give + * numTokens === 1 — use `toVector` for that common case. Multi-vector (late- + * interaction, e.g. ColBERT) models give the full per-token sequence; scoring + * (e.g. MaxSim) is the consumer's concern. + * @category Types + */ +export interface EmbeddingResult { + /** Flat [numTokens * embeddingDim] fp32 vectors (row-major). */ + vectors: Float32Array; + /** Number of token rows (1 for pooled models). */ + numTokens: number; + /** Per-token vector dimension. */ + embeddingDim: number; + /** Input token ids per row. */ + tokenIds: number[]; +} + +/** + * Role for `forward`. Some models are trained with asymmetric query/document + * prompts (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `). + * Passing a role auto-prepends the model's configured prompt for that role. + * @category Types + */ +export type EmbeddingRole = 'query' | 'document'; + +/** + * Asymmetric prompts a model is trained with. When a model config carries + * these, `forward` REQUIRES a `role` so the matching prompt is always applied + * (forgetting it would silently embed raw text and wreck asymmetric retrieval). + * @category Types + */ +export interface EmbeddingPrompts { + query: string; + document: string; +} + +/** A standard (symmetric) embedding model — `forward(text)`, no role. */ +export interface TextEmbeddingsModel { + modelName: TextEmbeddingsModelName; + modelSource: ResourceSource; + tokenizerSource: ResourceSource; + prompts?: undefined; +} + +/** + * An asymmetric model with query/document prompts — `forward(text, role)` with + * role REQUIRED. + */ +export interface PromptedTextEmbeddingsModel { + modelName: TextEmbeddingsModelName; + modelSource: ResourceSource; + tokenizerSource: ResourceSource; + prompts: EmbeddingPrompts; +} + +export type AnyTextEmbeddingsModel = + | TextEmbeddingsModel + | PromptedTextEmbeddingsModel; + +/** + * `forward`'s signature, discriminated by the model: prompted models require a + * `role` argument; standard models take none. + */ +export type ForwardFn = + M extends PromptedTextEmbeddingsModel + ? (input: string, role: EmbeddingRole) => Promise + : (input: string) => Promise; /** * Props for the useTextEmbeddings hook. * @category Types - * @property {object} model - An object containing the model configuration. - * @property {TextEmbeddingsModelName} model.modelName - Unique name identifying the model. - * @property {ResourceSource} model.modelSource - The source of the text embeddings model binary. - * @property {ResourceSource} model.tokenizerSource - The source of the tokenizer JSON file. - * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook. */ -export interface TextEmbeddingsProps { - model: { - /** - * The unique name of the text embeddings model. - */ - modelName: TextEmbeddingsModelName; - /** - * The source of the text embeddings model binary. - */ - modelSource: ResourceSource; - /** - * The source of the tokenizer JSON file. - */ - tokenizerSource: ResourceSource; - }; +export interface TextEmbeddingsProps< + M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel, +> { + model: M; preventLoad?: boolean; } /** - * React hook state and methods for managing a Text Embeddings model instance. + * React hook state and methods for a Text Embeddings model instance. * @category Types */ -export interface TextEmbeddingsType { - /** - * Contains the error message if the model failed to load or during inference. - */ +export interface TextEmbeddingsType< + M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel, +> { error: null | RnExecutorchError; - - /** - * Indicates whether the embeddings model has successfully loaded and is ready for inference. - */ isReady: boolean; - - /** - * Indicates whether the model is currently generating embeddings. - */ isGenerating: boolean; - - /** - * Tracks the progress of the model download process (value between 0 and 1). - */ downloadProgress: number; /** - * Runs the text embeddings model on the provided input string. - * @param input - The text string to embed. - * @returns A promise resolving to a Float32Array containing the vector embeddings. - * @throws {RnExecutorchError} If the model is not loaded or is currently processing another request. + * Embed text into a [numTokens, embeddingDim] result. Pooled models return + * numTokens === 1 (use `toVector`); multi-vector models return the full + * per-token sequence. Models with prompts require a `role` + * ('query' | 'document'); standard models take none. */ - forward(input: string): Promise; + forward: ForwardFn; } diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts new file mode 100644 index 0000000000..c396145489 --- /dev/null +++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts @@ -0,0 +1,74 @@ +import { EmbeddingResult } from '../types/textEmbeddings'; + +/** + * Get the single pooled embedding vector from a result. Convenience for the + * common single-vector case: the exported graph pools + L2-normalizes to a + * [1, embeddingDim] output, so this returns row 0. + * + * For multi-vector (late-interaction) models, prefer the full per-token + * vectors (`getTokenVectors`); row 0 alone is not a meaningful sentence + * embedding there. + * + * @category Utils + */ +export function toVector(result: EmbeddingResult): Float32Array { + return result.vectors.slice(0, result.embeddingDim); +} + +/** + * Split a result's flat `vectors` buffer into per-token rows + * (`numTokens` arrays of length `embeddingDim`). Useful for inspecting or + * storing individual token vectors (e.g. a multi-vector vector DB). + * + * @category Utils + */ +export function getTokenVectors(result: EmbeddingResult): Float32Array[] { + const { vectors, numTokens, embeddingDim } = result; + const rows: Float32Array[] = []; + for (let i = 0; i < numTokens; i++) { + rows.push(vectors.subarray(i * embeddingDim, (i + 1) * embeddingDim)); + } + return rows; +} + +/** + * Late-interaction MaxSim score between a query and a document encoding: + * + * score = Σ_q max_d ( q · d ) + * + * For each query token, takes the max dot product over all (non-skiplist) + * document tokens, then sums across query tokens. Per-token vectors are + * L2-normalized by the graph, so a dot product is a cosine. + * + * `skiplistIds` (e.g. punctuation token ids) are excluded from the document + * side, matching ColBERT's document skiplist. Pass `[]` to score every token. + * + * @category Utils + */ +export function maxSim( + query: EmbeddingResult, + doc: EmbeddingResult, + skiplistIds: number[] = [] +): number { + const dim = query.embeddingDim; + const q = query.vectors; + const d = doc.vectors; + const skip = new Set(skiplistIds); + + let score = 0; + for (let qi = 0; qi < query.numTokens; qi++) { + const qOff = qi * dim; + let best = -Infinity; + for (let di = 0; di < doc.numTokens; di++) { + if (skip.has(doc.tokenIds[di]!)) continue; + const dOff = di * dim; + let dot = 0; + for (let k = 0; k < dim; k++) { + dot += (q[qOff + k] ?? 0) * (d[dOff + k] ?? 0); + } + if (dot > best) best = dot; + } + if (best !== -Infinity) score += best; + } + return score; +} From b2e7e78917bd846773efd6d1f246869ad18f07e9 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 14:34:58 +0200 Subject: [PATCH 2/7] fix: address review on text-embeddings/ColBERT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Migrate the segment-anything (SAM) screen to toVector(forward()) — its CLIP-text path broke when forward started returning EmbeddingResult. - Update the C++ TextEmbeddings integration test for the EmbeddingResult return type (was still using the old OwningArrayBuffer pointer API). - Guard the per-token invariant: throw InvalidModelOutput if output rows != input token count (pooled numTokens==1 exempt), so skiplist masking can't silently misalign if a graph pads/truncates. - Dedup encode()/encodeWithSpecialTokens() into a shared encodeImpl. - Drop the redundant Float32Array copy at the JSI boundary; document the getTokenVectors view lifetime; remove dead BaseEmbeddings::postprocess. Authored with Claude. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/segment_anything/index.tsx | 3 +- .../common/rnexecutorch/TokenizerModule.cpp | 34 ++++++------------- .../common/rnexecutorch/TokenizerModule.h | 5 +++ .../models/embeddings/BaseEmbeddings.cpp | 10 ------ .../models/embeddings/BaseEmbeddings.h | 4 --- .../models/embeddings/text/TextEmbeddings.cpp | 15 ++++++++ .../tests/integration/TextEmbeddingsTest.cpp | 30 ++++++++-------- .../src/constants/modelRegistry.ts | 1 - .../src/constants/modelUrls.ts | 8 ++--- .../TextEmbeddingsModule.ts | 4 ++- .../src/utils/textEmbeddings.ts | 5 +++ 11 files changed, 59 insertions(+), 60 deletions(-) diff --git a/apps/computer-vision/app/segment_anything/index.tsx b/apps/computer-vision/app/segment_anything/index.tsx index ac7bbd06b5..0a7af9e1ed 100644 --- a/apps/computer-vision/app/segment_anything/index.tsx +++ b/apps/computer-vision/app/segment_anything/index.tsx @@ -25,6 +25,7 @@ import { useInstanceSegmentation, useImageEmbeddings, useTextEmbeddings, + toVector, InstanceSegmentationModelSources, SegmentedInstance, FastSAMLabel, @@ -208,7 +209,7 @@ export default function SegmentAnythingScreen() { instanceEmbeddingsRef.current = embeddings; setEmbeddingProgress(null); } - const textEmb = await clipText.forward(textPrompt); + const textEmb = toVector(await clipText.forward(textPrompt)); const match = selectByText( instances, instanceEmbeddingsRef.current, diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp index 3315baa2dd..dfd9243c48 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp @@ -26,17 +26,15 @@ TokenizerModule::TokenizerModule( memorySizeLowerBound = std::filesystem::file_size(modelPath); } -std::vector TokenizerModule::encode(std::string s) const { +// When the tokenizer.json defines a post_processor, the underlying HFTokenizer +// treats non-zero bos/eos as a flag to run it with add_special_token=true (not +// a literal count). So bos=eos=0 skips special tokens; bos=eos=1 applies them. +std::vector TokenizerModule::encodeImpl(const std::string &s, + int8_t bos, int8_t eos) const { if (!tokenizer) { THROW_NOT_LOADED_ERROR(); } - - // If the used tokenizer.json has defined post_processor field, - // setting any of bos or eos arguments to value other than provided constant - // ( which is 0) will result in running the post_processor with - // 'add_special_token' flag - auto encodeResult = - tokenizer->encode(s, numOfAddedBoSTokens, numOfAddedEoSTokens); + auto encodeResult = tokenizer->encode(s, bos, eos); if (!encodeResult.ok()) { throw RnExecutorchError( RnExecutorchErrorCode::TokenizerError, @@ -46,23 +44,13 @@ std::vector TokenizerModule::encode(std::string s) const { return encodeResult.get(); } +std::vector TokenizerModule::encode(std::string s) const { + return encodeImpl(s, numOfAddedBoSTokens, numOfAddedEoSTokens); +} + std::vector TokenizerModule::encodeWithSpecialTokens(std::string s) const { - if (!tokenizer) { - THROW_NOT_LOADED_ERROR(); - } - - // Passing non-zero bos/eos makes HFTokenizer run the tokenizer.json - // post_processor with add_special_token=true (the underlying encode treats - // these as a flag, not a literal count, when a post_processor is defined). - auto encodeResult = tokenizer->encode(s, /*bos=*/1, /*eos=*/1); - if (!encodeResult.ok()) { - throw RnExecutorchError( - RnExecutorchErrorCode::TokenizerError, - "Unexpected issue occurred while encoding: " + - std::to_string(static_cast(encodeResult.error()))); - } - return encodeResult.get(); + return encodeImpl(s, /*bos=*/1, /*eos=*/1); } std::string TokenizerModule::decode(std::vector vec, diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h index a511340af6..09877dfc65 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h @@ -30,6 +30,11 @@ class TokenizerModule { std::size_t getMemoryLowerBound() const noexcept; private: + // Shared encode implementation. bos/eos act as an add-special-tokens flag + // (not a literal count) when the tokenizer.json defines a post_processor. + std::vector encodeImpl(const std::string &s, int8_t bos, + int8_t eos) const; + std::unique_ptr tokenizer; std::size_t memorySizeLowerBound{0}; }; diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp index bf291136c1..e777be6704 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp @@ -1,19 +1,9 @@ #include "BaseEmbeddings.h" -#include - namespace rnexecutorch::models::embeddings { BaseEmbeddings::BaseEmbeddings(const std::string &modelSource, std::shared_ptr callInvoker) : BaseModel(modelSource, callInvoker) {} -std::shared_ptr -BaseEmbeddings::postprocess(const Result> &forwardResult) { - auto forwardResultTensor = forwardResult->at(0).toTensor(); - auto buffer = std::make_shared( - forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes()); - return buffer; -} - } // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h index 216d6bf8ce..4b37a3fe93 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h @@ -8,10 +8,6 @@ class BaseEmbeddings : public BaseModel { public: BaseEmbeddings(const std::string &modelSource, std::shared_ptr callInvoker); - -protected: - std::shared_ptr - postprocess(const Result> &forwardResult); }; }; // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index d673f0ac87..26f3157690 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -72,6 +72,21 @@ EmbeddingResult TextEmbeddings::generate(const std::string input) { result.numTokens = static_cast(sizes[sizes.size() - 2]); result.embeddingDim = static_cast(sizes[sizes.size() - 1]); result.tokenIds = std::move(preprocessed.inputIds); + + // Invariant for multi-vector models: one output row per input token, so + // numTokens (from the output tensor) must equal tokenIds.size() (from the + // input). Consumers index tokenIds[i] per output row (e.g. skiplist masking), + // which silently breaks if the graph ever pads/truncates the sequence. + // (Pooled models legitimately collapse to numTokens == 1.) + if (result.numTokens != 1 && + result.numTokens != static_cast(result.tokenIds.size())) { + throw RnExecutorchError( + RnExecutorchErrorCode::InvalidModelOutput, + "Embedding output rows (" + std::to_string(result.numTokens) + + ") != input tokens (" + + std::to_string(result.tokenIds.size()) + + "); per-token tokenIds alignment is broken."); + } return result; } diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp index ff1abd4c30..0e0cc846b5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp @@ -53,23 +53,23 @@ TEST(TextEmbeddingsGenerateTests, EmptyStringReturnsResults) { TextEmbeddings model(kValidTextEmbeddingsModelPath, kValidTextEmbeddingsTokenizerPath, nullptr); auto result = model.generate(""); - EXPECT_NE(result, nullptr); - EXPECT_GT(result->size(), 0u); + EXPECT_NE(result.dataPtr, nullptr); + EXPECT_GT(result.dataPtr->size(), 0u); } TEST(TextEmbeddingsGenerateTests, ValidTextReturnsResults) { TextEmbeddings model(kValidTextEmbeddingsModelPath, kValidTextEmbeddingsTokenizerPath, nullptr); auto result = model.generate("Hello, world!"); - EXPECT_NE(result, nullptr); - EXPECT_GT(result->size(), 0u); + EXPECT_NE(result.dataPtr, nullptr); + EXPECT_GT(result.dataPtr->size(), 0u); } TEST(TextEmbeddingsGenerateTests, ResultsHaveCorrectSize) { TextEmbeddings model(kValidTextEmbeddingsModelPath, kValidTextEmbeddingsTokenizerPath, nullptr); auto result = model.generate("This is a test sentence."); - size_t numFloats = result->size() / sizeof(float); + size_t numFloats = result.dataPtr->size() / sizeof(float); EXPECT_EQ(numFloats, kMiniLmEmbeddingDimensions); } @@ -78,8 +78,8 @@ TEST(TextEmbeddingsGenerateTests, ResultsAreNormalized) { kValidTextEmbeddingsTokenizerPath, nullptr); auto result = model.generate("The quick brown fox jumps over the lazy dog."); - const float *data = reinterpret_cast(result->data()); - size_t numFloats = result->size() / sizeof(float); + const float *data = reinterpret_cast(result.dataPtr->data()); + size_t numFloats = result.dataPtr->size() / sizeof(float); float sumOfSquares = 0.0f; for (size_t i = 0; i < numFloats; ++i) { @@ -94,8 +94,8 @@ TEST(TextEmbeddingsGenerateTests, ResultsContainValidValues) { kValidTextEmbeddingsTokenizerPath, nullptr); auto result = model.generate("Testing valid values."); - const float *data = reinterpret_cast(result->data()); - size_t numFloats = result->size() / sizeof(float); + const float *data = reinterpret_cast(result.dataPtr->data()); + size_t numFloats = result.dataPtr->size() / sizeof(float); for (size_t i = 0; i < numFloats; ++i) { EXPECT_FALSE(std::isnan(data[i])); @@ -110,9 +110,9 @@ TEST(TextEmbeddingsGenerateTests, DifferentTextProducesDifferentEmbeddings) { auto result1 = model.generate("Hello, world!"); auto result2 = model.generate("Goodbye, moon!"); - const float *data1 = reinterpret_cast(result1->data()); - const float *data2 = reinterpret_cast(result2->data()); - size_t numFloats = result1->size() / sizeof(float); + const float *data1 = reinterpret_cast(result1.dataPtr->data()); + const float *data2 = reinterpret_cast(result2.dataPtr->data()); + size_t numFloats = result1.dataPtr->size() / sizeof(float); bool allEqual = true; for (size_t i = 0; i < numFloats; ++i) { @@ -131,9 +131,9 @@ TEST(TextEmbeddingsGenerateTests, SimilarTextProducesSimilarEmbeddings) { auto result1 = model.generate("I love programming"); auto result2 = model.generate("I enjoy coding"); - const float *data1 = reinterpret_cast(result1->data()); - const float *data2 = reinterpret_cast(result2->data()); - size_t numFloats = result1->size() / sizeof(float); + const float *data1 = reinterpret_cast(result1.dataPtr->data()); + const float *data2 = reinterpret_cast(result2.dataPtr->data()); + size_t numFloats = result1.dataPtr->size() / sizeof(float); float dotProduct = 0.0f; for (size_t i = 0; i < numFloats; ++i) { diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index cb06ccb308..f411631aac 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -198,7 +198,6 @@ function pair( return variant({ xnnpack: { base: baseC, quant: quantC } }); } - // TTS presets bundle model + voice + phonemizer in a single config; they // don't share the `{ modelName: string }` shape of the rest of the registry, // and have no quant/backend axis. Expose them as a plain `() => Config` diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 7c4b73483c..8fdebb1a6d 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -1207,11 +1207,9 @@ export const LFM2_5_EMBEDDING_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-embedding-3 // [S,128]). Same bidirectional backbone as the embedding model + a Linear // 1024->128 head. forward() returns per-token vectors; late-interaction // scoring (MaxSim) is the consumer's concern (see the colbert example). -// NOTE: pinned to `resolve/main` for testing — the v0.9.0 tag does not exist -// on this repo yet. Switch to `${PREVIOUS_VERSION_TAG}` once the tag is cut. -export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`; -export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`; -export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/tokenizer.json`; +export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`; +export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`; +export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`; /** * @category Models - Text Embeddings diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts index d9ab4f45da..c11b9c9aff 100644 --- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts +++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts @@ -91,8 +91,10 @@ export class TextEmbeddingsModule extends BaseModule { throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded); const prefix = (role && this.prompts?.[role]) || ''; const res = await this.nativeModule.generate(prefix + input); + // res.dataPtr is already a Float32Array view over the owned native buffer + // (built at the JSI boundary), so use it directly — no extra copy. return { - vectors: new Float32Array(res.dataPtr), + vectors: res.dataPtr as Float32Array, numTokens: res.numTokens, embeddingDim: res.embeddingDim, tokenIds: res.tokenIds, diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts index c396145489..da10d9aa08 100644 --- a/packages/react-native-executorch/src/utils/textEmbeddings.ts +++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts @@ -20,6 +20,11 @@ export function toVector(result: EmbeddingResult): Float32Array { * (`numTokens` arrays of length `embeddingDim`). Useful for inspecting or * storing individual token vectors (e.g. a multi-vector vector DB). * + * The rows are zero-copy `subarray` VIEWS over `result.vectors` — valid only + * while that buffer is alive and not mutated. Copy them (e.g. `new + * Float32Array(row)`) before storing beyond the result's lifetime. (`toVector` + * by contrast returns an independent copy.) + * * @category Utils */ export function getTokenVectors(result: EmbeddingResult): Float32Array[] { From cf74973f94ada255bcc00eaa910b724ff50658c4 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 14:50:16 +0200 Subject: [PATCH 3/7] refactor: make useTextEmbeddings.forward non-breaking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit forward(text) returns a single pooled Float32Array again for standard models — restoring the original API, so MiniLM/MPNet/CLIP/SAM consumers need no migration. The reduction (row 0 of the native [numTokens, embeddingDim] matrix) happens in the TS module, not at the call site. Multi-vector (late-interaction) models opt in via a `multiVector: true` config flag; for those, forward returns the full per-token EmbeddingResult so MaxSim/skiplist work. Return type is discriminated by the flag, and the role argument by `prompts` (required when prompted, none when not). Authored with Claude. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/segment_anything/index.tsx | 3 +- .../app/clip-embeddings/index.tsx | 3 +- .../app/text-embeddings/index.tsx | 14 ++-- .../src/constants/modelRegistry.ts | 2 + .../useTextEmbeddings.ts | 4 +- .../TextEmbeddingsModule.ts | 44 ++++++++---- .../src/types/textEmbeddings.ts | 70 ++++++++++--------- 7 files changed, 79 insertions(+), 61 deletions(-) diff --git a/apps/computer-vision/app/segment_anything/index.tsx b/apps/computer-vision/app/segment_anything/index.tsx index 0a7af9e1ed..ac7bbd06b5 100644 --- a/apps/computer-vision/app/segment_anything/index.tsx +++ b/apps/computer-vision/app/segment_anything/index.tsx @@ -25,7 +25,6 @@ import { useInstanceSegmentation, useImageEmbeddings, useTextEmbeddings, - toVector, InstanceSegmentationModelSources, SegmentedInstance, FastSAMLabel, @@ -209,7 +208,7 @@ export default function SegmentAnythingScreen() { instanceEmbeddingsRef.current = embeddings; setEmbeddingProgress(null); } - const textEmb = toVector(await clipText.forward(textPrompt)); + const textEmb = await clipText.forward(textPrompt); const match = selectByText( instances, instanceEmbeddingsRef.current, diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx index e0232d3440..02a8a9c656 100644 --- a/apps/text-embeddings/app/clip-embeddings/index.tsx +++ b/apps/text-embeddings/app/clip-embeddings/index.tsx @@ -16,7 +16,6 @@ import { models, useTextEmbeddings, useImageEmbeddings, - toVector, ImageEmbeddingsProps, } from 'react-native-executorch'; @@ -102,7 +101,7 @@ function ClipEmbeddingsScreen() { const txtStart = Date.now(); const scored: { label: string; similarity: number }[] = []; for (const label of labels) { - const textEmbedding = toVector(await textModel.forward(label)); + const textEmbedding = await textModel.forward(label); scored.push({ label, similarity: dotProduct(imageEmbedding, textEmbedding), diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index 470094da02..8cb6777843 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -15,13 +15,12 @@ import { ModelPicker } from '../../components/ModelPicker'; import { models, useTextEmbeddings, - toVector, TextEmbeddingsProps, } from 'react-native-executorch'; const textEmbedding = models.text_embedding; -// Single-vector (pooled) models: forward() returns the raw result; toVector() -// gives the single embedding. The multi-vector ColBERT model has its own screen. +// Single-vector (pooled) models: forward() returns a Float32Array directly. +// The multi-vector ColBERT model has its own screen. type TextEmbeddingModel = TextEmbeddingsProps['model']; const MODELS: { label: string; value: TextEmbeddingModel }[] = [ @@ -123,10 +122,9 @@ function TextEmbeddingsScreen() { const embedded = []; for (const sentence of CORPUS) { // forward(_, 'document') auto-applies the model's document prompt - // (a no-op for models without one). - const embedding = toVector( - await model.forward(sentence, 'document') - ); + // (a no-op for models without one). Single-vector models return + // a Float32Array directly. + const embedding = await model.forward(sentence, 'document'); if (cancelled) return; embedded.push({ sentence, embedding }); } @@ -157,7 +155,7 @@ function TextEmbeddingsScreen() { setQuery(queryText); try { const start = Date.now(); - const queryEmbedding = toVector(await model.forward(q, 'query')); + const queryEmbedding = await model.forward(q, 'query'); setEmbeddingTime(Date.now() - start); const ranked = corpusEmbeddings .map(({ sentence, embedding }) => ({ diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index f411631aac..c2e3a2a21d 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -294,6 +294,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = { modelSource: M.LFM2_5_COLBERT_350M_MLX_MODEL, tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, prompts: LFM_COLBERT_PROMPTS, + multiVector: true as const, }, }, xnnpack: { @@ -302,6 +303,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = { modelSource: M.LFM2_5_COLBERT_350M_XNNPACK_MODEL, tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, prompts: LFM_COLBERT_PROMPTS, + multiVector: true as const, }, }, }; diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts index b4679b4237..2f100b8cbb 100644 --- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts +++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts @@ -1,9 +1,9 @@ import { TextEmbeddingsModule } from '../../modules/natural_language_processing/TextEmbeddingsModule'; import { useModuleFactory } from '../useModuleFactory'; import { - AnyTextEmbeddingsModel, EmbeddingRole, ForwardFn, + TextEmbeddingsModel, TextEmbeddingsType, TextEmbeddingsProps, } from '../../types/textEmbeddings'; @@ -16,7 +16,7 @@ import { * [numTokens, embeddingDim] result; use `toVector` for a single vector. * Models with prompts require a `role` ('query' | 'document') on `forward`. */ -export const useTextEmbeddings = ({ +export const useTextEmbeddings = ({ model, preventLoad = false, }: TextEmbeddingsProps): TextEmbeddingsType => { diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts index c11b9c9aff..abb620e981 100644 --- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts +++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts @@ -1,9 +1,9 @@ import { ResourceSource } from '../../types/common'; import { - AnyTextEmbeddingsModel, EmbeddingPrompts, EmbeddingResult, EmbeddingRole, + TextEmbeddingsModel, TextEmbeddingsModelName, } from '../../types/textEmbeddings'; import { ResourceFetcher } from '../../utils/ResourceFetcher'; @@ -13,28 +13,35 @@ import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils'; import { Logger } from '../../common/Logger'; /** - * Module for text embeddings. Returns the raw [numTokens, embeddingDim] output - * for any model — pooled (numTokens === 1) or multi-vector. Scoring / pooling - * is the consumer's concern (see the `toVector` util for the single-vector - * common case). + * Module for text embeddings. `forward` returns a single pooled `Float32Array` + * for standard models, or the per-token `EmbeddingResult` for `multiVector` + * (late-interaction) models. The native runner always produces the raw + * [numTokens, embeddingDim] matrix; the reduction to a single vector happens + * here so the common single-vector API stays `Float32Array`. * @category Typescript API */ export class TextEmbeddingsModule extends BaseModule { private prompts?: EmbeddingPrompts; + private multiVector: boolean; - private constructor(nativeModule: unknown, prompts?: EmbeddingPrompts) { + private constructor( + nativeModule: unknown, + prompts: EmbeddingPrompts | undefined, + multiVector: boolean + ) { super(); this.nativeModule = nativeModule; this.prompts = prompts; + this.multiVector = multiVector; } /** * Creates a text embeddings instance for a built-in model. - * @param namedSources - The model + tokenizer sources. + * @param namedSources - The model config (+ optional prompts / multiVector). * @param onDownloadProgress - Optional download progress callback (0..1). */ static async fromModelName( - namedSources: AnyTextEmbeddingsModel, + namedSources: TextEmbeddingsModel, onDownloadProgress: (progress: number) => void = () => {} ): Promise { try { @@ -49,7 +56,8 @@ export class TextEmbeddingsModule extends BaseModule { } return new TextEmbeddingsModule( await global.loadTextEmbeddings(modelPath, tokenizerPath), - namedSources.prompts + namedSources.prompts, + namedSources.multiVector ?? false ); } catch (error) { Logger.error('Load failed:', error); @@ -78,23 +86,29 @@ export class TextEmbeddingsModule extends BaseModule { } /** - * Embed text. Returns the raw [numTokens, embeddingDim] result. + * Embed text. Standard models return the single pooled `Float32Array`; + * `multiVector` models return the per-token `EmbeddingResult`. * @param input - The text to embed. - * @param role - Optional 'query' | 'document'; prepends the model's prompt - * for that role when configured (no-op otherwise). + * @param role - 'query' | 'document'; prepends the model's prompt for that + * role when configured (no-op otherwise). */ async forward( input: string, role?: EmbeddingRole - ): Promise { + ): Promise { if (this.nativeModule == null) throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded); const prefix = (role && this.prompts?.[role]) || ''; const res = await this.nativeModule.generate(prefix + input); // res.dataPtr is already a Float32Array view over the owned native buffer - // (built at the JSI boundary), so use it directly — no extra copy. + // (built at the JSI boundary). + const vectors = res.dataPtr as Float32Array; + if (!this.multiVector) { + // Pooled models output [1, embeddingDim]; return that single row. + return vectors.subarray(0, res.embeddingDim); + } return { - vectors: res.dataPtr as Float32Array, + vectors, numTokens: res.numTokens, embeddingDim: res.embeddingDim, tokenIds: res.tokenIds, diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index 47e056794f..c013cb818b 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -17,17 +17,16 @@ export type TextEmbeddingsModelName = | 'lfm2-5-colbert-350m'; /** - * Raw text embedding output: a [numTokens, embeddingDim] fp32 matrix (row- - * major) plus the input token ids. Single-vector (pooled) models give - * numTokens === 1 — use `toVector` for that common case. Multi-vector (late- - * interaction, e.g. ColBERT) models give the full per-token sequence; scoring - * (e.g. MaxSim) is the consumer's concern. + * Per-token (multi-vector) embedding output for late-interaction models (e.g. + * ColBERT): a [numTokens, embeddingDim] fp32 matrix (row-major) plus the input + * token ids. Standard models return a single pooled `Float32Array` from + * `forward` instead; only `multiVector` models yield this. * @category Types */ export interface EmbeddingResult { /** Flat [numTokens * embeddingDim] fp32 vectors (row-major). */ vectors: Float32Array; - /** Number of token rows (1 for pooled models). */ + /** Number of token rows. */ numTokens: number; /** Per-token vector dimension. */ embeddingDim: number; @@ -54,44 +53,52 @@ export interface EmbeddingPrompts { document: string; } -/** A standard (symmetric) embedding model — `forward(text)`, no role. */ +/** + * A text embeddings model config. Two optional flags drive `forward`: + * - `prompts` present -> `forward` REQUIRES a `role` (auto-prepends the prompt) + * - `multiVector` true -> `forward` returns the per-token `EmbeddingResult`; + * otherwise it returns a single pooled `Float32Array`. + * @category Types + */ export interface TextEmbeddingsModel { modelName: TextEmbeddingsModelName; modelSource: ResourceSource; tokenizerSource: ResourceSource; - prompts?: undefined; + prompts?: EmbeddingPrompts; + multiVector?: boolean; } /** - * An asymmetric model with query/document prompts — `forward(text, role)` with - * role REQUIRED. + * `forward`'s signature, computed from the model config: + * - return type: `EmbeddingResult` if `multiVector`, else `Float32Array`. + * - role arg: required if the model has `prompts`, else absent. */ -export interface PromptedTextEmbeddingsModel { - modelName: TextEmbeddingsModelName; - modelSource: ResourceSource; - tokenizerSource: ResourceSource; - prompts: EmbeddingPrompts; -} - -export type AnyTextEmbeddingsModel = - | TextEmbeddingsModel - | PromptedTextEmbeddingsModel; +export type ForwardReturn = + M extends { multiVector: true } ? EmbeddingResult : Float32Array; /** - * `forward`'s signature, discriminated by the model: prompted models require a - * `role` argument; standard models take none. + * `forward`'s signature, computed from the model config: + * - A model that DEFINITELY has prompts -> `role` is REQUIRED. + * - A model that definitely has NO prompts (`prompts?: undefined`) -> no role. + * - Otherwise (prompts optional / unknown, e.g. a heterogeneous model list) -> + * `role` is OPTIONAL. */ -export type ForwardFn = - M extends PromptedTextEmbeddingsModel - ? (input: string, role: EmbeddingRole) => Promise - : (input: string) => Promise; +export type ForwardFn = M extends { + prompts: EmbeddingPrompts; +} + ? (input: string, role: EmbeddingRole) => Promise> + : undefined extends M['prompts'] + ? M['prompts'] extends undefined + ? (input: string) => Promise> + : (input: string, role?: EmbeddingRole) => Promise> + : (input: string) => Promise>; /** * Props for the useTextEmbeddings hook. * @category Types */ export interface TextEmbeddingsProps< - M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel, + M extends TextEmbeddingsModel = TextEmbeddingsModel, > { model: M; preventLoad?: boolean; @@ -102,7 +109,7 @@ export interface TextEmbeddingsProps< * @category Types */ export interface TextEmbeddingsType< - M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel, + M extends TextEmbeddingsModel = TextEmbeddingsModel, > { error: null | RnExecutorchError; isReady: boolean; @@ -110,10 +117,9 @@ export interface TextEmbeddingsType< downloadProgress: number; /** - * Embed text into a [numTokens, embeddingDim] result. Pooled models return - * numTokens === 1 (use `toVector`); multi-vector models return the full - * per-token sequence. Models with prompts require a `role` - * ('query' | 'document'); standard models take none. + * Embed text. Standard models return a single pooled `Float32Array`; + * `multiVector` models return the per-token `EmbeddingResult`. Models with + * `prompts` require a `role` ('query' | 'document'). */ forward: ForwardFn; } From e12fb039e08a018784d9341dc6d4ac37b88bdd3d Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 15:07:12 +0200 Subject: [PATCH 4/7] refactor: move skiplist to model config, MaxSim scoring to app Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/text-embeddings/app/colbert/index.tsx | 15 +++---- apps/text-embeddings/utils/math.ts | 34 +++++++++++++++ .../src/constants/modelRegistry.ts | 10 +++++ .../src/types/textEmbeddings.ts | 7 ++++ .../src/utils/textEmbeddings.ts | 42 ------------------- 5 files changed, 56 insertions(+), 52 deletions(-) diff --git a/apps/text-embeddings/app/colbert/index.tsx b/apps/text-embeddings/app/colbert/index.tsx index d686168f43..5136aad9f1 100644 --- a/apps/text-embeddings/app/colbert/index.tsx +++ b/apps/text-embeddings/app/colbert/index.tsx @@ -15,23 +15,18 @@ import { useIsFocused } from 'expo-router'; import { models, useTextEmbeddings, - maxSim, EmbeddingResult, } from 'react-native-executorch'; import ColorPalette from '../../colors'; import ErrorBanner from '../../components/ErrorBanner'; +import { maxSim } from '../../utils/math'; const colbertModel = models.text_embedding.lfm2_5_colbert_350m(); -// The library auto-applies the model's [Q]/[D] prompts via forward(text, role). -// Late-interaction MaxSim is a shipped util; the document skiplist (punctuation -// token ids excluded from scoring) is the consumer's choice — these are the -// LFM2.5-ColBERT skiplist ids. -const SKIPLIST = [ - 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, - 535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601, - 602, 603, -]; +// The library auto-applies the model's [Q]/[D] prompts via forward(text, role) +// and ships the document skiplist on the model config; we just pass it to the +// shipped MaxSim util. +const SKIPLIST = colbertModel.skiplistIds ?? []; const CORPUS: string[] = [ 'The forecast says heavy showers this afternoon.', diff --git a/apps/text-embeddings/utils/math.ts b/apps/text-embeddings/utils/math.ts index 50c70d1f92..997d3f46fb 100644 --- a/apps/text-embeddings/utils/math.ts +++ b/apps/text-embeddings/utils/math.ts @@ -1,6 +1,7 @@ import { RnExecutorchError, RnExecutorchErrorCode, + EmbeddingResult, } from 'react-native-executorch'; export const dotProduct = (a: Float32Array, b: Float32Array) => { @@ -17,3 +18,36 @@ export const dotProduct = (a: Float32Array, b: Float32Array) => { } return sum; }; + +/** + * ColBERT late-interaction score between a query and a document encoding: + * score = Σ_q max_d ( q · d ) + * For each query token, the max dot over non-skiplist doc tokens, summed. + * Per-token vectors are L2-normalized by the graph, so dot == cosine. Scoring + * is the consumer's concern (the library just yields the per-token vectors), + * so this lives in the app alongside dotProduct. + */ +export const maxSim = ( + query: EmbeddingResult, + doc: EmbeddingResult, + skiplistIds: number[] = [] +) => { + const dim = query.embeddingDim; + const skip = new Set(skiplistIds); + let score = 0; + for (let qi = 0; qi < query.numTokens; qi++) { + const qOff = qi * dim; + let best = -Infinity; + for (let di = 0; di < doc.numTokens; di++) { + if (skip.has(doc.tokenIds[di]!)) continue; + const dOff = di * dim; + let dot = 0; + for (let k = 0; k < dim; k++) { + dot += (query.vectors[qOff + k] ?? 0) * (doc.vectors[dOff + k] ?? 0); + } + if (dot > best) best = dot; + } + if (best !== -Infinity) score += best; + } + return score; +}; diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index c2e3a2a21d..f57c178b5e 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -287,6 +287,14 @@ const LFM2_5_EMBEDDING_350M_VARIANTS = { // LFM2.5-ColBERT is a plain text-embedding model from the library's POV: it // returns per-token vectors. Late-interaction scoring (MaxSim / skiplist) is // the consumer's concern; the library only auto-applies the role prompts. +// Document punctuation token ids excluded from MaxSim (ColBERT skiplist), +// derived from the model's config_sentence_transformers.json skiplist_words. +const LFM_COLBERT_SKIPLIST = [ + 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, + 535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601, + 602, 603, +]; + const LFM2_5_COLBERT_350M_VARIANTS = { mlx: { base: { @@ -295,6 +303,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = { tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, prompts: LFM_COLBERT_PROMPTS, multiVector: true as const, + skiplistIds: LFM_COLBERT_SKIPLIST, }, }, xnnpack: { @@ -304,6 +313,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = { tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, prompts: LFM_COLBERT_PROMPTS, multiVector: true as const, + skiplistIds: LFM_COLBERT_SKIPLIST, }, }, }; diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index c013cb818b..2f42d71e9d 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -66,6 +66,13 @@ export interface TextEmbeddingsModel { tokenizerSource: ResourceSource; prompts?: EmbeddingPrompts; multiVector?: boolean; + /** + * Document token ids to exclude from late-interaction scoring (e.g. ColBERT's + * punctuation skiplist). Derived from the model's training config, so it's + * shipped here rather than reconstructed by the consumer, who passes it to + * their own MaxSim scoring. + */ + skiplistIds?: number[]; } /** diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts index da10d9aa08..e9be7cf774 100644 --- a/packages/react-native-executorch/src/utils/textEmbeddings.ts +++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts @@ -35,45 +35,3 @@ export function getTokenVectors(result: EmbeddingResult): Float32Array[] { } return rows; } - -/** - * Late-interaction MaxSim score between a query and a document encoding: - * - * score = Σ_q max_d ( q · d ) - * - * For each query token, takes the max dot product over all (non-skiplist) - * document tokens, then sums across query tokens. Per-token vectors are - * L2-normalized by the graph, so a dot product is a cosine. - * - * `skiplistIds` (e.g. punctuation token ids) are excluded from the document - * side, matching ColBERT's document skiplist. Pass `[]` to score every token. - * - * @category Utils - */ -export function maxSim( - query: EmbeddingResult, - doc: EmbeddingResult, - skiplistIds: number[] = [] -): number { - const dim = query.embeddingDim; - const q = query.vectors; - const d = doc.vectors; - const skip = new Set(skiplistIds); - - let score = 0; - for (let qi = 0; qi < query.numTokens; qi++) { - const qOff = qi * dim; - let best = -Infinity; - for (let di = 0; di < doc.numTokens; di++) { - if (skip.has(doc.tokenIds[di]!)) continue; - const dOff = di * dim; - let dot = 0; - for (let k = 0; k < dim; k++) { - dot += (q[qOff + k] ?? 0) * (d[dOff + k] ?? 0); - } - if (dot > best) best = dot; - } - if (best !== -Infinity) score += best; - } - return score; -} From d551b5f7adc5ed27b9e4bb06489c54352c0cd11f Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 16:33:01 +0200 Subject: [PATCH 5/7] refactor(example): merge ColBERT search into text embeddings screen Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/text-embeddings/app/_layout.tsx | 8 - apps/text-embeddings/app/colbert/index.tsx | 284 ------------------ .../app/text-embeddings/index.tsx | 43 ++- 3 files changed, 33 insertions(+), 302 deletions(-) delete mode 100644 apps/text-embeddings/app/colbert/index.tsx diff --git a/apps/text-embeddings/app/_layout.tsx b/apps/text-embeddings/app/_layout.tsx index 57acb26eb2..bb8e1deeb8 100644 --- a/apps/text-embeddings/app/_layout.tsx +++ b/apps/text-embeddings/app/_layout.tsx @@ -109,14 +109,6 @@ export default function _layout() { headerTitleStyle: { color: ColorPalette.primary }, }} /> - ); diff --git a/apps/text-embeddings/app/colbert/index.tsx b/apps/text-embeddings/app/colbert/index.tsx deleted file mode 100644 index 5136aad9f1..0000000000 --- a/apps/text-embeddings/app/colbert/index.tsx +++ /dev/null @@ -1,284 +0,0 @@ -import { useEffect, useState } from 'react'; -import { - StyleSheet, - Text, - TextInput, - TouchableOpacity, - View, - SafeAreaView, - ScrollView, - KeyboardAvoidingView, - Platform, -} from 'react-native'; -import { Ionicons } from '@expo/vector-icons'; -import { useIsFocused } from 'expo-router'; -import { - models, - useTextEmbeddings, - EmbeddingResult, -} from 'react-native-executorch'; -import ColorPalette from '../../colors'; -import ErrorBanner from '../../components/ErrorBanner'; -import { maxSim } from '../../utils/math'; - -const colbertModel = models.text_embedding.lfm2_5_colbert_350m(); - -// The library auto-applies the model's [Q]/[D] prompts via forward(text, role) -// and ships the document skiplist on the model config; we just pass it to the -// shipped MaxSim util. -const SKIPLIST = colbertModel.skiplistIds ?? []; - -const CORPUS: string[] = [ - 'The forecast says heavy showers this afternoon.', - "It's so sunny outside today!", - 'The home team scored in the final minute to win the match.', - 'Fans packed the stadium for the championship game.', - 'Simmer the tomatoes with garlic before adding the pasta.', - 'He whisked the eggs and folded in the melted chocolate.', - 'The new phone has a faster chip and a brighter screen.', - 'The flight to Tokyo was delayed by three hours.', - 'We hiked along the coast and camped near the cliffs.', -]; - -const EXAMPLE_QUERIES: string[] = [ - "What's the weather like?", - 'Who won the match?', - 'How do I cook dinner?', - 'Tell me about the latest technology', -]; - -type Ranked = { sentence: string; score: number }; - -export default function ColbertScreenWrapper() { - return useIsFocused() ? : null; -} - -function ColbertScreen() { - const model = useTextEmbeddings({ model: colbertModel }); - const [error, setError] = useState(null); - const [query, setQuery] = useState(''); - const [docEncs, setDocEncs] = useState< - { sentence: string; enc: EmbeddingResult }[] - >([]); - const [results, setResults] = useState([]); - const [indexing, setIndexing] = useState(false); - const [encodeTime, setEncodeTime] = useState(null); - - useEffect( - () => { - let cancelled = false; - const indexCorpus = async () => { - if (!model.isReady) return; - setIndexing(true); - setResults([]); - try { - const encs = []; - for (const sentence of CORPUS) { - const enc = await model.forward(sentence, 'document'); - if (cancelled) return; - encs.push({ sentence, enc }); - } - setDocEncs(encs); - } catch (e) { - setError(e instanceof Error ? e.message : String(e)); - } finally { - if (!cancelled) setIndexing(false); - } - }; - indexCorpus(); - return () => { - cancelled = true; - }; - }, - // eslint-disable-next-line react-hooks/exhaustive-deps - [model.isReady] - ); - - const runSearch = async (queryText: string = query) => { - const q = queryText.trim(); - if (!model.isReady || !q || docEncs.length === 0) return; - setQuery(queryText); - try { - const start = Date.now(); - const qEnc = await model.forward(q, 'query'); - setEncodeTime(Date.now() - start); - const ranked = docEncs - .map(({ sentence, enc }) => ({ - sentence, - score: maxSim(qEnc, enc, SKIPLIST), - })) - .sort((a, b) => b.score - a.score); - setResults(ranked); - } catch (e) { - setError(e instanceof Error ? e.message : String(e)); - } - }; - - const ready = model.isReady && !indexing && docEncs.length > 0; - const canSearch = ready && !!query.trim(); - - const statusText = model.error - ? `Error: ${model.error}` - : !model.isReady - ? `Loading model ${(model.downloadProgress * 100).toFixed(0)}%` - : indexing - ? 'Indexing corpus…' - : 'Ready'; - - return ( - - - - ColBERT Late-Interaction Search - {statusText} - setError(null)} /> - - - - Search the corpus ({CORPUS.length} sentences) - - - Per-token vectors scored with MaxSim. Tap an example or type a - query. - - - {EXAMPLE_QUERIES.map((q) => ( - runSearch(q)} - > - {q} - - ))} - - runSearch()} - returnKeyType="search" - /> - runSearch()} - style={[styles.button, !canSearch && styles.buttonDisabled]} - disabled={!canSearch} - > - - - {indexing ? 'Indexing…' : 'Search'} - - - {encodeTime !== null && ( - Query encoded in {encodeTime} ms - )} - - - {results.length > 0 && ( - - Results - {results.map((r, i) => ( - - - {r.sentence} - {r.score.toFixed(2)} - - - 0 ? r.score / results[0].score : 0) * 100 - )}%`, - }, - i === 0 && styles.barFillTop, - ]} - /> - - - ))} - - )} - - - - ); -} - -const styles = StyleSheet.create({ - container: { flex: 1, backgroundColor: '#F8FAFC' }, - flex: { flex: 1 }, - scroll: { padding: 20 }, - heading: { fontSize: 22, fontWeight: '500', marginBottom: 8, color: '#0F172A' }, - status: { fontSize: 14, color: '#64748B', marginBottom: 12 }, - card: { - backgroundColor: '#fff', - padding: 16, - borderRadius: 16, - borderColor: '#E2E8F0', - borderWidth: 2, - marginBottom: 20, - }, - sectionTitle: { fontSize: 16, fontWeight: '500', marginBottom: 8, color: '#1E293B' }, - hint: { fontSize: 13, color: '#64748B', marginBottom: 12, lineHeight: 18 }, - chipRow: { flexDirection: 'row', flexWrap: 'wrap', gap: 8, marginBottom: 12 }, - chip: { - backgroundColor: '#EEF2FF', - borderColor: '#C7D2FE', - borderWidth: 1, - borderRadius: 16, - paddingHorizontal: 12, - paddingVertical: 6, - }, - chipDisabled: { opacity: 0.4 }, - chipText: { fontSize: 13, color: 'navy' }, - input: { - backgroundColor: '#F1F5F9', - borderRadius: 10, - padding: 10, - marginBottom: 10, - fontSize: 16, - color: '#0F172A', - minHeight: 40, - }, - button: { - backgroundColor: 'navy', - borderRadius: 10, - paddingVertical: 12, - flexDirection: 'row', - alignItems: 'center', - justifyContent: 'center', - }, - buttonDisabled: { backgroundColor: '#f0f0f0' }, - buttonText: { color: '#fff', fontWeight: '500', marginLeft: 6 }, - buttonTextDisabled: { color: 'gray' }, - stats: { fontSize: 13, color: '#64748B', marginTop: 8, textAlign: 'center' }, - resultRow: { marginBottom: 14 }, - resultHeader: { - flexDirection: 'row', - justifyContent: 'space-between', - marginBottom: 6, - gap: 8, - }, - resultText: { flex: 1, fontSize: 14, color: '#334155' }, - resultScore: { - fontSize: 14, - fontWeight: '600', - color: '#0F172A', - fontVariant: ['tabular-nums'], - }, - barTrack: { height: 8, borderRadius: 4, backgroundColor: '#E2E8F0', overflow: 'hidden' }, - barFill: { height: '100%', borderRadius: 4, backgroundColor: '#94A3B8' }, - barFillTop: { backgroundColor: 'navy' }, -}); diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index 8cb6777843..c2e3d14e29 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -16,12 +16,15 @@ import { models, useTextEmbeddings, TextEmbeddingsProps, + EmbeddingResult, } from 'react-native-executorch'; const textEmbedding = models.text_embedding; -// Single-vector (pooled) models: forward() returns a Float32Array directly. -// The multi-vector ColBERT model has its own screen. +// forward() returns a Float32Array for pooled (single-vector) models and an +// EmbeddingResult for multi-vector (late-interaction) models. We store the raw +// return for the whole corpus and pick the scorer per model below. type TextEmbeddingModel = TextEmbeddingsProps['model']; +type Encoding = Float32Array | EmbeddingResult; const MODELS: { label: string; value: TextEmbeddingModel }[] = [ { label: 'MiniLM L6', value: textEmbedding.all_minilm_l6_v2() }, @@ -53,6 +56,10 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ label: 'LFM2.5 Embedding MLX', value: textEmbedding.lfm2_5_embedding_350m({ backend: 'mlx' }), }, + { + label: 'LFM2.5 ColBERT (late-interaction)', + value: textEmbedding.lfm2_5_colbert_350m(), + }, ]; // A multi-topic corpus so semantic ranking is visible: a weather query should @@ -83,7 +90,7 @@ const EXAMPLE_QUERIES: string[] = [ 'Where did they travel?', ]; import { useIsFocused } from 'expo-router'; -import { dotProduct } from '../../utils/math'; +import { dotProduct, maxSim } from '../../utils/math'; import ErrorBanner from '../../components/ErrorBanner'; export default function TextEmbeddingsScreenWrapper() { @@ -101,9 +108,15 @@ function TextEmbeddingsScreen() { const model = useTextEmbeddings({ model: selectedModel }); const [error, setError] = useState(null); + // ColBERT-style models score per-token vectors with MaxSim and exclude + // punctuation tokens; pooled models score the single vector with a dot + // product. Both are driven off the selected model's config. + const isMultiVector = !!selectedModel.multiVector; + const skiplistIds = selectedModel.skiplistIds ?? []; + const [query, setQuery] = useState(''); const [corpusEmbeddings, setCorpusEmbeddings] = useState< - { sentence: string; embedding: Float32Array }[] + { sentence: string; embedding: Encoding }[] >([]); const [results, setResults] = useState([]); const [embeddingTime, setEmbeddingTime] = useState(null); @@ -122,8 +135,8 @@ function TextEmbeddingsScreen() { const embedded = []; for (const sentence of CORPUS) { // forward(_, 'document') auto-applies the model's document prompt - // (a no-op for models without one). Single-vector models return - // a Float32Array directly. + // (a no-op for models without one). Pooled models return a + // Float32Array, multi-vector models an EmbeddingResult. const embedding = await model.forward(sentence, 'document'); if (cancelled) return; embedded.push({ sentence, embedding }); @@ -155,12 +168,21 @@ function TextEmbeddingsScreen() { setQuery(queryText); try { const start = Date.now(); - const queryEmbedding = await model.forward(q, 'query'); + const queryEmbedding = (await model.forward(q, 'query')) as Encoding; setEmbeddingTime(Date.now() - start); const ranked = corpusEmbeddings .map(({ sentence, embedding }) => ({ sentence, - similarity: dotProduct(queryEmbedding, embedding), + similarity: isMultiVector + ? maxSim( + queryEmbedding as EmbeddingResult, + embedding as EmbeddingResult, + skiplistIds + ) + : dotProduct( + queryEmbedding as Float32Array, + embedding as Float32Array + ), })) .sort((a, b) => b.similarity - a.similarity); setResults(ranked); @@ -210,8 +232,9 @@ function TextEmbeddingsScreen() { Search the corpus ({CORPUS.length} sentences) - Ranks every sentence by meaning. Ask a full question — tap an - example or type your own. + {isMultiVector + ? 'Ranks per-token vectors with MaxSim (late interaction). Ask a full question — tap an example or type your own.' + : 'Ranks every sentence by meaning. Ask a full question — tap an example or type your own.'} {EXAMPLE_QUERIES.map((q) => ( From b91153064ca041e99295b9b0d6511979e8716ba3 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 17:19:07 +0200 Subject: [PATCH 6/7] refactor: drop empty BaseEmbeddings layer, rename skipList, trim comments Co-Authored-By: Claude Opus 4.6 (1M context) --- .../app/text-embeddings/index.tsx | 41 ++++--------------- apps/text-embeddings/utils/math.ts | 12 +----- .../common/rnexecutorch/TokenizerModule.h | 6 --- .../host_objects/JsiConversions.h | 13 ++---- .../models/embeddings/BaseEmbeddings.cpp | 9 ---- .../models/embeddings/BaseEmbeddings.h | 13 ------ .../models/embeddings/text/TextEmbeddings.cpp | 2 +- .../models/embeddings/text/TextEmbeddings.h | 4 +- .../models/text_to_image/Encoder.cpp | 2 - .../common/rnexecutorch/tests/CMakeLists.txt | 3 -- .../src/constants/modelRegistry.ts | 14 ++----- .../src/constants/modelUrls.ts | 11 +---- .../src/types/textEmbeddings.ts | 11 +++-- 13 files changed, 29 insertions(+), 112 deletions(-) delete mode 100644 packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp delete mode 100644 packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx index c2e3d14e29..2c62a22922 100644 --- a/apps/text-embeddings/app/text-embeddings/index.tsx +++ b/apps/text-embeddings/app/text-embeddings/index.tsx @@ -5,7 +5,6 @@ import { TextInput, TouchableOpacity, View, - SafeAreaView, ScrollView, KeyboardAvoidingView, Platform, @@ -18,11 +17,13 @@ import { TextEmbeddingsProps, EmbeddingResult, } from 'react-native-executorch'; +import { useIsFocused } from 'expo-router'; +import { dotProduct, maxSim } from '../../utils/math'; +import ErrorBanner from '../../components/ErrorBanner'; +import { SafeAreaView } from 'react-native-safe-area-context'; + const textEmbedding = models.text_embedding; -// forward() returns a Float32Array for pooled (single-vector) models and an -// EmbeddingResult for multi-vector (late-interaction) models. We store the raw -// return for the whole corpus and pick the scorer per model below. type TextEmbeddingModel = TextEmbeddingsProps['model']; type Encoding = Float32Array | EmbeddingResult; @@ -62,9 +63,6 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [ }, ]; -// A multi-topic corpus so semantic ranking is visible: a weather query should -// float the weather lines to the top and push sports/cooking/tech down, even -// with no shared keywords. const CORPUS: string[] = [ 'The forecast says heavy showers this afternoon.', "It's so sunny outside today!", @@ -80,8 +78,6 @@ const CORPUS: string[] = [ 'We hiked along the coast and camped near the cliffs.', ]; -// Tap-to-run example queries. Natural-language questions — how these models -// are trained to be queried — give the cleanest separation. const EXAMPLE_QUERIES: string[] = [ "What's the weather like?", 'Who won the match?', @@ -89,9 +85,6 @@ const EXAMPLE_QUERIES: string[] = [ 'How do I cook dinner?', 'Where did they travel?', ]; -import { useIsFocused } from 'expo-router'; -import { dotProduct, maxSim } from '../../utils/math'; -import ErrorBanner from '../../components/ErrorBanner'; export default function TextEmbeddingsScreenWrapper() { const isFocused = useIsFocused(); @@ -108,11 +101,8 @@ function TextEmbeddingsScreen() { const model = useTextEmbeddings({ model: selectedModel }); const [error, setError] = useState(null); - // ColBERT-style models score per-token vectors with MaxSim and exclude - // punctuation tokens; pooled models score the single vector with a dot - // product. Both are driven off the selected model's config. const isMultiVector = !!selectedModel.multiVector; - const skiplistIds = selectedModel.skiplistIds ?? []; + const skipListIds = selectedModel.skipListIds ?? []; const [query, setQuery] = useState(''); const [corpusEmbeddings, setCorpusEmbeddings] = useState< @@ -122,8 +112,6 @@ function TextEmbeddingsScreen() { const [embeddingTime, setEmbeddingTime] = useState(null); const [indexing, setIndexing] = useState(false); - // Embed the whole corpus once the model is ready (re-runs on model change so - // prefixes / weights match the active model). useEffect( () => { let cancelled = false; @@ -134,17 +122,11 @@ function TextEmbeddingsScreen() { try { const embedded = []; for (const sentence of CORPUS) { - // forward(_, 'document') auto-applies the model's document prompt - // (a no-op for models without one). Pooled models return a - // Float32Array, multi-vector models an EmbeddingResult. const embedding = await model.forward(sentence, 'document'); if (cancelled) return; embedded.push({ sentence, embedding }); } setCorpusEmbeddings(embedded); - } catch { - // A transient "Model not loaded" can fire while the hook swaps - // models; the effect re-runs once the new model is ready. } finally { if (!cancelled) setIndexing(false); } @@ -154,10 +136,7 @@ function TextEmbeddingsScreen() { cancelled = true; }; }, - // Re-index when the model becomes ready OR the selected model changes, so - // the corpus is embedded by the active model. The "Model not loaded" race - // is handled by the isReady gate plus clearing the corpus on switch; - // switching sets isReady false→true so the re-run sees the new model. + // eslint-disable-next-line react-hooks/exhaustive-deps [model.isReady, selectedModel] ); @@ -177,7 +156,7 @@ function TextEmbeddingsScreen() { ? maxSim( queryEmbedding as EmbeddingResult, embedding as EmbeddingResult, - skiplistIds + skipListIds ) : dotProduct( queryEmbedding as Float32Array, @@ -201,8 +180,6 @@ function TextEmbeddingsScreen() { return model.isGenerating ? 'Generating...' : 'Model is ready'; }; - // Chips/examples just need a ready, indexed model; the Search button also - // needs a non-empty typed query. const ready = model.isReady && !indexing && corpusEmbeddings.length > 0; const canSearch = ready && !!query.trim(); @@ -306,8 +283,6 @@ function TextEmbeddingsScreen() { ); } -// One ranked result with a similarity bar. The bar is scaled relative to the -// top hit so the ranking is visually obvious; the raw cosine is shown too. function ResultRow({ sentence, similarity, diff --git a/apps/text-embeddings/utils/math.ts b/apps/text-embeddings/utils/math.ts index 997d3f46fb..44248e1658 100644 --- a/apps/text-embeddings/utils/math.ts +++ b/apps/text-embeddings/utils/math.ts @@ -19,21 +19,13 @@ export const dotProduct = (a: Float32Array, b: Float32Array) => { return sum; }; -/** - * ColBERT late-interaction score between a query and a document encoding: - * score = Σ_q max_d ( q · d ) - * For each query token, the max dot over non-skiplist doc tokens, summed. - * Per-token vectors are L2-normalized by the graph, so dot == cosine. Scoring - * is the consumer's concern (the library just yields the per-token vectors), - * so this lives in the app alongside dotProduct. - */ export const maxSim = ( query: EmbeddingResult, doc: EmbeddingResult, - skiplistIds: number[] = [] + skipListIds: number[] = [] ) => { const dim = query.embeddingDim; - const skip = new Set(skiplistIds); + const skip = new Set(skipListIds); let score = 0; for (let qi = 0; qi < query.numTokens; qi++) { const qOff = qi * dim; diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h index 09877dfc65..0e1356f121 100644 --- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h +++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h @@ -13,10 +13,6 @@ class TokenizerModule { std::shared_ptr callInvoker); [[nodiscard("Registered non-void function")]] std::vector encode(std::string s) const; - // Like encode, but applies the tokenizer.json post_processor (e.g. - // TemplateProcessing that prepends BOS). Needed by models whose pooling - // depends on the BOS/CLS token (e.g. CLS-pooled text embeddings). Not JS- - // bound; encode() keeps its single-arg signature for the JS API. [[nodiscard("Registered non-void function")]] std::vector encodeWithSpecialTokens(std::string s) const; [[nodiscard("Registered non-void function")]] std::string @@ -30,8 +26,6 @@ class TokenizerModule { std::size_t getMemoryLowerBound() const noexcept; private: - // Shared encode implementation. bos/eos act as an add-special-tokens flag - // (not a literal count) when the tokenizer.json defines a post_processor. std::vector encodeImpl(const std::string &s, int8_t bos, int8_t eos) const; diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h index 8e211f0028..fdc87cd9af 100644 --- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h +++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h @@ -708,20 +708,15 @@ getJsiValue(const models::style_transfer::PixelDataResult &result, return obj; } -// Text embedding output: a [numTokens, embeddingDim] fp32 matrix + input token -// ids. Pooled models give numTokens == 1; multi-vector give the full sequence. -// The TS layer reduces to a single vector or keeps the matrix per model config. -inline jsi::Value -getJsiValue(const models::embeddings::EmbeddingResult &result, - jsi::Runtime &runtime) { +inline jsi::Value getJsiValue(const models::embeddings::EmbeddingResult &result, + jsi::Runtime &runtime) { jsi::Object obj(runtime); auto arrayBuffer = jsi::ArrayBuffer(runtime, result.dataPtr); auto float32ArrayCtor = runtime.global().getPropertyAsFunction(runtime, "Float32Array"); - auto float32Array = - float32ArrayCtor.callAsConstructor(runtime, arrayBuffer) - .getObject(runtime); + auto float32Array = float32ArrayCtor.callAsConstructor(runtime, arrayBuffer) + .getObject(runtime); obj.setProperty(runtime, "dataPtr", float32Array); obj.setProperty(runtime, "numTokens", jsi::Value(result.numTokens)); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp deleted file mode 100644 index e777be6704..0000000000 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "BaseEmbeddings.h" - -namespace rnexecutorch::models::embeddings { - -BaseEmbeddings::BaseEmbeddings(const std::string &modelSource, - std::shared_ptr callInvoker) - : BaseModel(modelSource, callInvoker) {} - -} // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h deleted file mode 100644 index 4b37a3fe93..0000000000 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include - -namespace rnexecutorch::models::embeddings { - -class BaseEmbeddings : public BaseModel { -public: - BaseEmbeddings(const std::string &modelSource, - std::shared_ptr callInvoker); -}; - -}; // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index 26f3157690..d80c4fb4fe 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -11,7 +11,7 @@ using namespace executorch::extension; TextEmbeddings::TextEmbeddings(const std::string &modelSource, const std::string &tokenizerSource, std::shared_ptr callInvoker) - : BaseEmbeddings(modelSource, callInvoker), + : BaseModel(modelSource, callInvoker), tokenizer( std::make_unique(tokenizerSource, callInvoker)) {} diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h index cb6059b96e..da51e4d26e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h @@ -3,7 +3,7 @@ #include "rnexecutorch/metaprogramming/ConstructorHelpers.h" #include #include -#include +#include #include namespace rnexecutorch { @@ -14,7 +14,7 @@ struct TokenIdsWithAttentionMask { std::vector attentionMask; }; -class TextEmbeddings final : public BaseEmbeddings { +class TextEmbeddings final : public BaseModel { public: TextEmbeddings(const std::string &modelSource, const std::string &tokenizerSource, diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp index 6abbccb9c6..3bf5fa2206 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp @@ -16,8 +16,6 @@ Encoder::Encoder(const std::string &tokenizerSource, encoderSource, tokenizerSource, callInvoker)) {} std::vector Encoder::generate(std::string input) { - // TextEmbeddings returns the raw [numTokens, embeddingDim] matrix; this - // encoder pools/uses the flat fp32 buffer directly (dataPtr). std::shared_ptr embeddingsText = encoder->generate(input).dataPtr; std::shared_ptr embeddingsUncond = diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt index 5f9d7287a5..a901cd56fc 100644 --- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt +++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt @@ -218,7 +218,6 @@ add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/embeddings/image/ImageEmbeddings.cpp - ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp ${RNEXECUTORCH_DIR}/models/VisionModel.cpp ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp @@ -230,7 +229,6 @@ add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp SOURCES ${RNEXECUTORCH_DIR}/models/embeddings/text/TextEmbeddings.cpp - ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp ${TOKENIZER_SOURCES} LIBS tokenizers_deps ) @@ -306,7 +304,6 @@ add_rn_test(TextToImageTests integration/TextToImageTest.cpp ${RNEXECUTORCH_DIR}/models/text_to_image/Decoder.cpp ${RNEXECUTORCH_DIR}/models/text_to_image/Scheduler.cpp ${RNEXECUTORCH_DIR}/models/embeddings/text/TextEmbeddings.cpp - ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp ${TOKENIZER_SOURCES} LIBS tokenizers_deps ) diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts index f57c178b5e..4c36c6a1fa 100644 --- a/packages/react-native-executorch/src/constants/modelRegistry.ts +++ b/packages/react-native-executorch/src/constants/modelRegistry.ts @@ -284,12 +284,7 @@ const LFM2_5_EMBEDDING_350M_VARIANTS = { }, }; -// LFM2.5-ColBERT is a plain text-embedding model from the library's POV: it -// returns per-token vectors. Late-interaction scoring (MaxSim / skiplist) is -// the consumer's concern; the library only auto-applies the role prompts. -// Document punctuation token ids excluded from MaxSim (ColBERT skiplist), -// derived from the model's config_sentence_transformers.json skiplist_words. -const LFM_COLBERT_SKIPLIST = [ +const LFM_COLBERT_SKIP_LIST = [ 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601, 602, 603, @@ -303,7 +298,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = { tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, prompts: LFM_COLBERT_PROMPTS, multiVector: true as const, - skiplistIds: LFM_COLBERT_SKIPLIST, + skipListIds: LFM_COLBERT_SKIP_LIST, }, }, xnnpack: { @@ -313,7 +308,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = { tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER, prompts: LFM_COLBERT_PROMPTS, multiVector: true as const, - skiplistIds: LFM_COLBERT_SKIPLIST, + skipListIds: LFM_COLBERT_SKIP_LIST, }, }, }; @@ -804,9 +799,6 @@ export const models = { ios: 'mlx', android: 'xnnpack', }), - // ColBERT (late-interaction): forward() returns per-token vectors. Scoring - // (markers / MaxSim / skiplist) is the consumer's concern — see the - // colbert example screen for a reference implementation. lfm2_5_colbert_350m: variant(LFM2_5_COLBERT_350M_VARIANTS, { ios: 'mlx', android: 'xnnpack', diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 8fdebb1a6d..bd6cddf4a3 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -1195,21 +1195,14 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-d export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${PREVIOUS_VERSION_TAG}/tokenizer.json`; const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_QUANTIZED_MODEL = `${URL_PREFIX}-paraphrase-multilingual-MiniLM-L12-v2/${PREVIOUS_VERSION_TAG}/xnnpack/paraphrase_multilingual_minilm_l12_v2_xnnpack_8da4w.pte`; const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_TOKENIZER = `${URL_PREFIX}-paraphrase-multilingual-MiniLM-L12-v2/${PREVIOUS_VERSION_TAG}/tokenizer.json`; -const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; -const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/tokenizer.json`; -// LFM2.5-Embedding-350M: XNNPACK 8da4w (Android/CPU), MLX int4 bf16 (iOS GPU, -// physical device only). The exported graph bakes in CLS pooling + L2 norm. -// Requires the runner to add the BOS special token (CLS-pooled at index 0). export const LFM2_5_EMBEDDING_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_embedding_350m_xnnpack_8da4w.pte`; export const LFM2_5_EMBEDDING_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_embedding_350m_mlx_int4.pte`; export const LFM2_5_EMBEDDING_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`; -// LFM2.5-ColBERT-350M: late-interaction multi-vector retriever (per-token -// [S,128]). Same bidirectional backbone as the embedding model + a Linear -// 1024->128 head. forward() returns per-token vectors; late-interaction -// scoring (MaxSim) is the consumer's concern (see the colbert example). export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`; export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`; export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`; +const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`; +const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/tokenizer.json`; /** * @category Models - Text Embeddings diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts index 2f42d71e9d..1b056a1f7b 100644 --- a/packages/react-native-executorch/src/types/textEmbeddings.ts +++ b/packages/react-native-executorch/src/types/textEmbeddings.ts @@ -68,11 +68,11 @@ export interface TextEmbeddingsModel { multiVector?: boolean; /** * Document token ids to exclude from late-interaction scoring (e.g. ColBERT's - * punctuation skiplist). Derived from the model's training config, so it's + * punctuation skipList). Derived from the model's training config, so it's * shipped here rather than reconstructed by the consumer, who passes it to * their own MaxSim scoring. */ - skiplistIds?: number[]; + skipListIds?: number[]; } /** @@ -80,8 +80,11 @@ export interface TextEmbeddingsModel { * - return type: `EmbeddingResult` if `multiVector`, else `Float32Array`. * - role arg: required if the model has `prompts`, else absent. */ -export type ForwardReturn = - M extends { multiVector: true } ? EmbeddingResult : Float32Array; +export type ForwardReturn = M extends { + multiVector: true; +} + ? EmbeddingResult + : Float32Array; /** * `forward`'s signature, computed from the model config: From 9691184b595c9fede86d8fdf472f8963b9e791d4 Mon Sep 17 00:00:00 2001 From: Norbert Klockiewicz Date: Mon, 22 Jun 2026 17:42:29 +0200 Subject: [PATCH 7/7] refactor: extract TextEmbeddings::buildResult, validate output rank Co-Authored-By: Claude Opus 4.6 (1M context) --- .../models/embeddings/text/TextEmbeddings.cpp | 51 +++++++++++-------- .../models/embeddings/text/TextEmbeddings.h | 2 + 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp index d80c4fb4fe..6e5982c2a5 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp @@ -60,34 +60,41 @@ EmbeddingResult TextEmbeddings::generate(const std::string input) { auto forwardResult = BaseModel::forward({tokenIds, attnMask}); CHECK_OK_OR_THROW_FORWARD_ERROR(forwardResult); - // Output is [1, numTokens, embeddingDim] (numTokens == 1 for pooled models, - // == sequence length for multi-vector models). Return the raw matrix + the - // input ids; the TS layer reduces to a single vector or keeps the matrix. - auto out = forwardResult->at(0).toTensor(); - auto sizes = out.sizes(); + return buildResult(forwardResult->at(0).toTensor(), + std::move(preprocessed.inputIds)); +} - EmbeddingResult result; - result.dataPtr = std::make_shared(out.const_data_ptr(), - out.nbytes()); - result.numTokens = static_cast(sizes[sizes.size() - 2]); - result.embeddingDim = static_cast(sizes[sizes.size() - 1]); - result.tokenIds = std::move(preprocessed.inputIds); +// Output is [1, numTokens, embeddingDim] (numTokens == 1 for pooled models, +// == sequence length for multi-vector models). Multi-vector consumers index +// tokenIds[i] per output row (e.g. skiplist masking), so numTokens must match +// the input token count or that alignment silently breaks. +EmbeddingResult +TextEmbeddings::buildResult(const executorch::aten::Tensor &output, + std::vector tokenIds) { + auto sizes = output.sizes(); + if (sizes.size() < 2) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidModelOutput, + "Embedding output must be at least 2D, got rank " + + std::to_string(sizes.size())); + } - // Invariant for multi-vector models: one output row per input token, so - // numTokens (from the output tensor) must equal tokenIds.size() (from the - // input). Consumers index tokenIds[i] per output row (e.g. skiplist masking), - // which silently breaks if the graph ever pads/truncates the sequence. - // (Pooled models legitimately collapse to numTokens == 1.) - if (result.numTokens != 1 && - result.numTokens != static_cast(result.tokenIds.size())) { + const auto numTokens = static_cast(sizes[sizes.size() - 2]); + const auto inputTokens = static_cast(tokenIds.size()); + if (numTokens != 1 && numTokens != inputTokens) { throw RnExecutorchError( RnExecutorchErrorCode::InvalidModelOutput, - "Embedding output rows (" + std::to_string(result.numTokens) + - ") != input tokens (" + - std::to_string(result.tokenIds.size()) + + "Embedding output rows (" + std::to_string(numTokens) + + ") != input tokens (" + std::to_string(inputTokens) + "); per-token tokenIds alignment is broken."); } - return result; + + return EmbeddingResult{ + .dataPtr = std::make_shared(output.const_data_ptr(), + output.nbytes()), + .numTokens = numTokens, + .embeddingDim = static_cast(sizes[sizes.size() - 1]), + .tokenIds = std::move(tokenIds), + }; } } // namespace rnexecutorch::models::embeddings diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h index da51e4d26e..02cfefde4d 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h @@ -31,6 +31,8 @@ class TextEmbeddings final : public BaseModel { mutable std::mutex inference_mutex_; std::vector> inputShapes; TokenIdsWithAttentionMask preprocess(const std::string &input); + static EmbeddingResult buildResult(const executorch::aten::Tensor &output, + std::vector tokenIds); std::unique_ptr tokenizer; }; } // namespace models::embeddings