From 2de366d516574db2b2683e5521a4e371f09ad149 Mon Sep 17 00:00:00 2001 From: Matthew McNeely Date: Fri, 6 Mar 2026 17:41:57 -0500 Subject: [PATCH 1/2] Update readme --- README.md | 169 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 145 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 5405611..c8825b9 100644 --- a/README.md +++ b/README.md @@ -227,30 +227,34 @@ type MyNode struct { modusGraph uses struct tags to define how each field should be handled in the graph database: -| Directive | Option | Description | Example | -| ----------- | -------- | ----------------------------------------------- | ------------------------------------------------------------------------------------ | -| **index** | exact | Creates an exact-match index for string fields | Name string `json:"name" dgraph:"index=exact"` | -| | hash | Creates a hash index (same as exact) | Code string `json:"code" dgraph:"index=hash"` | -| | term | Creates a term index for text search | Description string `json:"description" dgraph:"index=term"` | -| | fulltext | Creates a full-text search index | Content string `json:"content" dgraph:"index=fulltext"` | -| | int | Creates an index for integer fields | Age int `json:"age" dgraph:"index=int"` | -| | geo | Creates a geolocation index | Location `json:"location" dgraph:"index=geo"` | -| | day | Creates a day-based index for datetime fields | Created time.Time `json:"created" dgraph:"index=day"` | -| | year | Creates a year-based index for datetime fields | Birthday time.Time `json:"birthday" dgraph:"index=year"` | -| | month | Creates a month-based index for datetime fields | Hired time.Time `json:"hired" dgraph:"index=month"` | -| | hour | Creates an hour-based index for datetime fields | Login time.Time `json:"login" dgraph:"index=hour"` | -| | hnsw | Creates a vector similarity index | Vector \*dg.VectorFloat32 `json:"vector" dgraph:"index=hnsw(metric:cosine)"` | -| **type** | geo | Specifies a geolocation 
field | Location `json:"location" dgraph:"type=geo"` | -| | datetime | Specifies a datetime field | CreatedAt time.Time `json:"createdAt" dgraph:"type=datetime"` | -| | int | Specifies an integer field | Count int `json:"count" dgraph:"type=int"` | -| | float | Specifies a floating-point field | Price float64 `json:"price" dgraph:"type=float"` | -| | bool | Specifies a boolean field | Active bool `json:"active" dgraph:"type=bool"` | -| | password | Specifies a password field (stored securely) | Password string `json:"password" dgraph:"type=password"` | -| **count** | | Creates a count index | Visits int `json:"visits" dgraph:"count"` | -| **unique** | | Enforces uniqueness for the field | Email string `json:"email" dgraph:"index=hash unique"` | -| **upsert** | | Allows a field to be used in upsert operations | UserID string `json:"userID" dgraph:"index=hash upsert"` | -| **reverse** | | Creates a bidirectional edge | Friends []\*Person `json:"friends" dgraph:"reverse"` | -| **lang** | | Enables multi-language support for the field | Description string `json:"description" dgraph:"lang"` | +| Directive | Option | Description | Example | +| ------------- | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | +| **index** | exact | Creates an exact-match index for string fields | Name string `json:"name" dgraph:"index=exact"` | +| | hash | Creates a hash index (same as exact) | Code string `json:"code" dgraph:"index=hash"` | +| | term | Creates a term index for text search | Description string `json:"description" dgraph:"index=term"` | +| | fulltext | Creates a full-text search index | Content string `json:"content" dgraph:"index=fulltext"` | +| | int | Creates an index for integer 
fields | Age int `json:"age" dgraph:"index=int"` | +| | geo | Creates a geolocation index | Location `json:"location" dgraph:"index=geo"` | +| | day | Creates a day-based index for datetime fields | Created time.Time `json:"created" dgraph:"index=day"` | +| | year | Creates a year-based index for datetime fields | Birthday time.Time `json:"birthday" dgraph:"index=year"` | +| | month | Creates a month-based index for datetime fields | Hired time.Time `json:"hired" dgraph:"index=month"` | +| | hour | Creates an hour-based index for datetime fields | Login time.Time `json:"login" dgraph:"index=hour"` | +| | hnsw | Creates a vector similarity index | Vector \*dg.VectorFloat32 `json:"vector" dgraph:"index=hnsw(metric:cosine)"` | +| **type** | geo | Specifies a geolocation field | Location `json:"location" dgraph:"type=geo"` | +| | datetime | Specifies a datetime field | CreatedAt time.Time `json:"createdAt" dgraph:"type=datetime"` | +| | int | Specifies an integer field | Count int `json:"count" dgraph:"type=int"` | +| | float | Specifies a floating-point field | Price float64 `json:"price" dgraph:"type=float"` | +| | bool | Specifies a boolean field | Active bool `json:"active" dgraph:"type=bool"` | +| | password | Specifies a password field (stored securely) | Password string `json:"password" dgraph:"type=password"` | +| **count** | | Creates a count index | Visits int `json:"visits" dgraph:"count"` | +| **unique** | | Enforces uniqueness for the field | Email string `json:"email" dgraph:"index=hash unique"` | +| **upsert** | | Allows a field to be used in upsert operations | UserID string `json:"userID" dgraph:"index=hash upsert"` | +| **reverse** | | Creates a bidirectional edge | Friends []\*Person `json:"friends" dgraph:"reverse"` | +| **lang** | | Enables multi-language support for the field | Description string `json:"description" dgraph:"lang"` | +| **embedding** | | Marks a `SimString` field for automatic vector embedding. 
modusGraph calls the configured `EmbeddingProvider` on insert/update and maintains a shadow `<predicate>__vec` predicate. Can be combined with `index=term` and other string indexes. | Description SimString `json:"description" dgraph:"embedding,index=term"` | +| | metric= | HNSW index metric (default: `cosine`). Options: `cosine`, `euclidean`, `dotproduct` | Description SimString `json:"description" dgraph:"embedding,metric=euclidean"` | +| | exponent= | HNSW index exponent controlling index size (default: `4`) | Description SimString `json:"description" dgraph:"embedding,exponent=5"` | +| | threshold= | Minimum rune count required to embed. Texts shorter than this have their shadow vector deleted rather than left stale, preventing false positives. Default: `0` (always embed) | Description SimString `json:"description" dgraph:"embedding,threshold=20"` | ### Relationships @@ -515,6 +519,123 @@ with language-specific analyzers, geolocation queries, and more. The ability to Dgraph client gives you the full power of Dgraph's query language while still benefiting from modusGraph's simplified client interface and schema management. +## Automatic Similarity Search (`SimString`) + +`SimString` is a string type that transparently manages vector embeddings and HNSW-indexed shadow +predicates. When a struct field of this type is tagged with `dgraph:"embedding"`, modusGraph +automatically calls the configured `EmbeddingProvider` on every insert, upsert, and update, storing +the resulting vector in a `<predicate>__vec` shadow predicate. This eliminates the need to manually +maintain `VectorFloat32` fields or call embedding APIs. + +### Setup + +Configure an embedding provider when creating the client: + +```go +import mg "github.com/matthewmcneely/modusgraph" + +// OpenAICompatibleProvider works with OpenAI, Ollama, and any OpenAI-compatible endpoint. 
+provider := mg.NewOpenAICompatibleProvider(mg.OpenAICompatibleConfig{ + BaseURL: "http://localhost:11434", // Ollama; use "https://api.openai.com" for OpenAI + Model: "bge-m3:latest", + Dims: 1024, + // APIKey: os.Getenv("OPENAI_API_KEY"), // required for OpenAI +}) + +client, err := mg.NewClient(uri, + mg.WithAutoSchema(true), + mg.WithEmbeddingProvider(provider), +) +``` + +### Defining a struct with `SimString` + +```go +type Product struct { + Name string `json:"name,omitempty" dgraph:"index=term"` + // index=term — also maintain a standard term index on the text predicate + // embedding — auto-embed on every write + // threshold=20 — skip embedding (and delete stale vector) for very short strings + Description mg.SimString `json:"description,omitempty" dgraph:"index=term,embedding,threshold=20"` + + UID string `json:"uid,omitempty"` + DType []string `json:"dgraph.type,omitempty"` +} +``` + +When `AutoSchema` is enabled, `UpdateSchema` automatically registers the shadow predicate: + +``` +description__vec: float32vector @index(hnsw(exponent: "4", metric: "cosine")) . +``` + +### Inserting and updating + +No changes to the regular insert/update API — the embedding happens automatically: + +```go +ctx := context.Background() + +product := &Product{ + Name: "Trail Runner X", + Description: "Lightweight trail running shoe with aggressive grip for mountain terrain", +} +err := client.Insert(ctx, product) +// product.UID is now set; description__vec has been written automatically. + +// Update: the shadow vector is re-embedded along with the text change. 
+product.Description = "Waterproof trail shoe with rock plate for muddy mountain terrain" +err = client.Update(ctx, product) +``` + +### Querying by similarity + +Use `SimilarToText` to embed a query string and find the nearest neighbours in a single call: + +```go +var result Product +err := mg.SimilarToText(client, ctx, &result, "description", "running shoes for trails", 1) +if err != nil { + log.Fatal(err) +} +fmt.Println("Best match:", result.Name) +``` + +For queries where you already have a pre-computed vector, use `SimilarTo` with an explicit +`*dg.TxnContext`: + +```go +dgoClient, cleanup, err := client.DgraphClient() +defer cleanup() + +tx := dg.NewReadOnlyTxn(dgoClient) + +var result Product +err = mg.SimilarTo(tx, &result, "description", myVec, 5).Scan() +``` + +### `embedding` tag options + +| Option | Default | Description | +| ----------- | -------- | ------------------------------------------------------------------------------------------ | +| `metric` | `cosine` | HNSW distance metric: `cosine`, `euclidean`, or `dotproduct` | +| `exponent` | `4` | HNSW index size exponent | +| `threshold` | `0` | Minimum rune count to embed. Below this, the shadow vector is **deleted** (not left stale) | + +You can combine `embedding` with any standard string index, e.g. `dgraph:"embedding,index=term"` to +enable both term search and similarity search on the same predicate. 
+ +### Implementing a custom provider + +Any type that satisfies `EmbeddingProvider` can be used: + +```go +type EmbeddingProvider interface { + Embed(ctx context.Context, text string) ([]float32, error) + Dims() int +} +``` + ## Schema Management modusGraph provides robust schema management features that simplify working with Dgraph's schema From 491fe6e749eb900a78e905ed6f840a797090b5ab Mon Sep 17 00:00:00 2001 From: Matthew McNeely Date: Fri, 6 Mar 2026 17:45:47 -0500 Subject: [PATCH 2/2] Satisfy linter --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c8825b9..f8ce2dc 100644 --- a/README.md +++ b/README.md @@ -565,7 +565,7 @@ type Product struct { When `AutoSchema` is enabled, `UpdateSchema` automatically registers the shadow predicate: -``` +```text description__vec: float32vector @index(hnsw(exponent: "4", metric: "cosine")) . ```