-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmetadata_filtering.py
More file actions
91 lines (74 loc) · 2.78 KB
/
metadata_filtering.py
File metadata and controls
91 lines (74 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""Metadata filtering example for sqlite-vec-client.
Demonstrates:
- Adding records with metadata
- Filtering by metadata fields
- Counting records by metadata
- Combined similarity search with metadata filtering
- Updating metadata
"""
from sqlite_vec_client import SQLiteVecClient
def main() -> None:
    """Run the metadata-filtering walkthrough against an in-memory database.

    Creates an ``articles`` table, inserts three records with metadata, and
    then prints the results of: filtering by a metadata field, counting by
    metadata, a similarity search restricted by a metadata filter, a
    metadata update, and a paginated filter call.

    All demo steps run inside ``try``/``finally`` so ``client.close()`` is
    always called, even when one of the steps raises.
    """
    client = SQLiteVecClient(table="articles", db_path=":memory:")
    try:
        client.create_table(dim=128, distance="cosine")

        # Add articles with metadata (one embedding and one dict per text).
        texts = [
            "Introduction to Python programming",
            "Advanced machine learning techniques",
            "Python for data science",
        ]
        embeddings = [
            [0.1] * 128,
            [0.2] * 128,
            [0.15] * 128,
        ]
        metadata = [
            {"category": "programming", "author": "Alice", "year": 2023},
            {"category": "ai", "author": "Bob", "year": 2024},
            {"category": "data-science", "author": "Alice", "year": 2024},
        ]
        rowids = client.add(texts=texts, embeddings=embeddings, metadata=metadata)
        print(f"Added {len(rowids)} articles")

        # Filter by metadata - efficient JSON_EXTRACT queries
        print("\nAlice's articles (using filter_by_metadata):")
        results = client.filter_by_metadata({"author": "Alice"})
        # Each result is unpacked as a 4-tuple; the last element is unused here.
        for rowid, text, meta, _ in results:
            print(f" [{rowid}] {text} - {meta}")

        # Filter by a different single field (publication year).
        print("\nArticles from 2024:")
        results = client.filter_by_metadata({"year": 2024})
        for rowid, text, meta, _ in results:
            print(f" [{rowid}] {text} - Year: {meta['year']}")

        # Count records by metadata
        count = client.count_by_metadata({"author": "Alice"})
        print(f"\nTotal articles by Alice: {count}")

        # Combined similarity search with metadata filtering
        print("\nSimilar to 'Python' in category 'programming':")
        query_emb = [0.1] * 128
        hits = client.similarity_search_with_filter(
            embedding=query_emb, filters={"category": "programming"}, top_k=5
        )
        # Hits are unpacked as (rowid, text, distance) triples.
        for rowid, text, distance in hits:
            print(f" [{rowid}] {text} (distance: {distance:.4f})")

        # Update metadata and verify with filter. Guarded so an empty insert
        # result cannot cause an IndexError on rowids[0].
        if rowids:
            client.update(
                rowids[0],
                metadata={
                    "category": "programming",
                    "author": "Alice",
                    "year": 2024,
                    "updated": True,
                },
            )

            # Find updated records via the newly added "updated" flag.
            updated_records = client.filter_by_metadata({"updated": True})
            print(f"\nUpdated records: {len(updated_records)}")
            if updated_records:
                # Index 2 is the metadata slot of the result tuple.
                print(f" Metadata: {updated_records[0][2]}")

        # Pagination example
        print("\nPagination example (limit=2):")
        page1 = client.filter_by_metadata({"year": 2024}, limit=2, offset=0)
        print(f" Page 1: {len(page1)} results")
    finally:
        # Always close the client, including on error paths.
        client.close()


if __name__ == "__main__":
    main()