-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapi_example.py
More file actions
52 lines (39 loc) · 1.58 KB
/
Copy pathapi_example.py
File metadata and controls
52 lines (39 loc) · 1.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""Example: Using the peekdocs Python API to search documents programmatically."""
from peekdocs import search
def main():
    """Demonstrate the peekdocs ``search`` API with three example queries.

    Runs a basic single-term search, an AND-mode search limited to PDF and
    Word files, and a regular-expression search, all rooted at the current
    directory, and prints a summary of each result to stdout.
    """
    here = "."

    # --- Basic search: look for "budget" in the current directory ---
    result = search(["budget"], directory=here)
    print(f"Files searched: {len(result.files_searched)}")
    print(f"Matches found: {len(result.matches)}")
    print(f"Elapsed: {result.elapsed:.2f}s")
    print()

    # Show at most the first 10 matches, clipping each line's text to 80 chars.
    preview = result.matches[:10]
    for match in preview:
        print(f" {match.filename}, line {match.line_num}: {match.text[:80]}")
    print()

    # --- Advanced search: AND mode, recursive, PDFs and Word docs only ---
    and_terms = ["invoice", "payment"]
    and_options = {
        "directory": here,
        "match_all": True,  # AND mode: both terms must appear on the same line
        "recursive": True,  # descend into subfolders
        "file_types": [".pdf", ".docx"],  # restrict to PDFs and Word docs
    }
    result = search(and_terms, **and_options)
    print(f"AND search: {len(result.matches)} match(es) in {len(result.files_searched)} file(s)")

    # --- Regex search: reference numbers such as REF-12345 ---
    ref_pattern = r"\bREF-\d{4,}\b"
    result = search([ref_pattern], directory=here, use_regex=True, recursive=True)
    print(f"Reference number pattern: {len(result.matches)} match(es) found")

    # Dump the details of every regex match, one blank line after each.
    for match in result.matches:
        print(f" File: {match.filename}")
        print(f" Line: {match.line_num}")
        print(f" Text: {match.text}")
        print()
# Entry-point guard: run the example only when this file is executed as a
# script, not when it is imported. Required for multiprocessing on macOS and
# Windows, where the "spawn" start method re-imports the main module in each
# child process.
if __name__ == "__main__":
    main()