From 0664982da5ea835aabbaab8c8b3432ce2052075c Mon Sep 17 00:00:00 2001 From: Deborah Jacob Date: Fri, 6 Feb 2026 20:10:22 -0500 Subject: [PATCH] =?UTF-8?q?feat:=20Botanu=20SDK=20for=20Python=20=E2=80=94?= =?UTF-8?q?=20OpenTelemetry-native=20cost=20attribution?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete implementation of the Botanu SDK including: - Zero-config bootstrap with enable()/disable() lifecycle - Auto-instrumentation for 50+ libraries (HTTP, DB, messaging, GenAI) - W3C Baggage propagation for run_id across services - ALWAYS_ON sampler (never drops spans) - TracerProvider reuse (no double-spanning) - Resource detection (cloud, container, process) - Lean enricher (run_id + use_case per span) - YAML + env var configuration with BOTANU_* precedence - Thread-safe initialization with RLock - CI/CD workflows (lint, typecheck, test 3.9-3.13, build, DCO) - 365 unit tests at 73% coverage - LF-compliant documentation and repo structure Co-Authored-By: Claude Opus 4.6 Signed-off-by: Deborah Jacob --- .clomonitor.yml | 27 + .github/ISSUE_TEMPLATE/bug_report.yml | 88 +++ .github/ISSUE_TEMPLATE/config.yml | 11 + .github/ISSUE_TEMPLATE/feature_request.yml | 53 ++ .github/PULL_REQUEST_TEMPLATE.md | 38 ++ .github/repolinter.json | 77 +++ .github/workflows/ci.yml | 106 ++++ .github/workflows/codeql.yml | 40 ++ .github/workflows/release.yml | 137 ++++ .github/workflows/repolinter.yml | 24 + .github/workflows/scorecard.yml | 35 ++ .pre-commit-config.yaml | 46 ++ .repolinterrc.yml | 124 ++++ CHANGELOG.md | 75 +++ CODE_OF_CONDUCT.md | 3 + CONTRIBUTING.md | 87 +++ DCO | 34 + GOVERNANCE.md | 39 ++ LICENSE | 212 ++++++- MAINTAINERS.md | 29 + NOTICE | 17 + README.md | 159 +++++ RELEASE.md | 199 ++++++ SECURITY.md | 36 ++ docs/api/configuration.md | 417 +++++++++++++ docs/api/decorators.md | 99 +++ docs/api/tracking.md | 511 +++++++++++++++ docs/concepts/architecture.md | 265 ++++++++ docs/concepts/context-propagation.md | 239 +++++++ docs/concepts/run-context.md | 188 ++++++ docs/getting-started/configuration.md | 271 ++++++++ docs/getting-started/installation.md | 80 +++ docs/getting-started/quickstart.md | 98 +++ docs/index.md | 65 ++ docs/integration/auto-instrumentation.md | 138 +++++ docs/integration/collector.md | 422 +++++++++++++ docs/integration/existing-otel.md | 295 +++++++++ docs/integration/kubernetes.md | 382 ++++++++++++ docs/patterns/anti-patterns.md | 490 +++++++++++++++ docs/patterns/best-practices.md | 416 +++++++++++++ docs/tracking/data-tracking.md | 412 ++++++++++++ docs/tracking/llm-tracking.md | 332 ++++++++++ docs/tracking/outcomes.md | 363 +++++++++++ pyproject.toml | 242 ++++++++ src/botanu/__init__.py | 76 +++ src/botanu/_version.py | 13 + src/botanu/models/__init__.py | 10 + src/botanu/models/run_context.py | 320 ++++++++++ src/botanu/processors/__init__.py | 12 + src/botanu/processors/enricher.py | 81 +++ src/botanu/py.typed | 0 src/botanu/resources/__init__.py | 8 + src/botanu/resources/detector.py | 366 +++++++++++ src/botanu/sdk/__init__.py | 38 ++ src/botanu/sdk/bootstrap.py | 381 ++++++++++++ src/botanu/sdk/config.py | 330 ++++++++++ src/botanu/sdk/context.py | 78 +++ src/botanu/sdk/decorators.py | 294 +++++++++ src/botanu/sdk/middleware.py | 106 ++++ src/botanu/sdk/span_helpers.py | 92 +++ src/botanu/tracking/__init__.py | 77 +++ src/botanu/tracking/data.py | 488 +++++++++++++++ src/botanu/tracking/ledger.py | 420 +++++++++++++ src/botanu/tracking/llm.py | 688 +++++++++++++++++++++ tests/__init__.py | 2 + 
tests/conftest.py | 59 ++ tests/integration/__init__.py | 2 + tests/unit/__init__.py | 2 + tests/unit/test_bootstrap.py | 670 ++++++++++++++++++++ tests/unit/test_config.py | 360 +++++++++++ tests/unit/test_context.py | 107 ++++ tests/unit/test_data_tracking.py | 473 ++++++++++++++ tests/unit/test_decorators.py | 335 ++++++++++ tests/unit/test_enricher.py | 160 +++++ tests/unit/test_ledger.py | 495 +++++++++++++++ tests/unit/test_llm_tracking.py | 537 ++++++++++++++++ tests/unit/test_middleware.py | 175 ++++++ tests/unit/test_resource_detector.py | 455 ++++++++++++++ tests/unit/test_run_context.py | 204 ++++++ tests/unit/test_span_helpers.py | 124 ++++ 80 files changed, 15442 insertions(+), 17 deletions(-) create mode 100644 .clomonitor.yml create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yml create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/repolinter.json create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/repolinter.yml create mode 100644 .github/workflows/scorecard.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .repolinterrc.yml create mode 100644 CHANGELOG.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 DCO create mode 100644 GOVERNANCE.md create mode 100644 MAINTAINERS.md create mode 100644 NOTICE create mode 100644 README.md create mode 100644 RELEASE.md create mode 100644 SECURITY.md create mode 100644 docs/api/configuration.md create mode 100644 docs/api/decorators.md create mode 100644 docs/api/tracking.md create mode 100644 docs/concepts/architecture.md create mode 100644 docs/concepts/context-propagation.md create mode 100644 docs/concepts/run-context.md create mode 100644 docs/getting-started/configuration.md create mode 100644 docs/getting-started/installation.md create mode 100644 docs/getting-started/quickstart.md create mode 100644 docs/index.md create mode 100644 docs/integration/auto-instrumentation.md create mode 100644 docs/integration/collector.md create mode 100644 docs/integration/existing-otel.md create mode 100644 docs/integration/kubernetes.md create mode 100644 docs/patterns/anti-patterns.md create mode 100644 docs/patterns/best-practices.md create mode 100644 docs/tracking/data-tracking.md create mode 100644 docs/tracking/llm-tracking.md create mode 100644 docs/tracking/outcomes.md create mode 100644 pyproject.toml create mode 100644 src/botanu/__init__.py create mode 100644 src/botanu/_version.py create mode 100644 src/botanu/models/__init__.py create mode 100644 src/botanu/models/run_context.py create mode 100644 src/botanu/processors/__init__.py create mode 100644 src/botanu/processors/enricher.py create mode 100644 src/botanu/py.typed create mode 100644 src/botanu/resources/__init__.py create mode 100644 src/botanu/resources/detector.py create mode 100644 src/botanu/sdk/__init__.py create mode 100644 src/botanu/sdk/bootstrap.py create mode 100644 src/botanu/sdk/config.py create mode 100644 src/botanu/sdk/context.py create mode 100644 src/botanu/sdk/decorators.py create mode 100644 src/botanu/sdk/middleware.py create mode 100644 src/botanu/sdk/span_helpers.py create mode 100644 src/botanu/tracking/__init__.py create mode 100644 src/botanu/tracking/data.py create mode 100644 src/botanu/tracking/ledger.py create mode 100644 
src/botanu/tracking/llm.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/integration/__init__.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/test_bootstrap.py create mode 100644 tests/unit/test_config.py create mode 100644 tests/unit/test_context.py create mode 100644 tests/unit/test_data_tracking.py create mode 100644 tests/unit/test_decorators.py create mode 100644 tests/unit/test_enricher.py create mode 100644 tests/unit/test_ledger.py create mode 100644 tests/unit/test_llm_tracking.py create mode 100644 tests/unit/test_middleware.py create mode 100644 tests/unit/test_resource_detector.py create mode 100644 tests/unit/test_run_context.py create mode 100644 tests/unit/test_span_helpers.py diff --git a/.clomonitor.yml b/.clomonitor.yml new file mode 100644 index 0000000..81639fe --- /dev/null +++ b/.clomonitor.yml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 +# +# CLOMonitor metadata — used by LF AI & Data Foundation to track +# project maturity and best-practice adoption. +# See: https://clomonitor.io/docs/topics/checks/ + +# Documentation +documentation: + adopters: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/ADOPTERS.md" + changelog: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/CHANGELOG.md" + code_of_conduct: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/CODE_OF_CONDUCT.md" + contributing: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/CONTRIBUTING.md" + governance: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/GOVERNANCE.md" + maintainers: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/MAINTAINERS.md" + readme: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/README.md" + security: "https://github.com/botanu-ai/botanu-sdk-python/blob/main/SECURITY.md" + +# License +license: + approved: true + spdx_id: "Apache-2.0" + +# Best practices +best_practices: + dco: true + openssf_badge: false # TODO: apply at https://www.bestpractices.dev/ diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..da664ab --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,88 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: Bug Report +description: Report a bug in the Botanu SDK +labels: ["bug", "triage"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to report a bug. + Please fill in the details below to help us reproduce and fix the issue. + + - type: input + id: version + attributes: + label: Botanu SDK version + description: "Output of `python -c 'import botanu; print(botanu.__version__)'`" + placeholder: "0.1.0" + validations: + required: true + + - type: input + id: python-version + attributes: + label: Python version + description: "Output of `python --version`" + placeholder: "3.12.1" + validations: + required: true + + - type: dropdown + id: init-mode + attributes: + label: Initialization mode + options: + - Standalone (no existing TracerProvider) + - Attach (OTEL-native vendor — Splunk, Honeycomb, etc.) + - Alongside (proprietary agent — Datadog, New Relic, etc.) + - Unknown / not sure + validations: + required: true + + - type: textarea + id: description + attributes: + label: Description + description: A clear and concise description of the bug. 
+ validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: Steps to reproduce + description: Minimal code or steps to reproduce the issue. + render: python + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected behavior + description: What you expected to happen. + validations: + required: true + + - type: textarea + id: actual + attributes: + label: Actual behavior + description: What actually happened. Include tracebacks if applicable. + render: shell + validations: + required: true + + - type: textarea + id: context + attributes: + label: Additional context + description: | + - OS and platform + - OTel SDK / instrumentation versions + - Existing observability vendor (Datadog, Splunk, etc.) + - Collector configuration + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..4acc5ec --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +blank_issues_enabled: false +contact_links: + - name: Questions & Discussions + url: https://github.com/botanu-ai/botanu-sdk-python/discussions + about: Ask questions and discuss ideas + - name: Security Vulnerabilities + url: https://github.com/botanu-ai/botanu-sdk-python/blob/main/SECURITY.md + about: Report security vulnerabilities privately (do NOT open a public issue) diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..d35d736 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,53 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: Feature Request +description: Suggest a new feature or enhancement +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + Thanks for suggesting an improvement to Botanu SDK! + + - type: textarea + id: problem + attributes: + label: Problem statement + description: What problem does this feature solve? Is this related to a frustration? + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Proposed solution + description: Describe the solution you'd like. Include API sketches if possible. + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: Any alternative approaches you've considered. + validations: + required: false + + - type: dropdown + id: scope + attributes: + label: Which component does this affect? 
+ multiple: true + options: + - Core SDK (bootstrap / attach) + - Run context / decorators + - Span processors + - Carrier propagation (SQS, Kafka, Celery) + - LLM / GenAI tracking + - Resource detection + - Collector configuration + - Documentation + - Other + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..42cfbe4 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,38 @@ + + + +## Summary + + + +## Changes + + + +- + +## Type of change + + + +- [ ] Bug fix (non-breaking change that fixes an issue) +- [ ] New feature (non-breaking change that adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to change) +- [ ] Documentation update +- [ ] CI / build / tooling + +## Testing + + + +- [ ] Unit tests pass (`pytest`) +- [ ] Lint passes (`ruff check`) +- [ ] Type check passes (`mypy`) + +## Checklist + +- [ ] My code follows the project's coding style +- [ ] I have added SPDX headers to new files +- [ ] I have added tests for my changes +- [ ] I have updated documentation if needed +- [ ] All commits are signed off (`git commit -s`) per the [DCO](../DCO) diff --git a/.github/repolinter.json b/.github/repolinter.json new file mode 100644 index 0000000..2c38bab --- /dev/null +++ b/.github/repolinter.json @@ -0,0 +1,77 @@ +{ + "$schema": "https://raw.githubusercontent.com/todogroup/repolinter/master/rulesets/schema.json", + "version": 2, + "axioms": {}, + "rules": { + "license-file-exists": { + "level": "error", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["LICENSE*", "COPYING*"] + } + } + }, + "readme-file-exists": { + "level": "error", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["README*"] + } + } + }, + "contributing-file-exists": { + "level": "warning", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["CONTRIBUTING*"] + } + } + }, + "changelog-file-exists": { + "level": "warning", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["CHANGELOG*"] + } + } + }, + "code-of-conduct-file-exists": { + "level": "warning", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["CODE_OF_CONDUCT*", "CODE-OF-CONDUCT*", ".github/CODE_OF_CONDUCT*"] + } + } + }, + "security-file-exists": { + "level": "warning", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["SECURITY*", ".github/SECURITY*"] + } + } + }, + "notice-file-exists": { + "level": "warning", + "rule": { + "type": "file-existence", + "options": { + "globsAny": ["NOTICE*"] + } + } + }, + "license-detectable-by-licensee": { + "level": "warning", + "rule": { + "type": "license-detectable-by-licensee", + "options": {} + } + } + } +} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..01ad7c5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: CI + +on: + push: + branches: [main, developer-deborah] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + # ------------------------------------------------------------------- + # Lint & format check + # ------------------------------------------------------------------- + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install ruff + - run: ruff 
check src/ tests/ + - run: ruff format --check src/ tests/ + + # ------------------------------------------------------------------- + # Type checking + # ------------------------------------------------------------------- + typecheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install -e ".[dev]" + - run: mypy src/botanu/ + + # ------------------------------------------------------------------- + # Test matrix — Python 3.9 → 3.13 + # ------------------------------------------------------------------- + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # hatch-vcs needs full history + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: pip install -e ".[dev]" + + - name: Run tests with coverage + run: pytest --cov=botanu --cov-report=xml --cov-report=term-missing + + - name: Upload coverage + if: matrix.python-version == '3.12' + uses: codecov/codecov-action@v4 + with: + file: coverage.xml + fail_ci_if_error: false + + # ------------------------------------------------------------------- + # Build verification — ensure the package builds cleanly + # ------------------------------------------------------------------- + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - run: pip install build + - run: python -m build + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + # ------------------------------------------------------------------- + # DCO sign-off check (required by Linux Foundation) + # ------------------------------------------------------------------- + dco: + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: DCO check + uses: christophebedard/dco-check@0.5.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..b0d5105 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: CodeQL + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + - cron: "23 4 * * 1" # Weekly Monday 04:23 UTC + +permissions: + contents: read + +jobs: + analyze: + runs-on: ubuntu-latest + permissions: + security-events: write + strategy: + fail-fast: false + matrix: + language: [python] + steps: + - uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..266eda0 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,137 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: Release to PyPI + +on: + push: + tags: + - "v*" + workflow_dispatch: + inputs: + publish_target: + description: 'Publish target' 
+ required: true + default: 'testpypi' + type: choice + options: + - testpypi + - pypi + +permissions: + contents: read + +jobs: + # ------------------------------------------------------------------- + # Build the package + # ------------------------------------------------------------------- + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # hatch-vcs needs full history + + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install build tools + run: pip install build twine + + - name: Build sdist and wheel + run: python -m build + + - name: Check package with twine + run: twine check dist/* + + - name: List build artifacts + run: ls -la dist/ + + - uses: actions/upload-artifact@v4 + with: + name: dist + path: dist/ + + # ------------------------------------------------------------------- + # Publish to TestPyPI (manual trigger or pre-release tags) + # Uses Trusted Publishing (OIDC — no API tokens needed) + # Requires TestPyPI project to be configured for GitHub OIDC: + # https://test.pypi.org/manage/project/botanu/settings/publishing/ + # ------------------------------------------------------------------- + publish-testpypi: + needs: build + if: >- + github.event_name == 'workflow_dispatch' && github.event.inputs.publish_target == 'testpypi' + || (github.event_name == 'push' && (contains(github.ref, '-alpha') || contains(github.ref, '-beta') || contains(github.ref, '-rc'))) + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/botanu + permissions: + id-token: write # required for OIDC trusted publishing + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + skip-existing: true + + # ------------------------------------------------------------------- + # Publish to PyPI via Trusted Publishing (OIDC — no API tokens) + # Requires PyPI project to be configured for GitHub OIDC: + # https://pypi.org/manage/project/botanu/settings/publishing/ + # ------------------------------------------------------------------- + publish-pypi: + needs: build + if: | + github.event_name == 'workflow_dispatch' && github.event.inputs.publish_target == 'pypi' + || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-')) + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/botanu + permissions: + id-token: write # required for OIDC trusted publishing + steps: + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + # ------------------------------------------------------------------- + # Create GitHub Release with auto-generated notes + # ------------------------------------------------------------------- + github-release: + needs: [build, publish-pypi] + if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/download-artifact@v4 + with: + name: dist + path: dist/ + + - name: Create GitHub Release + env: + GH_TOKEN: ${{ github.token }} + run: | + if [[ "${{ github.ref_name }}" == *"-"* ]]; then + gh release create "${{ github.ref_name }}" dist/* --generate-notes --prerelease + else + gh release create "${{ github.ref_name 
}}" dist/* --generate-notes + fi diff --git a/.github/workflows/repolinter.yml b/.github/workflows/repolinter.yml new file mode 100644 index 0000000..3f1add9 --- /dev/null +++ b/.github/workflows/repolinter.yml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: Repolinter + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run Repolinter + uses: todogroup/repolinter-action@v1 + with: + config_file: .github/repolinter.json diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 0000000..2e56bfc --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,35 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +name: OpenSSF Scorecard + +on: + push: + branches: [main] + schedule: + - cron: "30 1 * * 1" # Weekly Monday 01:30 UTC + +permissions: read-all + +jobs: + analysis: + runs-on: ubuntu-latest + permissions: + security-events: write # upload SARIF + id-token: write # publish results + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Run OpenSSF Scorecard + uses: ossf/scorecard-action@v2 + with: + results_file: results.sarif + results_format: sarif + publish_results: true + + - name: Upload SARIF to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7aba505 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +repos: + # General file hygiene + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-added-large-files + args: ["--maxkb=500"] + - id: check-merge-conflict + - id: detect-private-key + + # Ruff — linter + formatter (replaces flake8, isort, black) + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.4.8 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + # Type checking + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.10.0 + hooks: + - id: mypy + additional_dependencies: ["opentelemetry-api>=1.20.0"] + args: [--ignore-missing-imports] + pass_filenames: false + entry: mypy src/botanu/ + + # SPDX license header check + - repo: https://github.com/fsfe/reuse-tool + rev: v3.0.2 + hooks: + - id: reuse + + # DCO sign-off check (local — CI uses dcoapp/app) + - repo: https://github.com/christophebedard/dco-check + rev: v1.1.0 + hooks: + - id: dco-check diff --git a/.repolinterrc.yml b/.repolinterrc.yml new file mode 100644 index 0000000..d692b3b --- /dev/null +++ b/.repolinterrc.yml @@ -0,0 +1,124 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 +# +# Repolinter configuration for LF AI & Data Foundation compliance. 
+# See: https://github.com/todogroup/repolinter + +version: 2 +axioms: + linguist: language + licensee: license + packagers: packager + +rules: + # ---- License ---- + license-file-exists: + level: error + rule: + type: file-existence + options: + globsAny: + - LICENSE* + - COPYING* + + # ---- README ---- + readme-file-exists: + level: error + rule: + type: file-existence + options: + globsAny: + - README* + + # ---- CONTRIBUTING ---- + contributing-file-exists: + level: error + rule: + type: file-existence + options: + globsAny: + - CONTRIBUTING* + - .github/CONTRIBUTING* + + # ---- Code of Conduct ---- + code-of-conduct-file-exists: + level: error + rule: + type: file-existence + options: + globsAny: + - CODE_OF_CONDUCT* + - .github/CODE_OF_CONDUCT* + + # ---- SECURITY ---- + security-file-exists: + level: warning + rule: + type: file-existence + options: + globsAny: + - SECURITY* + - .github/SECURITY* + + # ---- NOTICE / attribution ---- + notice-file-exists: + level: warning + rule: + type: file-existence + options: + globsAny: + - NOTICE* + + # ---- DCO ---- + dco-file-exists: + level: warning + rule: + type: file-existence + options: + globsAny: + - DCO* + + # ---- CHANGELOG ---- + changelog-file-exists: + level: warning + rule: + type: file-existence + options: + globsAny: + - CHANGELOG* + - HISTORY* + + # ---- No binaries ---- + binaries-not-present: + level: error + rule: + type: file-type-exclusion + options: + type: + - "**/*.exe" + - "**/*.dll" + - "**/*.so" + - "**/*.dylib" + - "**/*.pyc" + - "**/*.pyo" + + # ---- Source files have SPDX headers ---- + source-license-headers-exist: + level: warning + rule: + type: file-contents + options: + globsAll: + - "src/**/*.py" + content: "SPDX-License-Identifier" + fail-on-non-existent: false + + # ---- No test credentials ---- + test-directory-exists: + level: warning + rule: + type: file-existence + options: + globsAny: + - tests/* + - test/* diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9eed0fc --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,75 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [0.1.0] - 2026-02-05 + +### Added + +- Initial open-source release under Apache-2.0 license +- **Core SDK** + - `enable()` / `disable()` bootstrap functions for SDK initialization + - `@botanu_use_case` decorator with UUIDv7 run_id generation + - `@botanu_outcome` decorator for sub-function outcome tracking + - `emit_outcome()` helper for recording business outcomes + - `set_business_context()` for cost attribution dimensions + - `RunContextEnricher` span processor for automatic run_id propagation + +- **LLM Tracking** (aligned with OTel GenAI semantic conventions) + - `track_llm_call()` context manager for LLM/model operations + - `track_tool_call()` context manager for tool/function calls + - Token usage tracking (input, output, cached) + - Provider normalization for 15+ LLM providers + - Support for all GenAI operations (chat, embeddings, etc.) 
+ +- **Data Tracking** + - `track_db_operation()` for database operations + - `track_storage_operation()` for object storage (S3, GCS, Azure Blob) + - `track_messaging_operation()` for message queues (SQS, Kafka, Pub/Sub) + - System normalization for 30+ database/storage systems + +- **Context Propagation** + - W3C Baggage propagation for cross-service run_id correlation + - Lean mode (default) and full mode propagation options + - `RunContext` model with retry tracking and deadline support + +- **Resource Detection** + - Kubernetes (pod, namespace, container) + - AWS (EC2, ECS, Lambda, Fargate) + - GCP (GCE, Cloud Run, Cloud Functions) + - Azure (VM, Container Apps, Functions) + +- **Auto-Instrumentation Support** + - HTTP clients: requests, httpx, urllib3, aiohttp + - Web frameworks: FastAPI, Flask, Django, Starlette + - Databases: SQLAlchemy, psycopg2, asyncpg, pymongo, Redis + - Messaging: Celery, Kafka + - GenAI: OpenAI, Anthropic, Vertex AI, Google GenAI, LangChain + +- **Optional Extras** + - `[sdk]` - OTel SDK + OTLP exporter + - `[instruments]` - Common library instrumentation + - `[genai]` - GenAI provider instrumentation + - `[carriers]` - Cross-service propagation helpers + - `[all]` - Everything included + - `[dev]` - Development and testing tools + +- **Documentation** + - Comprehensive docs in `/docs` following LF format + - Getting started guides + - API reference + - Best practices and anti-patterns + +### Dependencies + +- Core: `opentelemetry-api >= 1.20.0` +- SDK extra: `opentelemetry-sdk`, `opentelemetry-exporter-otlp-proto-http` +- Python: `>= 3.9` + +[Unreleased]: https://github.com/botanu-ai/botanu-sdk-python/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/botanu-ai/botanu-sdk-python/releases/tag/v0.1.0 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..643856c --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,3 @@ +# Botanu Code of Conduct + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers agree to abide by the Code of Conduct available at https://lfprojects.org/policies/code-of-conduct/ \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..6d13cd5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,87 @@ +# Contributing to Botanu SDK + +Thank you for your interest in contributing to Botanu SDK! This document provides guidelines and instructions for contributing. + +## Developer Certificate of Origin (DCO) + +This project requires all commits to be signed off in accordance with the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). This certifies that you have the right to submit your contribution under the project's open source license. + +To sign off your commits, add the `-s` flag to your git commit command: + +```bash +git commit -s -m "Your commit message" +``` + +This will add a `Signed-off-by` line to your commit message: + +``` +Signed-off-by: Your Name +``` + +If you've already made commits without signing off, you can amend them: + +```bash +# Amend the last commit +git commit --amend -s + +# Rebase and sign off multiple commits +git rebase --signoff HEAD~N # where N is the number of commits +``` + +## Development Setup + +1. Clone the repository: + ```bash + git clone https://github.com/botanu-ai/botanu-sdk-python.git + cd botanu-sdk-python + ``` + +2. 
Create a virtual environment and install dependencies: + ```bash + python -m venv .venv + source .venv/bin/activate # On Windows: .venv\Scripts\activate + pip install -e ".[dev]" + ``` + +3. Run tests: + ```bash + pytest tests/ + ``` + +4. Run linting and type checks: + ```bash + ruff check src/ tests/ + ruff format src/ tests/ + mypy src/botanu/ + ``` + +## Pull Request Process + +1. Fork the repository and create a feature branch +2. Make your changes with appropriate tests +3. Ensure all tests pass and linting is clean +4. Sign off all commits with DCO +5. Submit a pull request with a clear description + +## Code Style + +- Follow [PEP 8](https://pep8.org/) style guidelines +- Use type hints for all function signatures +- Write docstrings for public APIs +- Keep commits focused and atomic + +## Reporting Issues + +Please use GitHub Issues to report bugs or request features. Include: +- A clear description of the issue +- Steps to reproduce (for bugs) +- Expected vs actual behavior +- Python version and OS + +## Code of Conduct + +This project follows the [LF Projects Code of Conduct](https://lfprojects.org/policies/code-of-conduct/). + +## License + +By contributing, you agree that your contributions will be licensed under the Apache License 2.0. diff --git a/DCO b/DCO new file mode 100644 index 0000000..49b8cb0 --- /dev/null +++ b/DCO @@ -0,0 +1,34 @@ +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. diff --git a/GOVERNANCE.md b/GOVERNANCE.md new file mode 100644 index 0000000..9f7a9f0 --- /dev/null +++ b/GOVERNANCE.md @@ -0,0 +1,39 @@ +# Governance + +This project follows the governance model of the [LF AI & Data Foundation](https://lfaidata.foundation/). + +## Roles + +### Maintainers + +Maintainers are responsible for: +- Reviewing and merging pull requests +- Triaging issues +- Releasing new versions +- Ensuring project quality and direction + +Current maintainers are listed in [MAINTAINERS.md](./MAINTAINERS.md). + +### Contributors + +Anyone can contribute by: +- Opening issues +- Submitting pull requests +- Participating in discussions +- Improving documentation + +See [CONTRIBUTING.md](./CONTRIBUTING.md) for contribution guidelines. 
+ +## Decision Making + +- Technical decisions are made through pull request reviews +- Significant changes require approval from at least one maintainer +- Disputes are resolved by maintainer consensus + +## Code of Conduct + +All participants must follow the [Code of Conduct](./CODE_OF_CONDUCT.md). + +## License + +This project is licensed under Apache-2.0. See [LICENSE](./LICENSE). diff --git a/LICENSE b/LICENSE index 49d106a..454411d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,200 @@ -BOTANU SOFTWARE LICENSE AGREEMENT -Copyright (c) 2026 Botanu, Inc. -All rights reserved. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -This software and associated documentation files are proprietary -and confidential to Botanu, Inc. + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -Permission is hereby granted to install and use the Software solely for internal -business purposes and only in connection with authorized use of Botanu services, -subject to the terms of a separate written agreement between you and Botanu, Inc. + 1. Definitions. -You may not: -- copy, modify, merge, publish, distribute, sublicense, or sell copies of the Software; -- reverse engineer, decompile, or disassemble the Software; -- remove or alter any proprietary notices contained in the Software. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE -FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by the Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding any notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. Please also get the + boilerplate text of the NOTICE file for your work. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MAINTAINERS.md b/MAINTAINERS.md new file mode 100644 index 0000000..cdd0615 --- /dev/null +++ b/MAINTAINERS.md @@ -0,0 +1,29 @@ +# Maintainers + +This file lists the maintainers of the Botanu SDK Python project. + +## Current Maintainers + +The maintainers are listed in alphabetical order by GitHub handle. + +| Name | GitHub | Role | +|------|--------|------| +| Deborah Jacob | [@deborahjacob-botanu](https://github.com/deborahjacob-botanu) | Lead Maintainer | + +## Becoming a Maintainer + +Maintainers are contributors who have demonstrated: + +- Sustained contributions to the project +- Deep understanding of the codebase +- Commitment to the project's goals and community + +If you're interested in becoming a maintainer, start by making regular contributions and engaging with the community. 
+ +## Maintainer Responsibilities + +- Review and merge pull requests +- Triage issues +- Participate in project planning +- Uphold the Code of Conduct +- Help onboard new contributors diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..0ff65a4 --- /dev/null +++ b/NOTICE @@ -0,0 +1,17 @@ +Botanu SDK for Python +Copyright 2026 The Botanu Authors + +This product includes software developed at +Botanu, Inc. (https://botanu.ai/). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +This product includes software from the following open source projects: + +- OpenTelemetry Python (https://github.com/open-telemetry/opentelemetry-python) + Copyright The OpenTelemetry Authors + Licensed under the Apache License, Version 2.0 diff --git a/README.md b/README.md new file mode 100644 index 0000000..0455a84 --- /dev/null +++ b/README.md @@ -0,0 +1,159 @@ +# Botanu SDK for Python + +[![CI](https://github.com/botanu-ai/botanu-sdk-python/actions/workflows/ci.yml/badge.svg)](https://github.com/botanu-ai/botanu-sdk-python/actions/workflows/ci.yml) +[![PyPI version](https://img.shields.io/pypi/v/botanu)](https://pypi.org/project/botanu/) +[![Python](https://img.shields.io/badge/python-3.9%20|%203.10%20|%203.11%20|%203.12%20|%203.13-blue)](https://www.python.org/) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) +[![LF AI & Data](https://img.shields.io/badge/LF%20AI%20%26%20Data-member-blue)](https://lfaidata.foundation/) + +**Run-level cost attribution for AI workflows, built on OpenTelemetry.** + +Botanu adds **runs** on top of distributed tracing. A run represents one business transaction that may span multiple LLM calls, database queries, and microservices. By correlating every operation to a stable `run_id`, you get per-transaction cost attribution without sampling artifacts. + +## How It Works + +``` +User Request + | + v + Entry Service Intermediate Service LLM / DB + @botanu_use_case --> enable() propagates --> auto-instrumented + creates run_id run_id via W3C Baggage spans tagged with run_id +``` + +1. **Entry point** creates a `run_id` with `@botanu_use_case` +2. **Every service** calls `enable()` to propagate the `run_id` via W3C Baggage +3. **All spans** across all services share the same `run_id` +4. **Traces export** to your OTel Collector via OTLP (configured by environment variable) + +## Quick Start + +### Install + +```bash +pip install botanu +``` + +One install. Includes OTel SDK, OTLP exporter, and auto-instrumentation for 50+ libraries. + +### Instrument Your Code + +**Entry service** (where the workflow begins): + +```python +from botanu import enable, botanu_use_case + +enable() # reads config from env vars + +@botanu_use_case(name="Customer Support") +async def handle_ticket(ticket_id: str): + data = await db.query(ticket_id) + result = await llm.complete(data) + return result +``` + +**Every other service** (intermediate, downstream): + +```python +from botanu import enable + +enable() # propagates run_id from incoming request +``` + +That's it. No collector endpoint in code. No manual span creation. + +### Configure via Environment Variables + +All configuration is via environment variables. 
**Zero hardcoded values in code.** + +| Variable | Description | Default | +|----------|-------------|---------| +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Collector endpoint | `http://localhost:4318` | +| `OTEL_SERVICE_NAME` | Service name | `unknown_service` | +| `BOTANU_ENVIRONMENT` | Deployment environment | `production` | + +```yaml +# docker-compose.yml / Kubernetes deployment +environment: + - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 + - OTEL_SERVICE_NAME=my-service +``` + +See [Configuration Reference](./docs/getting-started/configuration.md) for all options. + +## Auto-Instrumentation + +Everything is included and auto-detected. If the library is in your dependencies, it gets instrumented: + +| Category | Libraries | +|----------|-----------| +| **LLM Providers** | OpenAI, Anthropic, Vertex AI, Google GenAI, LangChain, Ollama, CrewAI | +| **Web Frameworks** | FastAPI, Flask, Django, Starlette, Falcon, Pyramid, Tornado | +| **HTTP Clients** | requests, httpx, urllib3, aiohttp | +| **Databases** | PostgreSQL (psycopg2/3, asyncpg), MySQL, SQLite, MongoDB, Redis, SQLAlchemy, Elasticsearch, Cassandra | +| **Messaging** | Celery, Kafka, RabbitMQ (pika) | +| **AWS** | botocore, boto3 (SQS) | +| **gRPC** | Client + Server | +| **Runtime** | logging, threading, asyncio | + +No manual instrumentation required. Libraries not installed are silently skipped. + +## Kubernetes at Scale + +For large deployments (2000+ services), only entry points need code changes: + +| Service Type | Code Change | Configuration | +|--------------|-------------|---------------| +| Entry point | `@botanu_use_case` decorator | `OTEL_EXPORTER_OTLP_ENDPOINT` env var | +| Intermediate | `enable()` call only | `OTEL_EXPORTER_OTLP_ENDPOINT` env var | + +See [Kubernetes Deployment Guide](./docs/integration/kubernetes.md) for details. + +## Architecture + +``` + +---------+ +---------+ +---------+ + | Service | --> | Service | --> | Service | + | enable()| --> | enable()| --> | enable()| + +---------+ +---------+ +---------+ + | | | + v v v + +-------------------------------------+ + | OTel Collector (OTLP) | + +-------------------------------------+ + | | | + v v v + Jaeger/Tempo Prometheus Your Backend +``` + +The SDK is a thin layer on OpenTelemetry: +- **SDK**: Generates `run_id`, propagates context, auto-instruments +- **Collector**: PII redaction, cardinality limits, routing, vendor enrichment + +## Documentation + +- [Getting Started](./docs/getting-started/) - Installation, quickstart, configuration +- [Concepts](./docs/concepts/) - Runs, context propagation, cost attribution +- [Integration](./docs/integration/) - Auto-instrumentation, Kubernetes, collector setup +- [API Reference](./docs/api/) - `enable()`, `@botanu_use_case`, `emit_outcome()` + +## Requirements + +- Python 3.9+ +- OpenTelemetry Collector (recommended for production) + +## Contributing + +We welcome contributions. See [CONTRIBUTING.md](./CONTRIBUTING.md). + +This project follows the [Developer Certificate of Origin (DCO)](https://developercertificate.org/). Sign off your commits: + +```bash +git commit -s -m "Your commit message" +``` + +## License + +[Apache-2.0](./LICENSE) + +This project is an [LF AI & Data Foundation](https://lfaidata.foundation/) project. diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..d2454ea --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,199 @@ +# Release Process + +This document describes the release process for Botanu SDK. 
+ +## Versioning + +Botanu SDK follows [Semantic Versioning](https://semver.org/): + +- **MAJOR** (1.0.0): Breaking changes to public API +- **MINOR** (0.2.0): New features, backwards compatible +- **PATCH** (0.1.1): Bug fixes, backwards compatible + +Pre-release versions use suffixes: +- `-alpha.N`: Early development, unstable +- `-beta.N`: Feature complete, testing +- `-rc.N`: Release candidate, final testing + +## Prerequisites + +Before releasing, ensure: + +1. All CI checks pass on `main` branch +2. CHANGELOG.md is updated with release notes +3. Documentation is up to date +4. Test coverage meets threshold (70%+) + +## Release Workflow + +### 1. Prepare the Release + +```bash +# Ensure you're on main with latest changes +git checkout main +git pull origin main + +# Update CHANGELOG.md +# - Move items from [Unreleased] to new version section +# - Add release date +# - Update comparison links at bottom + +# Commit changelog +git add CHANGELOG.md +git commit -s -m "docs: prepare release v0.1.0" +git push origin main +``` + +### 2. Create a Release Tag + +```bash +# For production release +git tag -a v0.1.0 -m "Release v0.1.0" + +# For pre-release +git tag -a v0.1.0-alpha.1 -m "Release v0.1.0-alpha.1" + +# Push tag +git push origin v0.1.0 +``` + +### 3. Automated Publishing + +When a tag is pushed: + +- **Pre-release tags** (`v*-alpha*`, `v*-beta*`, `v*-rc*`) → TestPyPI +- **Release tags** (`v*` without suffix) → PyPI + GitHub Release + +The workflow uses [Trusted Publishing (OIDC)](https://docs.pypi.org/trusted-publishers/) — no API tokens needed. + +### 4. Manual Publishing (if needed) + +You can manually trigger publishing from the Actions tab: + +1. Go to Actions → "Release to PyPI" +2. Click "Run workflow" +3. Select target: `testpypi` or `pypi` +4. Click "Run workflow" + +## TestPyPI Verification + +After publishing to TestPyPI, verify installation: + +```bash +# Create a test environment +python -m venv test-env +source test-env/bin/activate # or test-env\Scripts\activate on Windows + +# Install from TestPyPI +pip install --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple/ \ + botanu + +# Verify import +python -c "import botanu; print(botanu.__version__)" + +# Run quick test +python -c " +from botanu import enable, botanu_use_case +enable(service_name='test') +print('Botanu SDK loaded successfully!') +" +``` + +## PyPI Trusted Publishing Setup + +### Initial Setup (One-time) + +1. **Create PyPI project** (if not exists): + - Go to https://pypi.org/manage/projects/ + - Create new project named `botanu` + +2. **Configure Trusted Publisher on PyPI**: + - Go to https://pypi.org/manage/project/botanu/settings/publishing/ + - Add new publisher: + - Owner: `botanu-ai` + - Repository: `botanu-sdk-python` + - Workflow: `release.yml` + - Environment: `pypi` + +3. **Configure Trusted Publisher on TestPyPI**: + - Go to https://test.pypi.org/manage/project/botanu/settings/publishing/ + - Add new publisher with same settings, environment: `testpypi` + +4. **Create GitHub Environments**: + - Go to repo Settings → Environments + - Create `pypi` environment (for production) + - Create `testpypi` environment (for testing) + - Optionally add protection rules (required reviewers, etc.) 
+ +## Local Build Verification + +Before releasing, verify the build locally: + +```bash +# Install build tools +pip install build twine + +# Build the package +python -m build + +# Check the package +twine check dist/* + +# List contents +tar -tvf dist/botanu-*.tar.gz +unzip -l dist/botanu-*.whl + +# Test installation from local wheel +pip install dist/botanu-*.whl +python -c "import botanu; print(botanu.__version__)" +``` + +## Version Determination + +The version is determined by `hatch-vcs` from git tags: + +- Tagged commit: `0.1.0` +- Commits after tag: `0.1.1.dev3+g1234567` +- No tags: `0.0.0.dev0` + +To see what version will be used: + +```bash +pip install hatch-vcs +python -c "from setuptools_scm import get_version; print(get_version())" +``` + +## Rollback Procedure + +If a release has issues: + +1. **Yank from PyPI** (hides from install, but doesn't delete): + ```bash + # Via web UI: PyPI project → Release history → Yank + # Or via API (requires token) + ``` + +2. **Delete GitHub Release** (if needed): + ```bash + gh release delete v0.1.0 --yes + git push origin --delete v0.1.0 + ``` + +3. **Fix and re-release** with a new patch version (e.g., `v0.1.1`) + +## Release Checklist + +- [ ] All CI checks pass +- [ ] CHANGELOG.md updated +- [ ] Documentation updated +- [ ] Version tag follows semver +- [ ] Tag pushed to origin +- [ ] TestPyPI verification passed (for major releases) +- [ ] PyPI package visible +- [ ] GitHub Release created +- [ ] Announcement posted (if applicable) + +## Maintainers + +See [MAINTAINERS.md](./MAINTAINERS.md) for the list of release maintainers. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..4f89ae0 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,36 @@ +# Security Policy + +## Supported Versions + +| Version | Supported | +| ------- | ------------------ | +| 0.1.x | :white_check_mark: | + +Please upgrade to the latest stable version of Botanu which will have known security issues addressed. + +## Reporting a Vulnerability + +The Botanu team takes security vulnerabilities seriously. We appreciate your efforts to responsibly disclose your findings. + +### How to Report + +To report a security vulnerability, please use one of the following methods: + +1. **GitHub Security Advisories** (Preferred): Use the [Security tab](https://github.com/botanu-ai/botanu-sdk-python/security/advisories/new) to privately report a vulnerability. + +2. **Email**: Contact the [maintainer team](https://github.com/botanu-ai/botanu-sdk-python/blob/main/MAINTAINERS.md) + +Please do **not** post security vulnerabilities to the public issue tracker. + +### What to Include + +- Type of vulnerability +- Full paths of affected source files +- Step-by-step instructions to reproduce the issue +- Impact of the issue and potential attack scenarios + +### Response Timeline + +- **Initial Response**: Within 48 hours +- **Status Update**: Within 7 days +- **Resolution Target**: Within 90 days (depending on complexity) diff --git a/docs/api/configuration.md b/docs/api/configuration.md new file mode 100644 index 0000000..cf417ac --- /dev/null +++ b/docs/api/configuration.md @@ -0,0 +1,417 @@ +# Configuration API Reference + +## BotanuConfig + +Dataclass for SDK configuration. 
+ +```python +from botanu.sdk.config import BotanuConfig +``` + +### Fields + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `service_name` | `str` | `"unknown_service"` | Service name (from `OTEL_SERVICE_NAME`) | +| `service_version` | `str` | `None` | Service version (from `OTEL_SERVICE_VERSION`) | +| `service_namespace` | `str` | `None` | Service namespace (from `OTEL_SERVICE_NAMESPACE`) | +| `deployment_environment` | `str` | `"production"` | Environment (from `OTEL_DEPLOYMENT_ENVIRONMENT` or `BOTANU_ENVIRONMENT`) | +| `auto_detect_resources` | `bool` | `True` | Auto-detect cloud resources | +| `otlp_endpoint` | `str` | `"http://localhost:4318/v1/traces"` | OTLP endpoint | +| `otlp_headers` | `dict` | `None` | Custom headers for OTLP exporter | +| `max_export_batch_size` | `int` | `512` | Max spans per batch | +| `max_queue_size` | `int` | `2048` | Max spans in queue | +| `schedule_delay_millis` | `int` | `5000` | Delay between batch exports | +| `propagation_mode` | `str` | `"lean"` | `"lean"` or `"full"` | +| `auto_instrument_packages` | `list` | `[...]` | Packages to auto-instrument | + +### Constructor + +```python +config = BotanuConfig( + service_name="my-service", + deployment_environment="production", + otlp_endpoint="http://collector:4318/v1/traces", +) +``` + +### Class Methods + +#### from_yaml() + +Load configuration from a YAML file. + +```python +@classmethod +def from_yaml(cls, path: Optional[str] = None) -> BotanuConfig +``` + +**Parameters:** +- `path`: Path to YAML config file + +**Raises:** +- `FileNotFoundError`: If config file doesn't exist +- `ValueError`: If YAML is malformed +- `ImportError`: If PyYAML is not installed + +**Example:** + +```python +config = BotanuConfig.from_yaml("config/botanu.yaml") +``` + +#### from_file_or_env() + +Load config from file if exists, otherwise use environment variables. + +```python +@classmethod +def from_file_or_env(cls, path: Optional[str] = None) -> BotanuConfig +``` + +**Search order:** +1. Explicit `path` argument +2. `BOTANU_CONFIG_FILE` environment variable +3. `./botanu.yaml` +4. `./botanu.yml` +5. `./config/botanu.yaml` +6. `./config/botanu.yml` +7. Falls back to environment-only config + +**Example:** + +```python +# Auto-discovers config file +config = BotanuConfig.from_file_or_env() + +# Explicit path +config = BotanuConfig.from_file_or_env("my-config.yaml") +``` + +### Instance Methods + +#### to_dict() + +Export configuration as dictionary. + +```python +def to_dict(self) -> Dict[str, Any] +``` + +**Example:** + +```python +config = BotanuConfig(service_name="my-service") +print(config.to_dict()) +# { +# "service": {"name": "my-service", ...}, +# "otlp": {"endpoint": "...", ...}, +# ... 
+# } +``` + +--- + +## YAML Configuration Format + +### Full Schema + +```yaml +service: + name: string # Service name + version: string # Service version + namespace: string # Service namespace + environment: string # Deployment environment + +resource: + auto_detect: boolean # Auto-detect cloud resources + +otlp: + endpoint: string # OTLP endpoint URL + headers: # Custom headers + header-name: value + +export: + batch_size: integer # Max spans per batch + queue_size: integer # Max spans in queue + delay_ms: integer # Delay between exports + +propagation: + mode: string # "lean" or "full" + +auto_instrument_packages: # List of packages to instrument + - package_name +``` + +### Environment Variable Interpolation + +```yaml +service: + name: ${OTEL_SERVICE_NAME:-default-service} + environment: ${ENVIRONMENT} + +otlp: + endpoint: ${COLLECTOR_URL:-http://localhost:4318}/v1/traces + headers: + Authorization: Bearer ${API_TOKEN} +``` + +Syntax: +- `${VAR_NAME}` - Required variable +- `${VAR_NAME:-default}` - Variable with default value + +--- + +## enable() + +Bootstrap function to initialize the SDK. + +```python +from botanu import enable + +enable( + service_name: Optional[str] = None, + otlp_endpoint: Optional[str] = None, + config: Optional[BotanuConfig] = None, + auto_instrument: bool = True, + auto_instrument_packages: Optional[List[str]] = None, + propagation_mode: Optional[str] = None, + **kwargs: Any, +) -> None +``` + +### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `service_name` | `str` | From env | Service name | +| `otlp_endpoint` | `str` | From env | OTLP endpoint URL | +| `config` | `BotanuConfig` | `None` | Pre-built configuration | +| `auto_instrument` | `bool` | `True` | Enable auto-instrumentation | +| `auto_instrument_packages` | `list` | `None` | Override default packages | +| `propagation_mode` | `str` | `None` | `"lean"` or `"full"` | +| `**kwargs` | `Any` | `{}` | Additional config fields | + +### Behavior + +1. Creates/merges `BotanuConfig` +2. Configures `TracerProvider` with `RunContextEnricher` +3. Sets up OTLP exporter (if SDK extras installed) +4. Enables auto-instrumentation (if requested) +5. Configures W3C Baggage propagation + +### Examples + +#### Minimal + +```python +from botanu import enable + +enable(service_name="my-service") +``` + +#### With Config Object + +```python +from botanu import enable +from botanu.sdk.config import BotanuConfig + +config = BotanuConfig.from_yaml("config/botanu.yaml") +enable(config=config) +``` + +#### Custom Options + +```python +enable( + service_name="my-service", + otlp_endpoint="http://collector:4318/v1/traces", + auto_instrument_packages=["fastapi", "openai_v2"], + propagation_mode="full", +) +``` + +--- + +## disable() + +Disable the SDK and clean up resources. + +```python +from botanu import disable + +disable() -> None +``` + +### Behavior + +1. Flushes pending spans +2. Shuts down span processors +3. Disables instrumentation + +--- + +## is_enabled() + +Check if the SDK is currently enabled. 
+ +```python +from botanu import is_enabled + +is_enabled() -> bool +``` + +### Example + +```python +if not is_enabled(): + enable(service_name="my-service") +``` + +--- + +## Environment Variables + +### OpenTelemetry Standard + +| Variable | Description | Default | +|----------|-------------|---------| +| `OTEL_SERVICE_NAME` | Service name | `"unknown_service"` | +| `OTEL_SERVICE_VERSION` | Service version | None | +| `OTEL_SERVICE_NAMESPACE` | Service namespace | None | +| `OTEL_DEPLOYMENT_ENVIRONMENT` | Deployment environment | `"production"` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP base endpoint | `"http://localhost:4318"` | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | OTLP traces endpoint (full URL) | None | +| `OTEL_EXPORTER_OTLP_HEADERS` | OTLP headers (key=value pairs) | None | + +### Botanu-Specific + +| Variable | Description | Default | +|----------|-------------|---------| +| `BOTANU_ENVIRONMENT` | Fallback for environment | `"production"` | +| `BOTANU_PROPAGATION_MODE` | `"lean"` or `"full"` | `"lean"` | +| `BOTANU_AUTO_DETECT_RESOURCES` | Auto-detect cloud resources | `"true"` | +| `BOTANU_CONFIG_FILE` | Path to YAML config file | None | + +--- + +## RunContext + +Model for run metadata. + +```python +from botanu.models.run_context import RunContext +``` + +### Class Methods + +#### create() + +Create a new run context. + +```python +@classmethod +def create( + cls, + use_case: str, + workflow: Optional[str] = None, + workflow_version: Optional[str] = None, + environment: Optional[str] = None, + tenant_id: Optional[str] = None, + parent_run_id: Optional[str] = None, + deadline_seconds: Optional[float] = None, +) -> RunContext +``` + +#### create_retry() + +Create a retry context from an original run. + +```python +@classmethod +def create_retry(cls, original: RunContext) -> RunContext +``` + +#### from_baggage() + +Reconstruct context from baggage dictionary. + +```python +@classmethod +def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext] +``` + +### Instance Methods + +#### to_baggage_dict() + +Serialize to baggage format. + +```python +def to_baggage_dict(self, lean_mode: bool = True) -> Dict[str, str] +``` + +#### to_span_attributes() + +Serialize to span attributes. + +```python +def to_span_attributes(self) -> Dict[str, Any] +``` + +#### as_current() + +Context manager to set this as the current run. + +```python +def as_current(self) -> ContextManager +``` + +#### complete() + +Mark the run as complete. + +```python +def complete( + self, + status: RunStatus, + error_class: Optional[str] = None, +) -> None +``` + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `run_id` | `str` | Unique UUIDv7 identifier | +| `root_run_id` | `str` | Root run ID (same as run_id for first attempt) | +| `use_case` | `str` | Business use case name | +| `workflow` | `str` | Workflow/function name | +| `workflow_version` | `str` | Version hash | +| `environment` | `str` | Deployment environment | +| `tenant_id` | `str` | Tenant identifier | +| `parent_run_id` | `str` | Parent run ID | +| `attempt` | `int` | Attempt number | +| `start_time` | `datetime` | Run start time | +| `outcome` | `RunOutcome` | Recorded outcome | + +--- + +## RunStatus + +Enum for run status. 
+ +```python +from botanu.models.run_context import RunStatus + +class RunStatus(Enum): + SUCCESS = "success" + FAILURE = "failure" + PARTIAL = "partial" +``` + +## See Also + +- [Configuration Guide](../getting-started/configuration.md) - Configuration how-to +- [Architecture](../concepts/architecture.md) - SDK design +- [Existing OTel Setup](../integration/existing-otel.md) - Integration patterns diff --git a/docs/api/decorators.md b/docs/api/decorators.md new file mode 100644 index 0000000..36eb768 --- /dev/null +++ b/docs/api/decorators.md @@ -0,0 +1,99 @@ +# Decorators API Reference + +## @botanu_use_case + +The primary decorator for creating runs with automatic context propagation. + +```python +from botanu import botanu_use_case + +@botanu_use_case( + name: str, + workflow: Optional[str] = None, + environment: Optional[str] = None, + tenant_id: Optional[str] = None, +) +``` + +### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `name` | `str` | Required | Use case name for grouping | +| `workflow` | `str` | Function name | Workflow identifier | +| `environment` | `str` | From env | Deployment environment | +| `tenant_id` | `str` | `None` | Tenant identifier for multi-tenant systems | + +### Example + +```python +from botanu import botanu_use_case + +@botanu_use_case(name="my_workflow") +def my_function(): + data = db.query(...) + result = llm.complete(...) + return result +``` + +### Span Attributes + +| Attribute | Description | +|-----------|-------------| +| `botanu.run_id` | Generated UUIDv7 | +| `botanu.use_case` | `name` parameter | +| `botanu.workflow` | `workflow` parameter or function name | +| `botanu.environment` | Deployment environment | +| `botanu.tenant_id` | Tenant identifier (if provided) | + +### Alias + +`use_case` is an alias for `botanu_use_case`: + +```python +from botanu import use_case + +@use_case(name="my_workflow") +def my_function(): + return db.query(...) +``` + +## @botanu_outcome + +Decorator for sub-functions to emit outcomes based on success/failure. + +```python +from botanu import botanu_outcome + +@botanu_outcome() +def extract_data(): + return fetch_from_source() +``` + +- Emits "success" on completion +- Emits "failed" with exception class name if exception raised +- Does NOT create a new run + +### Example + +```python +from botanu import botanu_use_case, botanu_outcome + +@botanu_use_case(name="my_workflow") +def my_function(): + step_one() + step_two() + +@botanu_outcome() +def step_one(): + return do_work() + +@botanu_outcome() +def step_two(): + return do_more_work() +``` + +## See Also + +- [Quickstart](../getting-started/quickstart.md) +- [Run Context](../concepts/run-context.md) diff --git a/docs/api/tracking.md b/docs/api/tracking.md new file mode 100644 index 0000000..dcd35f7 --- /dev/null +++ b/docs/api/tracking.md @@ -0,0 +1,511 @@ +# Tracking API Reference + +## LLM Tracking + +### track_llm_call() + +Context manager for tracking LLM/model calls. + +```python +from botanu.tracking.llm import track_llm_call + +with track_llm_call( + provider: str, + model: str, + operation: str = ModelOperation.CHAT, + client_request_id: Optional[str] = None, + **kwargs: Any, +) -> Generator[LLMTracker, None, None]: +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `provider` | `str` | Required | LLM provider (openai, anthropic, etc.) | +| `model` | `str` | Required | Model name/ID (gpt-4, claude-3-opus, etc.) 
| +| `operation` | `str` | `"chat"` | Operation type (see ModelOperation) | +| `client_request_id` | `str` | `None` | Your tracking ID | +| `**kwargs` | `Any` | `{}` | Additional span attributes | + +#### Returns + +Yields an `LLMTracker` instance. + +#### Example + +```python +with track_llm_call(provider="openai", model="gpt-4") as tracker: + response = await client.chat.completions.create(...) + tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + ) + tracker.set_request_id(response.id) +``` + +--- + +### LLMTracker + +Tracker object for recording LLM call details. + +#### Methods + +##### set_tokens() + +```python +def set_tokens( + input_tokens: int = 0, + output_tokens: int = 0, + cached_tokens: int = 0, + cache_read_tokens: int = 0, + cache_write_tokens: int = 0, +) -> LLMTracker +``` + +Records token usage. + +##### set_request_id() + +```python +def set_request_id( + provider_request_id: Optional[str] = None, + client_request_id: Optional[str] = None, +) -> LLMTracker +``` + +Records request IDs for billing reconciliation. + +##### set_response_model() + +```python +def set_response_model(model: str) -> LLMTracker +``` + +Records the actual model used in response. + +##### set_finish_reason() + +```python +def set_finish_reason(reason: str) -> LLMTracker +``` + +Records the stop reason (stop, length, content_filter, etc.). + +##### set_streaming() + +```python +def set_streaming(is_streaming: bool = True) -> LLMTracker +``` + +Marks request as streaming. + +##### set_cache_hit() + +```python +def set_cache_hit(cache_hit: bool = True) -> LLMTracker +``` + +Marks as a cache hit. + +##### set_attempt() + +```python +def set_attempt(attempt_number: int) -> LLMTracker +``` + +Sets retry attempt number. + +##### set_request_params() + +```python +def set_request_params( + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + stop_sequences: Optional[List[str]] = None, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, +) -> LLMTracker +``` + +Records request parameters. + +##### set_error() + +```python +def set_error(error: Exception) -> LLMTracker +``` + +Records an error. + +##### add_metadata() + +```python +def add_metadata(**kwargs: Any) -> LLMTracker +``` + +Adds custom span attributes. + +--- + +### track_tool_call() + +Context manager for tracking tool/function calls. + +```python +from botanu.tracking.llm import track_tool_call + +with track_tool_call( + tool_name: str, + tool_call_id: Optional[str] = None, + provider: Optional[str] = None, + **kwargs: Any, +) -> Generator[ToolTracker, None, None]: +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `tool_name` | `str` | Required | Name of the tool/function | +| `tool_call_id` | `str` | `None` | Tool call ID from LLM response | +| `provider` | `str` | `None` | Tool provider if external | + +--- + +### ModelOperation + +Constants for operation types. 
+ +| Constant | Value | +|----------|-------| +| `CHAT` | `"chat"` | +| `TEXT_COMPLETION` | `"text_completion"` | +| `EMBEDDINGS` | `"embeddings"` | +| `GENERATE_CONTENT` | `"generate_content"` | +| `EXECUTE_TOOL` | `"execute_tool"` | +| `CREATE_AGENT` | `"create_agent"` | +| `INVOKE_AGENT` | `"invoke_agent"` | +| `RERANK` | `"rerank"` | +| `IMAGE_GENERATION` | `"image_generation"` | +| `SPEECH_TO_TEXT` | `"speech_to_text"` | +| `TEXT_TO_SPEECH` | `"text_to_speech"` | + +--- + +## Data Tracking + +### track_db_operation() + +Context manager for tracking database operations. + +```python +from botanu.tracking.data import track_db_operation + +with track_db_operation( + system: str, + operation: str, + database: Optional[str] = None, + **kwargs: Any, +) -> Generator[DBTracker, None, None]: +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `system` | `str` | Required | Database system (postgresql, mongodb, etc.) | +| `operation` | `str` | Required | Operation type (SELECT, INSERT, etc.) | +| `database` | `str` | `None` | Database name | + +#### Example + +```python +with track_db_operation(system="postgresql", operation="SELECT") as db: + result = await cursor.execute(query) + db.set_result(rows_returned=len(result)) +``` + +--- + +### DBTracker + +#### Methods + +##### set_result() + +```python +def set_result( + rows_returned: int = 0, + rows_affected: int = 0, + bytes_read: int = 0, + bytes_written: int = 0, +) -> DBTracker +``` + +##### set_table() + +```python +def set_table(table_name: str, schema: Optional[str] = None) -> DBTracker +``` + +##### set_query_id() + +```python +def set_query_id(query_id: str) -> DBTracker +``` + +##### set_bytes_scanned() + +```python +def set_bytes_scanned(bytes_scanned: int) -> DBTracker +``` + +##### set_error() + +```python +def set_error(error: Exception) -> DBTracker +``` + +##### add_metadata() + +```python +def add_metadata(**kwargs: Any) -> DBTracker +``` + +--- + +### track_storage_operation() + +Context manager for tracking object storage operations. + +```python +from botanu.tracking.data import track_storage_operation + +with track_storage_operation( + system: str, + operation: str, + **kwargs: Any, +) -> Generator[StorageTracker, None, None]: +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `system` | `str` | Required | Storage system (s3, gcs, azure_blob, etc.) | +| `operation` | `str` | Required | Operation type (GET, PUT, DELETE, etc.) | + +--- + +### StorageTracker + +#### Methods + +##### set_result() + +```python +def set_result( + objects_count: int = 0, + bytes_read: int = 0, + bytes_written: int = 0, +) -> StorageTracker +``` + +##### set_bucket() + +```python +def set_bucket(bucket: str) -> StorageTracker +``` + +##### set_error() + +```python +def set_error(error: Exception) -> StorageTracker +``` + +##### add_metadata() + +```python +def add_metadata(**kwargs: Any) -> StorageTracker +``` + +--- + +### track_messaging_operation() + +Context manager for tracking messaging operations. 
+ +```python +from botanu.tracking.data import track_messaging_operation + +with track_messaging_operation( + system: str, + operation: str, + destination: str, + **kwargs: Any, +) -> Generator[MessagingTracker, None, None]: +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `system` | `str` | Required | Messaging system (sqs, kafka, pubsub, etc.) | +| `operation` | `str` | Required | Operation type (publish, consume, etc.) | +| `destination` | `str` | Required | Queue/topic name | + +--- + +### MessagingTracker + +#### Methods + +##### set_result() + +```python +def set_result( + message_count: int = 0, + bytes_transferred: int = 0, +) -> MessagingTracker +``` + +##### set_error() + +```python +def set_error(error: Exception) -> MessagingTracker +``` + +##### add_metadata() + +```python +def add_metadata(**kwargs: Any) -> MessagingTracker +``` + +--- + +## Span Helpers + +### emit_outcome() + +Emit a business outcome for the current span. + +```python +from botanu import emit_outcome + +emit_outcome( + status: str, + *, + value_type: Optional[str] = None, + value_amount: Optional[float] = None, + confidence: Optional[float] = None, + reason: Optional[str] = None, +) -> None +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `status` | `str` | Required | Outcome status ("success", "partial", "failed") | +| `value_type` | `str` | `None` | Type of business value achieved | +| `value_amount` | `float` | `None` | Quantified value amount | +| `confidence` | `float` | `None` | Confidence score (0.0-1.0) | +| `reason` | `str` | `None` | Reason for the outcome | + +#### Example + +```python +emit_outcome("success", value_type="tickets_resolved", value_amount=1) +emit_outcome("failed", reason="rate_limit_exceeded") +``` + +--- + +### set_business_context() + +Set business context attributes on the current span. + +```python +from botanu import set_business_context + +set_business_context( + *, + customer_id: Optional[str] = None, + team: Optional[str] = None, + cost_center: Optional[str] = None, + region: Optional[str] = None, +) -> None +``` + +#### Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `customer_id` | `str` | `None` | Customer identifier | +| `team` | `str` | `None` | Team or department | +| `cost_center` | `str` | `None` | Cost center for financial tracking | +| `region` | `str` | `None` | Geographic region | + +--- + +## Context Helpers + +### get_run_id() + +Get the current run ID from baggage. + +```python +from botanu import get_run_id + +run_id = get_run_id() +``` + +### get_use_case() + +Get the current use case from baggage. + +```python +from botanu import get_use_case + +use_case = get_use_case() +``` + +### get_baggage() + +Get a baggage value by key. + +```python +from botanu import get_baggage + +value = get_baggage("botanu.tenant_id") +``` + +### set_baggage() + +Set a baggage value. + +```python +from botanu import set_baggage + +set_baggage("botanu.custom_field", "my_value") +``` + +### get_current_span() + +Get the current active span. 
+ +```python +from botanu import get_current_span + +span = get_current_span() +span.set_attribute("custom.attribute", "value") +``` + +## See Also + +- [LLM Tracking](../tracking/llm-tracking.md) - Detailed LLM tracking guide +- [Data Tracking](../tracking/data-tracking.md) - Data operation tracking +- [Outcomes](../tracking/outcomes.md) - Outcome recording diff --git a/docs/concepts/architecture.md b/docs/concepts/architecture.md new file mode 100644 index 0000000..2d87ccb --- /dev/null +++ b/docs/concepts/architecture.md @@ -0,0 +1,265 @@ +# Architecture + +Botanu SDK follows a "thin SDK, smart collector" architecture. The SDK does minimal work in your application's hot path, delegating heavy processing to the OpenTelemetry Collector. + +## Design Principles + +### 1. Minimal Hot-Path Overhead + +The SDK only performs lightweight operations during request processing: +- Generate UUIDv7 `run_id` +- Read/write W3C Baggage +- Record token counts as span attributes + +**Target overhead**: < 0.5ms per request + +### 2. OTel-Native + +Built on OpenTelemetry primitives, not alongside them: +- Uses standard `TracerProvider` +- Standard `SpanProcessor` for enrichment +- Standard OTLP export +- W3C Baggage for propagation + +### 3. Collector-Side Processing + +Heavy operations happen in the OTel Collector: +- PII redaction +- Cost calculation from token counts +- Vendor normalization +- Cardinality management +- Aggregation and sampling + +## Component Overview + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Your Application │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ @botanu_use_ │ │ track_llm_ │ │ track_db_ │ │ +│ │ case() │ │ call() │ │ operation() │ │ +│ └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ │ +│ │ │ │ │ +│ └──────────────────────┼──────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ Botanu SDK Core │ │ +│ ├───────────────────────────────────────────────────────────────────────┤ │ +│ │ RunContext │ RunContextEnricher │ BotanuConfig │ │ +│ │ - generate_run_id() │ - on_start() │ - service_name │ │ +│ │ - to_baggage_dict() │ - reads baggage │ - otlp_endpoint │ │ +│ │ - to_span_attrs() │ - writes to spans │ - propagation_mode │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ OpenTelemetry SDK │ │ +│ │ TracerProvider → BatchSpanProcessor → OTLPSpanExporter │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ + │ + │ OTLP (HTTP or gRPC) + ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ OpenTelemetry Collector │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ receivers: │ +│ otlp: │ +│ │ +│ processors: │ +│ transform: # Normalize vendor names │ +│ redaction: # Remove PII from gen_ai.content.* │ +│ attributes: # Cardinality limits │ +│ botanu/cost: # Calculate $ from tokens │ +│ │ +│ exporters: │ +│ clickhouse: # Or your preferred backend │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## SDK Components + +### BotanuConfig + +Central configuration for the SDK: + +```python +@dataclass +class 
BotanuConfig: + service_name: str + deployment_environment: str + otlp_endpoint: str + propagation_mode: str # "lean" or "full" + auto_instrument_packages: List[str] +``` + +### RunContext + +Holds run metadata and provides serialization: + +```python +@dataclass +class RunContext: + run_id: str + root_run_id: str + use_case: str + workflow: Optional[str] + attempt: int + # ... +``` + +### RunContextEnricher + +The only span processor in the SDK. Reads baggage, writes to spans: + +```python +class RunContextEnricher(SpanProcessor): + def on_start(self, span, parent_context): + for key in self._baggage_keys: + value = baggage.get_baggage(key, parent_context) + if value: + span.set_attribute(key, value) +``` + +### Tracking Helpers + +Context managers for manual instrumentation: + +- `track_llm_call()` - LLM/model operations +- `track_db_operation()` - Database operations +- `track_storage_operation()` - Object storage operations +- `track_messaging_operation()` - Message queue operations + +## Data Flow + +### 1. Run Initiation + +```python +@botanu_use_case("Customer Support") +def handle_ticket(): + pass +``` + +1. Generate UUIDv7 `run_id` +2. Create `RunContext` +3. Set baggage in current context +4. Start root span with run attributes + +### 2. Context Propagation + +```python +# Within the run +response = requests.get("https://api.example.com") +``` + +1. HTTP instrumentation reads current context +2. Baggage is injected into request headers +3. Downstream service extracts baggage +4. Context continues propagating + +### 3. Span Enrichment + +Every span (including auto-instrumented): + +1. `RunContextEnricher.on_start()` is called +2. Reads `botanu.run_id` from baggage +3. Writes to span attributes +4. Span is exported with run context + +### 4. Export and Processing + +1. `BatchSpanProcessor` batches spans +2. `OTLPSpanExporter` sends to collector +3. Collector processes (cost calc, PII redaction) +4. Spans written to backend + +## Why This Architecture? 
+
+### SDK Stays Thin
+
+| Operation | Location | Reason |
+|-----------|----------|--------|
+| run_id generation | SDK | Must be synchronous |
+| Baggage propagation | SDK | Process-local |
+| Token counting | SDK | Available at call site |
+| Cost calculation | Collector | Pricing tables change |
+| PII redaction | Collector | Consistent policy |
+| Aggregation | Collector | Reduces data volume |
+
+### No Vendor Lock-in
+
+- Standard OTel export format
+- Any OTel-compatible backend works
+- Collector processors are configurable
+
+### Minimal Dependencies
+
+Core SDK only requires `opentelemetry-api`:
+
+```toml
+dependencies = [
+    "opentelemetry-api >= 1.20.0",
+]
+```
+
+Full SDK adds export capabilities:
+
+```toml
+[project.optional-dependencies]
+sdk = [
+    "opentelemetry-sdk >= 1.20.0",
+    "opentelemetry-exporter-otlp-proto-http >= 1.20.0",
+]
+```
+
+## Integration Points
+
+### Existing TracerProvider
+
+If you already have OTel configured:
+
+```python
+from opentelemetry import trace
+from botanu.processors.enricher import RunContextEnricher
+
+# Add our processor to your existing provider
+provider = trace.get_tracer_provider()
+provider.add_span_processor(RunContextEnricher())
+```
+
+### Existing Instrumentation
+
+Botanu works alongside existing instrumentation:
+
+```python
+# Your existing setup
+from opentelemetry.instrumentation.requests import RequestsInstrumentor
+RequestsInstrumentor().instrument()
+
+# Add Botanu
+from botanu import enable
+enable(service_name="my-service")
+
+# Both work together - requests are instrumented AND get run_id
+```
+
+## Performance Characteristics
+
+| Operation | Typical Latency |
+|-----------|-----------------|
+| `generate_run_id()` | < 0.01ms |
+| `RunContextEnricher.on_start()` | < 0.05ms |
+| `track_llm_call()` overhead | < 0.1ms |
+| Baggage injection | < 0.01ms |
+
+Total SDK overhead per request: **< 0.5ms**
+
+## See Also
+
+- [Run Context](run-context.md) - RunContext model details
+- [Context Propagation](context-propagation.md) - How context flows
+- [Collector Configuration](../integration/collector.md) - Collector setup
diff --git a/docs/concepts/context-propagation.md b/docs/concepts/context-propagation.md
new file mode 100644
index 0000000..80bf319
--- /dev/null
+++ b/docs/concepts/context-propagation.md
@@ -0,0 +1,239 @@
+# Context Propagation
+
+Context propagation ensures that the `run_id` and other metadata flow through your entire application — across function calls, HTTP requests, message queues, and async workers.
+
+## How It Works
+
+Botanu uses **W3C Baggage** for context propagation, the same standard used by OpenTelemetry for distributed tracing.
+
+```
+┌──────────────────────────────────────────────────────────────────┐
+│                      HTTP Request Headers                        │
+├──────────────────────────────────────────────────────────────────┤
+│  traceparent: 00-{trace_id}-{span_id}-01                         │
+│  baggage: botanu.run_id=019abc12...,botanu.use_case=Support      │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+When you make an outbound HTTP request, the `botanu.run_id` travels in the `baggage` header alongside the trace context.
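+
+The sketch below shows that mechanism using the OpenTelemetry API directly, assuming only `opentelemetry-api` is installed. You normally never write this yourself - `@botanu_use_case` sets the baggage and instrumented HTTP clients inject it - but it shows where the `baggage` header comes from.
+
+```python
+from opentelemetry import baggage
+from opentelemetry.baggage.propagation import W3CBaggagePropagator
+
+# Attach run metadata to a context (the SDK does this inside @botanu_use_case;
+# the literal run_id below is just an example value).
+ctx = baggage.set_baggage("botanu.run_id", "019abc12-def3-7890-abcd-1234567890ab")
+ctx = baggage.set_baggage("botanu.use_case", "Support", context=ctx)
+
+# Inject the context into an outgoing carrier such as a dict of HTTP headers.
+headers = {}
+W3CBaggagePropagator().inject(headers, context=ctx)
+
+print(headers["baggage"])
+# botanu.run_id=019abc12-def3-7890-abcd-1234567890ab,botanu.use_case=Support
+```
+
+Instrumented HTTP clients perform this injection on every outbound request, so downstream services receive the same `run_id` without any extra code.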
+ +## Propagation Modes + +### Lean Mode (Default) + +Only propagates essential fields to minimize header size: +- `botanu.run_id` +- `botanu.use_case` + +```python +# Lean mode baggage (~100 bytes) +baggage: botanu.run_id=019abc12-def3-7890-abcd-1234567890ab,botanu.use_case=Customer%20Support +``` + +### Full Mode + +Propagates all context fields: +- `botanu.run_id` +- `botanu.use_case` +- `botanu.workflow` +- `botanu.environment` +- `botanu.tenant_id` +- `botanu.parent_run_id` + +```python +# Enable full mode +import os +os.environ["BOTANU_PROPAGATION_MODE"] = "full" +``` + +## In-Process Propagation + +Within a single process, context is propagated via Python's `contextvars`: + +```python +from botanu import botanu_use_case + +@botanu_use_case("Customer Support") +def handle_ticket(ticket_id: str): + # Context is set here + + fetch_context(ticket_id) # Inherits context + call_llm() # Inherits context + save_result() # Inherits context +``` + +The `RunContextEnricher` span processor automatically reads baggage and writes to span attributes: + +```python +class RunContextEnricher(SpanProcessor): + def on_start(self, span, parent_context): + for key in ["botanu.run_id", "botanu.use_case"]: + value = baggage.get_baggage(key, parent_context) + if value: + span.set_attribute(key, value) +``` + +This ensures **every span** — including auto-instrumented ones — gets the `run_id`. + +## HTTP Propagation + +### Outbound Requests + +When using instrumented HTTP clients (`requests`, `httpx`, `urllib3`), baggage is automatically propagated: + +```python +import requests + +@botanu_use_case("Fetch Data") +def fetch_data(): + # Baggage is automatically added to headers + response = requests.get("https://api.example.com/data") +``` + +### Inbound Requests (Frameworks) + +For web frameworks (`FastAPI`, `Flask`, `Django`), use the middleware to extract context: + +```python +# FastAPI +from botanu.sdk.middleware import BotanuMiddleware + +app = FastAPI() +app.add_middleware(BotanuMiddleware) + +@app.post("/tickets") +def create_ticket(request: Request): + # RunContext is extracted from incoming baggage + # or created if not present + pass +``` + +## Message Queue Propagation + +For async messaging systems, you need to manually inject and extract context. 
+
+### Injecting Context (Producer)
+
+```python
+from botanu.sdk.context import get_current_run_context
+
+def publish_message(queue, payload):
+    ctx = get_current_run_context()
+
+    message = {
+        "payload": payload,
+        "metadata": {
+            "baggage": ctx.to_baggage_dict() if ctx else {}
+        }
+    }
+    queue.publish(message)
+```
+
+### Extracting Context (Consumer)
+
+```python
+from botanu.models.run_context import RunContext
+
+def process_message(message):
+    baggage = message.get("metadata", {}).get("baggage", {})
+    ctx = RunContext.from_baggage(baggage)
+
+    if ctx:
+        # Continue with existing context
+        with ctx.as_current():
+            handle_message(message["payload"])
+    else:
+        # Create new context
+        with RunContext.create(use_case="Message Processing").as_current():
+            handle_message(message["payload"])
+```
+
+## Cross-Service Propagation
+
+```
+┌──────────────┐      HTTP      ┌──────────────┐     Kafka      ┌──────────────┐
+│  Service A   │ ────────────►  │  Service B   │ ────────────►  │  Service C   │
+│              │   baggage:     │              │   message      │              │
+│  run_id=X    │   run_id=X     │  run_id=X    │   run_id=X     │  run_id=X    │
+└──────────────┘                └──────────────┘                └──────────────┘
+```
+
+The same `run_id` flows through all services, enabling:
+- End-to-end cost attribution
+- Cross-service trace correlation
+- Distributed debugging
+
+## Baggage Size Limits
+
+W3C Baggage has practical size limits. The SDK uses lean mode by default to stay well under these limits:
+
+| Mode | Typical Size | Recommendation |
+|------|--------------|----------------|
+| Lean | ~100 bytes | Use for most cases |
+| Full | ~300 bytes | Use when you need all context downstream |
+
+## Propagation and Auto-Instrumentation
+
+The SDK works seamlessly with OTel auto-instrumentation:
+
+```python
+from botanu import enable
+
+enable(
+    service_name="my-service",
+    auto_instrument=True,  # Enable auto-instrumentation
+)
+```
+
+Auto-instrumented libraries will automatically propagate baggage:
+- `requests`, `httpx`, `urllib3` (HTTP clients)
+- `fastapi`, `flask`, `django` (Web frameworks)
+- `celery` (Task queues)
+- `grpc` (gRPC)
+
+## Debugging Propagation
+
+### Check Current Context
+
+```python
+from botanu.sdk.context import get_baggage, get_run_id
+
+run_id = get_run_id()
+print(f"Current run_id: {run_id}")
+
+use_case = get_baggage("botanu.use_case")
+print(f"Current use_case: {use_case}")
+```
+
+### Verify Header Propagation
+
+```python
+# In your HTTP client
+import httpx
+
+def debug_request():
+    with httpx.Client() as client:
+        response = client.get(
+            "https://httpbin.org/headers",
+        )
+        print(response.json())
+        # httpbin echoes the request headers - look for the 'baggage' header
+```
+
+## Common Issues
+
+### Context Not Propagating
+
+1. **Missing initialization**: Ensure `enable()` is called at startup
+2. **Missing middleware**: Add `BotanuMiddleware` to your web framework
+3. **Async context loss**: Use `contextvars`-aware async patterns
+
+### Duplicate run_ids
+
+1. **Multiple decorators**: Only use `@botanu_use_case` at the entry point
+2. **Middleware + decorator**: Choose one, not both
+
+## See Also
+
+- [Run Context](run-context.md) - Understanding the RunContext model
+- [Architecture](architecture.md) - Overall SDK architecture
diff --git a/docs/concepts/run-context.md b/docs/concepts/run-context.md
new file mode 100644
index 0000000..436be03
--- /dev/null
+++ b/docs/concepts/run-context.md
@@ -0,0 +1,188 @@
+# Run Context
+
+The Run Context is the core concept in Botanu SDK.
It represents a single business transaction or workflow execution that you want to track for cost attribution. + +## What is a Run? + +A **run** is a logical unit of work that produces a business outcome. Examples: + +- Resolving a customer support ticket +- Processing a document +- Generating a report +- Handling a chatbot conversation + +A single run may involve: +- Multiple LLM calls (possibly to different providers) +- Database queries +- Storage operations +- External API calls +- Message queue operations + +## The run_id + +Every run is identified by a unique `run_id` — a UUIDv7 that is: + +- **Time-sortable**: IDs generated later sort after earlier ones +- **Globally unique**: No collisions across services +- **Propagated automatically**: Flows through your entire application via W3C Baggage + +```python +from botanu.models.run_context import generate_run_id + +run_id = generate_run_id() +# "019abc12-def3-7890-abcd-1234567890ab" +``` + +## RunContext Model + +The `RunContext` dataclass holds all metadata for a run: + +```python +from botanu.models.run_context import RunContext + +ctx = RunContext.create( + use_case="Customer Support", + workflow="handle_ticket", + environment="production", + tenant_id="tenant-123", +) + +print(ctx.run_id) # "019abc12-def3-7890-..." +print(ctx.root_run_id) # Same as run_id for top-level runs +print(ctx.attempt) # 1 (first attempt) +``` + +### Key Fields + +| Field | Description | +|-------|-------------| +| `run_id` | Unique identifier for this run (UUIDv7) | +| `root_run_id` | ID of the original run (for retries, same as `run_id` for first attempt) | +| `use_case` | Business use case name (e.g., "Customer Support") | +| `workflow` | Optional workflow/function name | +| `environment` | Deployment environment (production, staging, etc.) | +| `attempt` | Attempt number (1 for first, 2+ for retries) | +| `tenant_id` | Optional tenant identifier for multi-tenant systems | + +## Creating Runs + +### Using the Decorator (Recommended) + +```python +from botanu import botanu_use_case + +@botanu_use_case("Customer Support") +def handle_ticket(ticket_id: str): + # RunContext is automatically created and propagated + # All operations inside inherit the same run_id + pass +``` + +### Manual Creation + +```python +from botanu.models.run_context import RunContext + +ctx = RunContext.create( + use_case="Document Processing", + workflow="extract_entities", + tenant_id="acme-corp", +) + +# Use ctx.to_baggage_dict() to propagate via HTTP headers +# Use ctx.to_span_attributes() to add to spans +``` + +## Retry Handling + +When a run fails and is retried, use `create_retry()` to maintain lineage: + +```python +original = RunContext.create(use_case="Process Order") + +# First attempt fails... 
+ +retry = RunContext.create_retry(original) +print(retry.attempt) # 2 +print(retry.retry_of_run_id) # Original run_id +print(retry.root_run_id) # Same as original.run_id +print(retry.run_id) # New unique ID +``` + +This enables: +- Tracking total attempts for a business operation +- Correlating retries back to the original request +- Calculating aggregate cost across all attempts + +## Deadlines and Cancellation + +RunContext supports deadline and cancellation tracking: + +```python +ctx = RunContext.create( + use_case="Long Running Task", + deadline_seconds=30.0, # 30 second deadline +) + +# Check deadline +if ctx.is_past_deadline(): + raise TimeoutError("Deadline exceeded") + +# Check remaining time +remaining = ctx.remaining_time_seconds() + +# Request cancellation +ctx.request_cancellation(reason="user") +if ctx.is_cancelled(): + # Clean up and exit + pass +``` + +## Serialization + +### To Baggage (for HTTP propagation) + +```python +# Lean mode (default): only run_id and use_case +baggage = ctx.to_baggage_dict() +# {"botanu.run_id": "...", "botanu.use_case": "..."} + +# Full mode: all fields +baggage = ctx.to_baggage_dict(lean_mode=False) +# Includes workflow, environment, tenant_id, etc. +``` + +### To Span Attributes + +```python +attrs = ctx.to_span_attributes() +# {"botanu.run_id": "...", "botanu.use_case": "...", ...} +``` + +### From Baggage (receiving side) + +```python +ctx = RunContext.from_baggage(baggage_dict) +if ctx is None: + # Required fields missing, create new context + ctx = RunContext.create(use_case="Unknown") +``` + +## Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `BOTANU_ENVIRONMENT` | Default environment | `"production"` | +| `BOTANU_PROPAGATION_MODE` | `"lean"` or `"full"` | `"lean"` | + +## Best Practices + +1. **One run per business outcome**: Don't create runs for internal operations +2. **Use descriptive use_case names**: They appear in dashboards and queries +3. **Leverage tenant_id**: Essential for multi-tenant cost attribution +4. **Handle retries properly**: Always use `create_retry()` for retry attempts + +## See Also + +- [Context Propagation](context-propagation.md) - How context flows through your application +- [Outcomes](../tracking/outcomes.md) - Recording business outcomes diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..48c8c1d --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,271 @@ +# Configuration + +Botanu SDK can be configured through code, environment variables, or YAML files. + +## Configuration Precedence + +1. **Code arguments** (explicit values passed to `BotanuConfig`) +2. **Environment variables** (`BOTANU_*`, `OTEL_*`) +3. **YAML config file** (`botanu.yaml` or specified path) +4. 
**Built-in defaults** + +## Quick Configuration + +### Code-Based + +```python +from botanu import enable + +enable( + service_name="my-service", + otlp_endpoint="http://collector:4318/v1/traces", +) +``` + +### Environment Variables + +```bash +export OTEL_SERVICE_NAME=my-service +export OTEL_EXPORTER_OTLP_ENDPOINT=http://collector:4318 +export BOTANU_ENVIRONMENT=production +``` + +### YAML File + +```yaml +# botanu.yaml +service: + name: my-service + version: 1.0.0 + environment: production + +otlp: + endpoint: http://collector:4318/v1/traces + +propagation: + mode: lean +``` + +Load with: + +```python +from botanu.sdk.config import BotanuConfig + +config = BotanuConfig.from_yaml("botanu.yaml") +``` + +## Full Configuration Reference + +### BotanuConfig Fields + +```python +from dataclasses import dataclass + +@dataclass +class BotanuConfig: + # Service identification + service_name: str = None # OTEL_SERVICE_NAME + service_version: str = None # OTEL_SERVICE_VERSION + service_namespace: str = None # OTEL_SERVICE_NAMESPACE + deployment_environment: str = None # OTEL_DEPLOYMENT_ENVIRONMENT + + # Resource detection + auto_detect_resources: bool = True # BOTANU_AUTO_DETECT_RESOURCES + + # OTLP exporter + otlp_endpoint: str = None # OTEL_EXPORTER_OTLP_ENDPOINT + otlp_headers: dict = None # Custom headers for auth + + # Span export + max_export_batch_size: int = 512 + max_queue_size: int = 2048 + schedule_delay_millis: int = 5000 + + # Propagation mode + propagation_mode: str = "lean" # BOTANU_PROPAGATION_MODE + + # Auto-instrumentation + auto_instrument_packages: list = [...] +``` + +## Environment Variables + +### OpenTelemetry Standard Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `OTEL_SERVICE_NAME` | Service name | `unknown_service` | +| `OTEL_SERVICE_VERSION` | Service version | None | +| `OTEL_SERVICE_NAMESPACE` | Service namespace | None | +| `OTEL_DEPLOYMENT_ENVIRONMENT` | Environment name | `production` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | OTLP collector base URL | `http://localhost:4318` | +| `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT` | OTLP traces endpoint (full URL) | None | + +### Botanu-Specific Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `BOTANU_ENVIRONMENT` | Fallback for environment | `production` | +| `BOTANU_PROPAGATION_MODE` | `lean` or `full` | `lean` | +| `BOTANU_AUTO_DETECT_RESOURCES` | Auto-detect cloud resources | `true` | +| `BOTANU_CONFIG_FILE` | Path to YAML config | None | + +## YAML Configuration + +### Full Example + +```yaml +# botanu.yaml - Full configuration example +service: + name: ${OTEL_SERVICE_NAME:-my-service} + version: ${APP_VERSION:-1.0.0} + namespace: production + environment: ${ENVIRONMENT:-production} + +resource: + auto_detect: true + +otlp: + endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://localhost:4318}/v1/traces + headers: + Authorization: Bearer ${OTLP_AUTH_TOKEN} + +export: + batch_size: 512 + queue_size: 2048 + delay_ms: 5000 + +propagation: + mode: lean + +auto_instrument_packages: + - requests + - httpx + - fastapi + - sqlalchemy + - openai_v2 +``` + +### Environment Variable Interpolation + +The YAML loader supports two interpolation patterns: + +```yaml +# Simple interpolation +endpoint: ${COLLECTOR_URL} + +# With default value +endpoint: ${COLLECTOR_URL:-http://localhost:4318} +``` + +### Loading Configuration + +```python +from botanu.sdk.config import BotanuConfig + +# Explicit path +config = BotanuConfig.from_yaml("config/botanu.yaml") + +# 
Auto-discover (searches botanu.yaml, config/botanu.yaml) +config = BotanuConfig.from_file_or_env() + +# Environment only +config = BotanuConfig() +``` + +## Propagation Modes + +### Lean Mode (Default) + +Propagates only essential fields to minimize header size: + +- `botanu.run_id` +- `botanu.use_case` + +Best for high-traffic systems where header size matters. + +### Full Mode + +Propagates all context fields: + +- `botanu.run_id` +- `botanu.use_case` +- `botanu.workflow` +- `botanu.environment` +- `botanu.tenant_id` +- `botanu.parent_run_id` + +Enable with: + +```bash +export BOTANU_PROPAGATION_MODE=full +``` + +Or: + +```python +enable(service_name="my-service", propagation_mode="full") +``` + +## Auto-Instrumentation + +### Default Packages + +By default, Botanu enables instrumentation for: + +```python +[ + # HTTP clients + "requests", "httpx", "urllib3", "aiohttp_client", + # Web frameworks + "fastapi", "flask", "django", "starlette", + # Databases + "sqlalchemy", "psycopg2", "asyncpg", "pymongo", "redis", + # Messaging + "celery", "kafka_python", + # gRPC + "grpc", + # GenAI + "openai_v2", "anthropic", "vertexai", "google_genai", "langchain", + # Runtime + "logging", +] +``` + +### Customizing Packages + +```python +from botanu import enable + +enable( + service_name="my-service", + auto_instrument_packages=["requests", "fastapi", "openai_v2"], +) +``` + +### Disabling Auto-Instrumentation + +```python +enable( + service_name="my-service", + auto_instrument_packages=[], # Empty list disables +) +``` + +## Exporting Configuration + +```python +config = BotanuConfig( + service_name="my-service", + deployment_environment="production", +) + +# Export as dictionary +print(config.to_dict()) +``` + +## See Also + +- [Architecture](../concepts/architecture.md) - SDK design principles +- [Collector Configuration](../integration/collector.md) - Collector setup +- [Existing OTel Setup](../integration/existing-otel.md) - Integration with existing OTel diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..3591b72 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,80 @@ +# Installation + +## Requirements + +- Python 3.9 or later +- OpenTelemetry Collector (recommended for production) + +## Install + +```bash +pip install botanu +``` + +One install gives you everything: + +- **OTel SDK** + OTLP HTTP exporter +- **Auto-instrumentation** for 50+ libraries (HTTP, databases, messaging, GenAI, AWS, gRPC) + +Instrumentation packages are lightweight shims that silently no-op when the target library is not installed. Zero bloat. + +## Verify + +```python +import botanu +print(botanu.__version__) +``` + +## Package Managers + +### pip / requirements.txt + +```text +botanu>=0.1.0 +``` + +### Poetry + +```toml +[tool.poetry.dependencies] +botanu = "^0.1.0" +``` + +### Docker + +```dockerfile +FROM python:3.12-slim +WORKDIR /app +RUN pip install botanu +COPY . . +CMD ["python", "app.py"] +``` + +## Development + +For running tests and linting: + +```bash +pip install "botanu[dev]" +``` + +## Collector Setup + +The SDK sends traces to an OpenTelemetry Collector via OTLP HTTP (port 4318). Configure the endpoint via environment variable: + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +``` + +Quick start with Docker: + +```bash +docker run -p 4318:4318 otel/opentelemetry-collector:latest +``` + +See [Collector Configuration](../integration/collector.md) for production setup. 
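+
+For a quick end-to-end check against a locally running collector, a minimal sketch (the endpoint and service name are example values; `enable()`, `disable()`, and `@botanu_use_case` are the documented entry points):
+
+```python
+# smoke_test.py
+import os
+
+# Example values - point these at your own collector in real deployments.
+os.environ.setdefault("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")
+os.environ.setdefault("OTEL_SERVICE_NAME", "smoke-test")
+
+from botanu import enable, disable, botanu_use_case
+
+enable()
+
+@botanu_use_case(name="Smoke Test")
+def run_once():
+    return "ok"
+
+run_once()
+disable()  # flushes pending spans before the process exits
+```
+
+With the collector's debug/logging exporter enabled, you should see spans carrying a `botanu.run_id` attribute arrive within a few seconds.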
+ +## Next Steps + +- [Quickstart](quickstart.md) - Your first instrumented application +- [Configuration](configuration.md) - Environment variables and YAML config diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 0000000..3acbf97 --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,98 @@ +# Quickstart + +Get run-level cost attribution working in 5 minutes. + +## Prerequisites + +- Python 3.9+ +- OpenTelemetry Collector running (see [Collector Configuration](../integration/collector.md)) + +## Step 1: Install + +```bash +pip install botanu +``` + +## Step 2: Set Environment Variables + +```bash +export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +export OTEL_SERVICE_NAME=my-service +``` + +Or in Docker / Kubernetes: + +```yaml +environment: + - OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318 + - OTEL_SERVICE_NAME=my-service +``` + +## Step 3: Enable SDK + +```python +from botanu import enable + +enable() +``` + +Call `enable()` once at application startup. It reads configuration from environment variables — no hardcoded values needed. + +## Step 4: Define Entry Point + +```python +from botanu import botanu_use_case + +@botanu_use_case(name="Customer Support") +async def handle_ticket(ticket_id: str): + data = await db.query(ticket_id) + result = await llm.complete(data) + return result +``` + +All LLM calls, database queries, and HTTP requests inside the function are automatically tracked with the same `run_id`. + +## Complete Example + +**Entry service** (`entry/app.py`): + +```python +from botanu import enable, botanu_use_case + +enable() + +@botanu_use_case(name="Customer Support") +async def handle_ticket(ticket_id: str): + data = await db.query(ticket_id) + result = await openai.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": data}] + ) + return result +``` + +**Downstream service** (`intermediate/app.py`): + +```python +from botanu import enable + +enable() # propagates run_id from incoming request — no decorator needed +``` + +## What Gets Tracked + +| Attribute | Example | Description | +|-----------|---------|-------------| +| `botanu.run_id` | `019abc12-...` | Unique run identifier (UUIDv7) | +| `botanu.use_case` | `Customer Support` | Business use case | +| `gen_ai.usage.input_tokens` | `150` | LLM input tokens | +| `gen_ai.usage.output_tokens` | `200` | LLM output tokens | +| `db.system` | `postgresql` | Database system | + +All spans across all services share the same `run_id`, enabling cost-per-transaction analytics. + +## Next Steps + +- [Configuration](configuration.md) - Environment variables and YAML config +- [Kubernetes Deployment](../integration/kubernetes.md) - Zero-code instrumentation at scale +- [Context Propagation](../concepts/context-propagation.md) - How run_id flows across services diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..1f77d25 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,65 @@ +# Botanu SDK Documentation + +Botanu SDK provides OpenTelemetry-native run-level cost attribution for AI workflows. + +## Overview + +Traditional observability tools trace individual requests. But AI workflows are different — a single business outcome (resolving a support ticket, processing an order) might span multiple LLM calls, retries, tool executions, and data operations across different vendors. 
+ +Botanu introduces **run-level attribution**: a unique `run_id` that follows your entire workflow, enabling you to answer "How much did this outcome cost?" + +## Documentation + +### Getting Started + +- [Installation](getting-started/installation.md) - Install and configure the SDK +- [Quick Start](getting-started/quickstart.md) - Get up and running in 5 minutes +- [Configuration](getting-started/configuration.md) - Configuration options and environment variables + +### Core Concepts + +- [Run Context](concepts/run-context.md) - Understanding `run_id` and context propagation +- [Context Propagation](concepts/context-propagation.md) - How context flows through your application +- [Architecture](concepts/architecture.md) - SDK design and component overview + +### Tracking + +- [LLM Tracking](tracking/llm-tracking.md) - Track AI model calls and token usage +- [Data Tracking](tracking/data-tracking.md) - Track database, storage, and messaging operations +- [Outcomes](tracking/outcomes.md) - Record business outcomes for ROI calculation + +### Integration + +- [Auto-Instrumentation](integration/auto-instrumentation.md) - Automatic instrumentation for common libraries +- [Kubernetes Deployment](integration/kubernetes.md) - Zero-code instrumentation at scale +- [Existing OTel Setup](integration/existing-otel.md) - Integrate with existing OpenTelemetry deployments +- [Collector Configuration](integration/collector.md) - Configure the OpenTelemetry Collector + +### Patterns + +- [Best Practices](patterns/best-practices.md) - Recommended patterns for production use +- [Anti-Patterns](patterns/anti-patterns.md) - Common mistakes to avoid + +### API Reference + +- [Decorators](api/decorators.md) - `@botanu_use_case` and related decorators +- [Tracking API](api/tracking.md) - Manual tracking context managers +- [Configuration API](api/configuration.md) - `BotanuConfig` and initialization + +## Quick Example + +```python +from botanu import enable, botanu_use_case + +enable(service_name="my-service") + +@botanu_use_case(name="my_workflow") +def my_function(): + data = db.query(...) + result = llm.complete(...) + return result +``` + +## License + +Apache License 2.0. See [LICENSE](https://github.com/botanu-ai/botanu-sdk-python/blob/main/LICENSE). diff --git a/docs/integration/auto-instrumentation.md b/docs/integration/auto-instrumentation.md new file mode 100644 index 0000000..be504bc --- /dev/null +++ b/docs/integration/auto-instrumentation.md @@ -0,0 +1,138 @@ +# Auto-Instrumentation + +Botanu automatically instruments 50+ libraries with zero code changes. + +## How It Works + +When you call `enable()`, the SDK detects which libraries are installed in your environment and instruments them automatically. Libraries that aren't installed are silently skipped. + +```python +from botanu import enable + +enable() # auto-instruments everything that's installed +``` + +No configuration needed. No import order requirements. Just call `enable()` at startup. 
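+
+As a concrete sketch of what this means in practice, the function below makes an ordinary `requests` call inside a use case. There is no tracing code in the function body, yet the requests instrumentation emits an HTTP client span that carries the same `botanu.run_id` as every other span in the run. The service name, use case, and URL are placeholders.
+
+```python
+import requests
+
+from botanu import enable, botanu_use_case
+
+enable(service_name="demo-service")
+
+@botanu_use_case(name="Order Lookup")
+def lookup_order(order_id: str):
+    # Auto-instrumented: this call produces an HTTP client span tagged with botanu.run_id
+    response = requests.get(f"https://orders.example.com/orders/{order_id}")
+    return response.json()
+
+lookup_order("42")
+```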
+ +## Supported Libraries + +### LLM Providers + +| Provider | Instrumentation Package | +|----------|------------------------| +| OpenAI | `opentelemetry-instrumentation-openai-v2` | +| Anthropic | `opentelemetry-instrumentation-anthropic` | +| Vertex AI | `opentelemetry-instrumentation-vertexai` | +| Google GenAI | `opentelemetry-instrumentation-google-generativeai` | +| LangChain | `opentelemetry-instrumentation-langchain` | +| Ollama | `opentelemetry-instrumentation-ollama` | +| CrewAI | `opentelemetry-instrumentation-crewai` | + +### Web Frameworks + +| Framework | Instrumentation Package | +|-----------|------------------------| +| FastAPI | `opentelemetry-instrumentation-fastapi` | +| Flask | `opentelemetry-instrumentation-flask` | +| Django | `opentelemetry-instrumentation-django` | +| Starlette | `opentelemetry-instrumentation-starlette` | +| Falcon | `opentelemetry-instrumentation-falcon` | +| Pyramid | `opentelemetry-instrumentation-pyramid` | +| Tornado | `opentelemetry-instrumentation-tornado` | + +### HTTP Clients + +| Library | Instrumentation Package | +|---------|------------------------| +| requests | `opentelemetry-instrumentation-requests` | +| httpx | `opentelemetry-instrumentation-httpx` | +| urllib3 | `opentelemetry-instrumentation-urllib3` | +| urllib | `opentelemetry-instrumentation-urllib` | +| aiohttp (client) | `opentelemetry-instrumentation-aiohttp-client` | +| aiohttp (server) | `opentelemetry-instrumentation-aiohttp-server` | + +### Databases + +| Database | Instrumentation Package | +|----------|------------------------| +| SQLAlchemy | `opentelemetry-instrumentation-sqlalchemy` | +| psycopg2 | `opentelemetry-instrumentation-psycopg2` | +| psycopg3 | `opentelemetry-instrumentation-psycopg` | +| asyncpg | `opentelemetry-instrumentation-asyncpg` | +| aiopg | `opentelemetry-instrumentation-aiopg` | +| pymongo | `opentelemetry-instrumentation-pymongo` | +| redis | `opentelemetry-instrumentation-redis` | +| MySQL | `opentelemetry-instrumentation-mysql` | +| mysqlclient | `opentelemetry-instrumentation-mysqlclient` | +| PyMySQL | `opentelemetry-instrumentation-pymysql` | +| SQLite3 | `opentelemetry-instrumentation-sqlite3` | +| Elasticsearch | `opentelemetry-instrumentation-elasticsearch` | +| Cassandra | `opentelemetry-instrumentation-cassandra` | +| TortoiseORM | `opentelemetry-instrumentation-tortoiseorm` | +| pymemcache | `opentelemetry-instrumentation-pymemcache` | + +### Messaging & Task Queues + +| System | Instrumentation Package | +|--------|------------------------| +| Celery | `opentelemetry-instrumentation-celery` | +| kafka-python | `opentelemetry-instrumentation-kafka-python` | +| confluent-kafka | `opentelemetry-instrumentation-confluent-kafka` | +| aiokafka | `opentelemetry-instrumentation-aiokafka` | +| pika (RabbitMQ) | `opentelemetry-instrumentation-pika` | +| aio-pika | `opentelemetry-instrumentation-aio-pika` | + +### AWS + +| Service | Instrumentation Package | +|---------|------------------------| +| botocore | `opentelemetry-instrumentation-botocore` | +| boto3 SQS | `opentelemetry-instrumentation-boto3sqs` | + +### gRPC + +| Component | Instrumentation Package | +|-----------|------------------------| +| gRPC Client + Server | `opentelemetry-instrumentation-grpc` | + +### Runtime + +| Library | Instrumentation Package | +|---------|------------------------| +| logging | `opentelemetry-instrumentation-logging` | +| threading | `opentelemetry-instrumentation-threading` | +| asyncio | `opentelemetry-instrumentation-asyncio` | + +## Context 
Propagation + +HTTP clients automatically propagate `run_id` via W3C Baggage headers: + +``` +traceparent: 00-{trace_id}-{span_id}-01 +baggage: botanu.run_id=019abc12... +``` + +## Span Attributes + +OpenAI calls produce: + +``` +gen_ai.operation.name: chat +gen_ai.provider.name: openai +gen_ai.request.model: gpt-4 +gen_ai.usage.input_tokens: 10 +gen_ai.usage.output_tokens: 25 +``` + +Database calls produce: + +``` +db.system: postgresql +db.operation: SELECT +db.statement: SELECT * FROM orders WHERE id = ? +``` + +## See Also + +- [Kubernetes Deployment](kubernetes.md) - Zero-code instrumentation at scale +- [Collector Configuration](collector.md) - Collector setup diff --git a/docs/integration/collector.md b/docs/integration/collector.md new file mode 100644 index 0000000..ed85df9 --- /dev/null +++ b/docs/integration/collector.md @@ -0,0 +1,422 @@ +# Collector Configuration + +Set up the OpenTelemetry Collector for cost attribution processing. + +## Overview + +Botanu follows a "thin SDK, smart collector" architecture. The SDK captures raw telemetry; the collector handles: + +- **PII redaction** - Remove sensitive data from prompts/responses +- **Cost calculation** - Convert tokens to dollars using pricing tables +- **Vendor normalization** - Standardize provider names +- **Cardinality management** - Limit high-cardinality attributes +- **Aggregation** - Pre-aggregate metrics for dashboards + +## Quick Start + +### Docker + +```bash +docker run -p 4318:4318 -p 4317:4317 \ + -v $(pwd)/otel-config.yaml:/etc/otelcol/config.yaml \ + otel/opentelemetry-collector-contrib:latest +``` + +### Docker Compose + +```yaml +services: + collector: + image: otel/opentelemetry-collector-contrib:latest + ports: + - "4318:4318" # OTLP HTTP + - "4317:4317" # OTLP gRPC + volumes: + - ./otel-config.yaml:/etc/otelcol/config.yaml +``` + +## Basic Configuration + +```yaml +# otel-config.yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + grpc: + endpoint: 0.0.0.0:4317 + +processors: + batch: + send_batch_size: 1000 + timeout: 10s + +exporters: + debug: + verbosity: detailed + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [debug] +``` + +## Cost Attribution Configuration + +### Full Pipeline + +```yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + grpc: + endpoint: 0.0.0.0:4317 + +processors: + # Batch for efficiency + batch: + send_batch_size: 1000 + timeout: 10s + + # Normalize vendor names + transform/vendor: + trace_statements: + - context: span + statements: + # Normalize provider names to standard format + - set(attributes["botanu.vendor"], "openai") where attributes["gen_ai.provider.name"] == "openai" + - set(attributes["botanu.vendor"], "anthropic") where attributes["gen_ai.provider.name"] == "anthropic" + - set(attributes["botanu.vendor"], "azure.openai") where attributes["gen_ai.provider.name"] == "azure.openai" + - set(attributes["botanu.vendor"], "gcp.vertex_ai") where attributes["gen_ai.provider.name"] == "gcp.vertex_ai" + - set(attributes["botanu.vendor"], "aws.bedrock") where attributes["gen_ai.provider.name"] == "aws.bedrock" + + # Calculate costs from tokens + transform/cost: + trace_statements: + - context: span + statements: + # GPT-4 pricing (example: $30/$60 per 1M tokens) + - set(attributes["botanu.cost.input_usd"], + attributes["gen_ai.usage.input_tokens"] * 0.00003) + where attributes["gen_ai.request.model"] == "gpt-4" + - set(attributes["botanu.cost.output_usd"], + attributes["gen_ai.usage.output_tokens"] 
* 0.00006) + where attributes["gen_ai.request.model"] == "gpt-4" + + # GPT-4 Turbo pricing ($10/$30 per 1M tokens) + - set(attributes["botanu.cost.input_usd"], + attributes["gen_ai.usage.input_tokens"] * 0.00001) + where attributes["gen_ai.request.model"] == "gpt-4-turbo" + - set(attributes["botanu.cost.output_usd"], + attributes["gen_ai.usage.output_tokens"] * 0.00003) + where attributes["gen_ai.request.model"] == "gpt-4-turbo" + + # Claude 3 Opus pricing ($15/$75 per 1M tokens) + - set(attributes["botanu.cost.input_usd"], + attributes["gen_ai.usage.input_tokens"] * 0.000015) + where attributes["gen_ai.request.model"] == "claude-3-opus-20240229" + - set(attributes["botanu.cost.output_usd"], + attributes["gen_ai.usage.output_tokens"] * 0.000075) + where attributes["gen_ai.request.model"] == "claude-3-opus-20240229" + + # Calculate total + - set(attributes["botanu.cost.total_usd"], + attributes["botanu.cost.input_usd"] + attributes["botanu.cost.output_usd"]) + where attributes["botanu.cost.input_usd"] != nil + + # PII redaction for prompts/responses + redaction: + allow_all_keys: true + blocked_values: + # Email addresses + - "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b" + # Phone numbers + - "\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b" + # SSN + - "\\b\\d{3}-\\d{2}-\\d{4}\\b" + # Credit card numbers + - "\\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\\b" + + # Cardinality limits + attributes: + actions: + - key: botanu.run_id + action: hash + # Keep first 16 chars of hash to reduce cardinality if needed + - key: gen_ai.content.prompt + action: delete + # Remove raw prompts (keep tokens for cost) + +exporters: + # ClickHouse for analytics + clickhouse: + endpoint: tcp://clickhouse:9000 + database: botanu + ttl: 90d + create_schema: true + + # Also send to your APM + otlp/apm: + endpoint: https://your-apm.example.com + headers: + Authorization: Bearer ${APM_TOKEN} + +service: + pipelines: + traces: + receivers: [otlp] + processors: + - batch + - transform/vendor + - transform/cost + - redaction + - attributes + exporters: [clickhouse, otlp/apm] +``` + +## PII Redaction + +### Using Redaction Processor + +```yaml +processors: + redaction: + allow_all_keys: true + blocked_values: + # Redact common PII patterns + - "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b" # Email + - "\\b\\d{3}[-.]?\\d{3}[-.]?\\d{4}\\b" # Phone + - "\\b\\d{3}-\\d{2}-\\d{4}\\b" # SSN + summary: debug # Log redaction summary +``` + +### Using Transform Processor + +```yaml +processors: + transform/pii: + trace_statements: + - context: span + statements: + # Remove prompt content entirely + - delete(attributes["gen_ai.content.prompt"]) + - delete(attributes["gen_ai.content.completion"]) + + # Or replace with placeholder + - replace_pattern(attributes["gen_ai.content.prompt"], + "\\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,}\\b", + "[REDACTED_EMAIL]") +``` + +## Pricing Tables + +Maintain pricing in the collector config: + +```yaml +processors: + transform/cost: + trace_statements: + - context: span + statements: + # OpenAI pricing (as of 2024) + # GPT-4 + - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.00003) + where attributes["gen_ai.request.model"] == "gpt-4" or attributes["gen_ai.request.model"] == "gpt-4-0613" + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.00006) + where attributes["gen_ai.request.model"] == "gpt-4" or attributes["gen_ai.request.model"] == "gpt-4-0613" + + # GPT-4 Turbo + - 
set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.00001) + where IsMatch(attributes["gen_ai.request.model"], "gpt-4-turbo.*") + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.00003) + where IsMatch(attributes["gen_ai.request.model"], "gpt-4-turbo.*") + + # GPT-4o + - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.000005) + where IsMatch(attributes["gen_ai.request.model"], "gpt-4o.*") + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.000015) + where IsMatch(attributes["gen_ai.request.model"], "gpt-4o.*") + + # GPT-3.5 Turbo + - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.0000005) + where IsMatch(attributes["gen_ai.request.model"], "gpt-3.5-turbo.*") + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.0000015) + where IsMatch(attributes["gen_ai.request.model"], "gpt-3.5-turbo.*") + + # Claude 3 Opus + - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.000015) + where IsMatch(attributes["gen_ai.request.model"], "claude-3-opus.*") + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.000075) + where IsMatch(attributes["gen_ai.request.model"], "claude-3-opus.*") + + # Claude 3 Sonnet + - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.000003) + where IsMatch(attributes["gen_ai.request.model"], "claude-3-sonnet.*") + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.000015) + where IsMatch(attributes["gen_ai.request.model"], "claude-3-sonnet.*") + + # Claude 3 Haiku + - set(attributes["botanu.cost.input_usd"], attributes["gen_ai.usage.input_tokens"] * 0.00000025) + where IsMatch(attributes["gen_ai.request.model"], "claude-3-haiku.*") + - set(attributes["botanu.cost.output_usd"], attributes["gen_ai.usage.output_tokens"] * 0.00000125) + where IsMatch(attributes["gen_ai.request.model"], "claude-3-haiku.*") + + # Total cost + - set(attributes["botanu.cost.total_usd"], + attributes["botanu.cost.input_usd"] + attributes["botanu.cost.output_usd"]) + where attributes["botanu.cost.input_usd"] != nil and attributes["botanu.cost.output_usd"] != nil +``` + +## Backend Exporters + +### ClickHouse + +```yaml +exporters: + clickhouse: + endpoint: tcp://clickhouse:9000 + database: botanu + username: default + password: ${CLICKHOUSE_PASSWORD} + ttl: 90d + create_schema: true + logs_table_name: otel_logs + traces_table_name: otel_traces + metrics_table_name: otel_metrics +``` + +### PostgreSQL (via OTLP) + +Use the collector to forward to a service that writes to PostgreSQL: + +```yaml +exporters: + otlp: + endpoint: http://postgres-writer:4317 +``` + +### Prometheus (Metrics) + +```yaml +exporters: + prometheus: + endpoint: 0.0.0.0:8889 + namespace: botanu +``` + +### Grafana Tempo + +```yaml +exporters: + otlp: + endpoint: tempo:4317 + tls: + insecure: true +``` + +## Sampling + +For cost attribution, avoid sampling. 
If you must sample: + +```yaml +processors: + probabilistic_sampler: + sampling_percentage: 100 # Keep 100% for cost attribution + + # Or sample only non-LLM spans + tail_sampling: + decision_wait: 10s + policies: + # Always keep LLM calls + - name: always-sample-llm + type: string_attribute + string_attribute: + key: gen_ai.operation.name + values: [chat, text_completion, embeddings] + + # Sample other spans at 10% + - name: sample-other + type: probabilistic + probabilistic: + sampling_percentage: 10 +``` + +## High Availability + +### Load Balancing + +```yaml +# collector-1.yaml +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + +exporters: + loadbalancing: + protocol: + otlp: + tls: + insecure: true + resolver: + dns: + hostname: collector-pool.svc.cluster.local + port: 4317 +``` + +### Kubernetes Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: otel-collector +spec: + replicas: 3 + selector: + matchLabels: + app: otel-collector + template: + spec: + containers: + - name: collector + image: otel/opentelemetry-collector-contrib:latest + ports: + - containerPort: 4318 + - containerPort: 4317 + volumeMounts: + - name: config + mountPath: /etc/otelcol + volumes: + - name: config + configMap: + name: otel-collector-config +``` + +## Monitoring the Collector + +Enable internal telemetry: + +```yaml +service: + telemetry: + logs: + level: info + metrics: + level: detailed + address: 0.0.0.0:8888 +``` + +Access metrics at `http://collector:8888/metrics`. + +## See Also + +- [Architecture](../concepts/architecture.md) - SDK architecture +- [Auto-Instrumentation](auto-instrumentation.md) - Library instrumentation +- [Best Practices](../patterns/best-practices.md) - Configuration patterns diff --git a/docs/integration/existing-otel.md b/docs/integration/existing-otel.md new file mode 100644 index 0000000..a008cdb --- /dev/null +++ b/docs/integration/existing-otel.md @@ -0,0 +1,295 @@ +# Existing OpenTelemetry Setup + +Integrate Botanu with your existing OpenTelemetry configuration. + +## Overview + +If you already have OpenTelemetry configured (via Datadog, Splunk, New Relic, or custom setup), Botanu integrates seamlessly. You only need to add the `RunContextEnricher` span processor. + +## Minimal Integration + +Add just the span processor to your existing provider: + +```python +from opentelemetry import trace +from botanu.processors.enricher import RunContextEnricher + +# Your existing TracerProvider +provider = trace.get_tracer_provider() + +# Add Botanu's enricher +provider.add_span_processor(RunContextEnricher()) +``` + +That's it. All spans will now receive `run_id` from baggage. 
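+
+One caveat: `trace.get_tracer_provider()` only exposes `add_span_processor()` once an SDK `TracerProvider` has been installed; before that it returns a proxy with no such method. If your integration code might run before the rest of your OpenTelemetry setup, a slightly more defensive sketch (standard OpenTelemetry SDK assumed) looks like this:
+
+```python
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+
+from botanu.processors.enricher import RunContextEnricher
+
+provider = trace.get_tracer_provider()
+
+if not isinstance(provider, TracerProvider):
+    # Nothing configured yet - install an SDK provider before adding the enricher
+    provider = TracerProvider()
+    trace.set_tracer_provider(provider)
+
+provider.add_span_processor(RunContextEnricher())
+```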
+ +## With Existing Instrumentation + +Botanu works alongside any existing instrumentation: + +```python +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.requests import RequestsInstrumentor + +from botanu.processors.enricher import RunContextEnricher + +# Your existing setup +provider = TracerProvider() +provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) +trace.set_tracer_provider(provider) + +# Your existing instrumentation +RequestsInstrumentor().instrument() + +# Add Botanu enricher (order doesn't matter) +provider.add_span_processor(RunContextEnricher()) +``` + +## With Datadog + +```python +from ddtrace import tracer +from ddtrace.opentelemetry import TracerProvider +from opentelemetry import trace + +from botanu.processors.enricher import RunContextEnricher + +# Datadog's TracerProvider +provider = TracerProvider() +trace.set_tracer_provider(provider) + +# Add Botanu enricher +provider.add_span_processor(RunContextEnricher()) +``` + +## With Splunk + +```python +from splunk_otel.tracing import start_tracing +from opentelemetry import trace + +from botanu.processors.enricher import RunContextEnricher + +# Start Splunk tracing +start_tracing() + +# Add Botanu enricher +provider = trace.get_tracer_provider() +provider.add_span_processor(RunContextEnricher()) +``` + +## With New Relic + +```python +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + +from botanu.processors.enricher import RunContextEnricher + +# New Relic OTLP endpoint +provider = TracerProvider() +provider.add_span_processor( + BatchSpanProcessor( + OTLPSpanExporter( + endpoint="https://otlp.nr-data.net/v1/traces", + headers={"api-key": "YOUR_LICENSE_KEY"}, + ) + ) +) +trace.set_tracer_provider(provider) + +# Add Botanu enricher +provider.add_span_processor(RunContextEnricher()) +``` + +## With Jaeger + +```python +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.exporter.jaeger.thrift import JaegerExporter + +from botanu.processors.enricher import RunContextEnricher + +# Jaeger setup +provider = TracerProvider() +provider.add_span_processor( + BatchSpanProcessor( + JaegerExporter( + agent_host_name="localhost", + agent_port=6831, + ) + ) +) +trace.set_tracer_provider(provider) + +# Add Botanu enricher +provider.add_span_processor(RunContextEnricher()) +``` + +## Multiple Exporters + +Send to both your APM and a cost-attribution backend: + +```python +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + +from botanu.processors.enricher import RunContextEnricher + +provider = TracerProvider() + +# Your APM (e.g., Datadog) +provider.add_span_processor( + BatchSpanProcessor( + OTLPSpanExporter(endpoint="https://your-apm.example.com/v1/traces") + ) +) + +# Botanu collector for cost attribution +provider.add_span_processor( + BatchSpanProcessor( + OTLPSpanExporter(endpoint="http://botanu-collector:4318/v1/traces") + ) +) + 
+# Botanu enricher (adds run_id to all spans) +provider.add_span_processor(RunContextEnricher()) + +trace.set_tracer_provider(provider) +``` + +## How RunContextEnricher Works + +The enricher reads baggage and writes to span attributes: + +```python +class RunContextEnricher(SpanProcessor): + def on_start(self, span, parent_context): + # Read run_id from baggage + run_id = baggage.get_baggage("botanu.run_id", parent_context) + if run_id: + span.set_attribute("botanu.run_id", run_id) + + # Read use_case from baggage + use_case = baggage.get_baggage("botanu.use_case", parent_context) + if use_case: + span.set_attribute("botanu.use_case", use_case) +``` + +This means: +- Every span gets `run_id` if it exists in baggage +- Auto-instrumented spans are enriched automatically +- No code changes needed in your existing instrumentation + +## Using Botanu Decorators + +With the enricher in place, use Botanu decorators: + +```python +from botanu import botanu_use_case, emit_outcome + +@botanu_use_case("Customer Support") +async def handle_ticket(ticket_id: str): + # All spans created here (by any instrumentation) get run_id + context = requests.get(f"/api/tickets/{ticket_id}") + response = await openai_call(context) + await database.save(response) + + emit_outcome("success", value_type="tickets_resolved", value_amount=1) +``` + +## Without Botanu Bootstrap + +If you don't want to use `enable()`, manually set up propagation: + +```python +from opentelemetry import propagate +from opentelemetry.propagators.composite import CompositePropagator +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from opentelemetry.baggage.propagation import W3CBaggagePropagator + +# Ensure baggage propagation is enabled +propagate.set_global_textmap( + CompositePropagator([ + TraceContextTextMapPropagator(), + W3CBaggagePropagator(), + ]) +) +``` + +## Verifying Integration + +Check that run_id appears on spans: + +```python +from opentelemetry import trace, baggage, context + +# Set baggage (normally done by @botanu_use_case) +ctx = baggage.set_baggage("botanu.run_id", "test-123") +token = context.attach(ctx) + +try: + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span") as span: + # Check attribute was set + print(span.attributes.get("botanu.run_id")) # Should print "test-123" +finally: + context.detach(token) +``` + +## Processor Order + +Span processors are called in order. The enricher should be added after your span exporters: + +```python +# 1. Exporters (send spans to backends) +provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) + +# 2. Enrichers (modify spans before export) +provider.add_span_processor(RunContextEnricher()) +``` + +However, `RunContextEnricher` uses `on_start()`, so it runs before export regardless. + +## Troubleshooting + +### run_id Not Appearing + +1. Check enricher is added: + ```python + provider = trace.get_tracer_provider() + # Verify RunContextEnricher is in the list + ``` + +2. Check baggage is set: + ```python + from opentelemetry import baggage + print(baggage.get_baggage("botanu.run_id")) + ``` + +3. Ensure `@botanu_use_case` is used at entry points + +### Baggage Not Propagating + +Check propagators are configured: +```python +from opentelemetry import propagate +print(propagate.get_global_textmap()) +``` + +Should include `W3CBaggagePropagator`. 
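+
+If neither check pinpoints the problem, reproduce the whole path in isolation. The sketch below uses the SDK's in-memory exporter and an illustrative run id: it sets baggage the way `@botanu_use_case` would, starts a span, and asserts that the exported span carries `botanu.run_id`. Run it as a standalone script so the global tracer provider is not already set.
+
+```python
+from opentelemetry import baggage, context, trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
+
+from botanu.processors.enricher import RunContextEnricher
+
+exporter = InMemorySpanExporter()
+provider = TracerProvider()
+provider.add_span_processor(SimpleSpanProcessor(exporter))
+provider.add_span_processor(RunContextEnricher())
+trace.set_tracer_provider(provider)
+
+# Simulate what @botanu_use_case does: put run_id into baggage
+token = context.attach(baggage.set_baggage("botanu.run_id", "debug-run-123"))
+try:
+    with trace.get_tracer("debug").start_as_current_span("debug-span"):
+        pass
+finally:
+    context.detach(token)
+
+exported = exporter.get_finished_spans()[0]
+assert exported.attributes.get("botanu.run_id") == "debug-run-123"
+print("RunContextEnricher is wired up correctly")
+```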
+ +## See Also + +- [Auto-Instrumentation](auto-instrumentation.md) - Library instrumentation +- [Collector Configuration](collector.md) - Collector setup +- [Architecture](../concepts/architecture.md) - SDK design diff --git a/docs/integration/kubernetes.md b/docs/integration/kubernetes.md new file mode 100644 index 0000000..c71cf4e --- /dev/null +++ b/docs/integration/kubernetes.md @@ -0,0 +1,382 @@ +# Kubernetes Deployment + +Zero-code instrumentation for large-scale deployments. + +## Overview + +For organizations with thousands of applications, modifying code in every repo is impractical. This guide covers zero-code instrumentation using Kubernetes-native approaches. + +## What Requires Code Changes + +| Service Type | Code Change | Config Change | +|--------------|-------------|---------------| +| **Entry point** | `@botanu_use_case` decorator (generates `run_id`) | K8s annotation | +| **Intermediate services** | None | K8s annotation only | + +**Entry point** = The service where the business transaction starts (API gateway, webhook handler, queue consumer). + +**Intermediate services** = All downstream services called by the entry point. + +## What Gets Auto-Instrumented + +With zero-code instrumentation, the following are automatically traced: + +- **HTTP clients** — requests, httpx, urllib3, aiohttp (including retries) +- **Frameworks** — FastAPI, Flask, Django, Starlette +- **Databases** — PostgreSQL, MySQL, MongoDB, Redis, SQLAlchemy +- **Messaging** — Celery, Kafka +- **LLM Providers** — OpenAI, Anthropic, Vertex AI + +**Retries are automatically captured.** Each HTTP call (including retries from libraries like `tenacity`, `urllib3.util.retry`, or `httpx` retry) creates a separate span. The `run_id` propagates via W3C Baggage headers on every request. + +## Architecture + +``` +┌────────────────────────────────────────────────────────────────┐ +│ Kubernetes Cluster │ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ App A │ │ App B │ │ App C │ │ +│ │ (entry) │ │ (no change) │ │ (no change) │ │ +│ │ @use_case │ │ │ │ │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ │ OTel auto-injected via Operator │ +│ │ │ │ │ +│ └────────────────┼────────────────┘ │ +│ ▼ │ +│ ┌───────────────────────┐ │ +│ │ OTel Collector │ │ +│ │ (DaemonSet) │ │ +│ └───────────┬───────────┘ │ +└──────────────────────────┼──────────────────────────────────────┘ + │ OTLP + ▼ + Observability Backend +``` + +## Option 1: OTel Operator (Recommended) + +The OpenTelemetry Operator automatically injects instrumentation into pods. 
+ +### Install Operator + +```bash +# Install cert-manager (required) +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.0/cert-manager.yaml + +# Install OTel Operator +kubectl apply -f https://github.com/open-telemetry/opentelemetry-operator/releases/latest/download/opentelemetry-operator.yaml +``` + +### Create Instrumentation Resource + +```yaml +# instrumentation.yaml +apiVersion: opentelemetry.io/v1alpha1 +kind: Instrumentation +metadata: + name: botanu-instrumentation + namespace: default +spec: + exporter: + endpoint: http://otel-collector:4318 + propagators: + - tracecontext + - baggage + python: + image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python:latest + env: + - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED + value: "true" +``` + +```bash +kubectl apply -f instrumentation.yaml +``` + +### Annotate Deployments + +Add a single annotation to enable instrumentation: + +```yaml +# deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: my-service +spec: + template: + metadata: + annotations: + instrumentation.opentelemetry.io/inject-python: "true" + spec: + containers: + - name: app + image: my-service:latest + env: + - name: OTEL_SERVICE_NAME + value: "my-service" +``` + +No code changes required. The operator injects instrumentation at pod startup. + +## Option 2: Environment Variables Only + +For apps without operator, use environment variables: + +```yaml +apiVersion: apps/v1 +kind: Deployment +spec: + template: + spec: + containers: + - name: app + image: my-service:latest + command: ["opentelemetry-instrument", "python", "app.py"] + env: + - name: OTEL_SERVICE_NAME + value: "my-service" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-collector:4318" + - name: OTEL_EXPORTER_OTLP_PROTOCOL + value: "http/protobuf" + - name: OTEL_PROPAGATORS + value: "tracecontext,baggage" + - name: OTEL_TRACES_EXPORTER + value: "otlp" + - name: OTEL_METRICS_EXPORTER + value: "none" + - name: OTEL_LOGS_EXPORTER + value: "none" +``` + +Base image must include: +```dockerfile +RUN pip install opentelemetry-distro opentelemetry-exporter-otlp \ + opentelemetry-instrumentation-fastapi \ + opentelemetry-instrumentation-requests \ + opentelemetry-instrumentation-openai-v2 +``` + +## Option 3: Init Container + +Inject instrumentation via init container: + +```yaml +apiVersion: apps/v1 +kind: Deployment +spec: + template: + spec: + initContainers: + - name: otel-init + image: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python:latest + command: ["/bin/sh", "-c"] + args: + - cp -r /autoinstrumentation/. 
/otel-auto-instrumentation/ + volumeMounts: + - name: otel-auto-instrumentation + mountPath: /otel-auto-instrumentation + containers: + - name: app + image: my-service:latest + env: + - name: PYTHONPATH + value: "/otel-auto-instrumentation" + - name: OTEL_SERVICE_NAME + value: "my-service" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-collector:4318" + volumeMounts: + - name: otel-auto-instrumentation + mountPath: /otel-auto-instrumentation + volumes: + - name: otel-auto-instrumentation + emptyDir: {} +``` + +## OTel Collector Setup + +Deploy collector as DaemonSet: + +```yaml +# collector.yaml +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: otel-collector +spec: + mode: daemonset + config: | + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + + processors: + batch: + timeout: 5s + send_batch_size: 1000 + + # Extract run_id from baggage for querying + attributes: + actions: + - key: botanu.run_id + from_context: baggage + action: upsert + + exporters: + otlp: + endpoint: "your-backend:4317" + tls: + insecure: false + + service: + pipelines: + traces: + receivers: [otlp] + processors: [batch, attributes] + exporters: [otlp] +``` + +## Entry Point Service (Code Change Required) + +The entry point service is the **only** service that needs a code change. It must use `@botanu_use_case` to generate the `run_id`: + +```python +from botanu import enable, botanu_use_case + +enable(service_name="entry-service") + +@botanu_use_case(name="my_workflow") +def my_function(): + data = db.query(...) + result = llm.complete(...) + downstream_service.call(result) + return result +``` + +The `@botanu_use_case` decorator generates a `run_id` and propagates it via W3C Baggage to all downstream calls. + +**Downstream services (B, C, D, etc.) need zero code changes** — they just need the K8s annotation. + +## Helm Chart + +For production deployments, use the Botanu Helm chart: + +```bash +helm repo add botanu https://charts.botanu.ai +helm install botanu-collector botanu/collector \ + --set exporter.endpoint=your-backend:4317 +``` + +Values: + +```yaml +# values.yaml +collector: + mode: daemonset + resources: + limits: + cpu: 500m + memory: 512Mi + +instrumentation: + enabled: true + python: + enabled: true + propagators: + - tracecontext + - baggage + +exporter: + endpoint: "your-backend:4317" + tls: + enabled: true +``` + +## GitOps Integration + +Add annotations via Kustomize: + +```yaml +# kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +patches: + - patch: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: any + spec: + template: + metadata: + annotations: + instrumentation.opentelemetry.io/inject-python: "true" + target: + kind: Deployment + labelSelector: "instrumentation=enabled" +``` + +Label deployments to opt-in: + +```yaml +metadata: + labels: + instrumentation: enabled +``` + +## Environment Variables Reference + +| Variable | Description | Example | +|----------|-------------|---------| +| `OTEL_SERVICE_NAME` | Service name | `my-service` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | Collector endpoint | `http://collector:4318` | +| `OTEL_PROPAGATORS` | Context propagators | `tracecontext,baggage` | +| `OTEL_TRACES_EXPORTER` | Trace exporter | `otlp` | +| `OTEL_RESOURCE_ATTRIBUTES` | Additional attributes | `deployment.environment=prod` | + +## Rollout Strategy + +For 2000+ applications: + +1. **Phase 1**: Deploy OTel Collector (DaemonSet) +2. 
**Phase 2**: Install OTel Operator +3. **Phase 3**: Create Instrumentation resource +4. **Phase 4**: Add annotations via GitOps (batch by team/namespace) +5. **Phase 5**: Instrument entry points with `@botanu_use_case` + +Each phase is independent. Annotations can be rolled out gradually. + +## Troubleshooting + +### Verify Injection + +```bash +kubectl describe pod my-pod | grep -A5 "Init Containers" +``` + +### Check Instrumentation Logs + +```bash +kubectl logs my-pod -c opentelemetry-auto-instrumentation +``` + +### Verify Collector Receiving + +```bash +kubectl logs -l app=otel-collector | grep "TracesExporter" +``` + +## See Also + +- [Collector Configuration](collector.md) +- [Auto-Instrumentation](auto-instrumentation.md) +- [Context Propagation](../concepts/context-propagation.md) diff --git a/docs/patterns/anti-patterns.md b/docs/patterns/anti-patterns.md new file mode 100644 index 0000000..1e09f23 --- /dev/null +++ b/docs/patterns/anti-patterns.md @@ -0,0 +1,490 @@ +# Anti-Patterns + +Common mistakes to avoid when using Botanu SDK. + +## Run Design Anti-Patterns + +### Creating Runs for Internal Operations + +**Don't** create runs for internal functions: + +```python +# BAD - Too many runs +@botanu_use_case("Fetch Context") # Don't do this +async def fetch_context(ticket_id): + return await db.query(...) + +@botanu_use_case("Generate Response") # Or this +async def generate_response(context): + return await llm.complete(...) + +@botanu_use_case("Customer Support") +async def handle_ticket(ticket_id): + context = await fetch_context(ticket_id) + response = await generate_response(context) + return response +``` + +**Do** use a single run at the entry point: + +```python +# GOOD - One run for the business outcome +@botanu_use_case("Customer Support") +async def handle_ticket(ticket_id): + context = await fetch_context(ticket_id) # Not decorated + response = await generate_response(context) # Not decorated + emit_outcome("success", value_type="tickets_resolved", value_amount=1) + return response +``` + +### Nesting @botanu_use_case Decorators + +**Don't** nest use case decorators: + +```python +# BAD - Nested runs create confusion +@botanu_use_case("Outer") +async def outer(): + await inner() # Creates a second run + +@botanu_use_case("Inner") # Don't do this +async def inner(): + ... +``` + +**Do** use @botanu_use_case only at entry points: + +```python +# GOOD - Only entry point is decorated +@botanu_use_case("Main Workflow") +async def main(): + await step_one() # No decorator + await step_two() # No decorator +``` + +### Generic Use Case Names + +**Don't** use vague names: + +```python +# BAD - Meaningless in dashboards +@botanu_use_case("Process") +@botanu_use_case("Handle") +@botanu_use_case("Main") +@botanu_use_case("DoWork") +``` + +**Do** use descriptive business names: + +```python +# GOOD - Clear in reports +@botanu_use_case("Customer Support") +@botanu_use_case("Invoice Processing") +@botanu_use_case("Lead Qualification") +@botanu_use_case("Document Analysis") +``` + +## Outcome Anti-Patterns + +### Forgetting to Emit Outcomes + +**Don't** leave runs without outcomes: + +```python +# BAD - No outcome recorded +@botanu_use_case("Process Order") +async def process_order(order_id): + result = await process(order_id) + return result # Where's the outcome? 
+``` + +**Do** always emit an outcome: + +```python +# GOOD - Explicit outcome +@botanu_use_case("Process Order") +async def process_order(order_id): + try: + result = await process(order_id) + emit_outcome("success", value_type="orders_processed", value_amount=1) + return result + except Exception as e: + emit_outcome("failed", reason=type(e).__name__) + raise +``` + +### Multiple Outcomes Per Run + +**Don't** emit multiple outcomes: + +```python +# BAD - Multiple outcomes are confusing +@botanu_use_case("Batch Processing") +async def process_batch(items): + for item in items: + await process(item) + emit_outcome("success", value_type="item_processed") # Don't do this +``` + +**Do** emit one summary outcome: + +```python +# GOOD - One outcome at the end +@botanu_use_case("Batch Processing") +async def process_batch(items): + processed = 0 + for item in items: + await process(item) + processed += 1 + emit_outcome("success", value_type="items_processed", value_amount=processed) +``` + +### Missing Failure Reasons + +**Don't** emit failures without reasons: + +```python +# BAD - No context for debugging +except Exception: + emit_outcome("failed") # Why did it fail? + raise +``` + +**Do** include the failure reason: + +```python +# GOOD - Reason helps debugging +except ValidationError: + emit_outcome("failed", reason="validation_error") + raise +except RateLimitError: + emit_outcome("failed", reason="rate_limit_exceeded") + raise +except Exception as e: + emit_outcome("failed", reason=type(e).__name__) + raise +``` + +## LLM Tracking Anti-Patterns + +### Not Recording Tokens + +**Don't** skip token recording: + +```python +# BAD - No cost data +with track_llm_call(provider="openai", model="gpt-4"): + response = await client.chat.completions.create(...) + # Token usage not recorded +``` + +**Do** always record tokens: + +```python +# GOOD - Tokens enable cost calculation +with track_llm_call(provider="openai", model="gpt-4") as tracker: + response = await client.chat.completions.create(...) + tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + ) +``` + +### Ignoring Cached Tokens + +**Don't** forget cache tokens (they have different pricing): + +```python +# BAD - Missing cache data +tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, +) +``` + +**Do** include cache breakdown: + +```python +# GOOD - Full token breakdown +tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + cache_read_tokens=response.usage.cache_read_tokens, + cache_write_tokens=response.usage.cache_write_tokens, +) +``` + +### Wrong Provider Names + +**Don't** use inconsistent provider names: + +```python +# BAD - Inconsistent naming +track_llm_call(provider="OpenAI", ...) # Mixed case +track_llm_call(provider="open-ai", ...) # Wrong format +track_llm_call(provider="gpt", ...) # Model as provider +``` + +**Do** use standard provider names (auto-normalized): + +```python +# GOOD - Standard names (or let SDK normalize) +track_llm_call(provider="openai", ...) +track_llm_call(provider="anthropic", ...) +track_llm_call(provider="azure_openai", ...) 
+```
+
+## Configuration Anti-Patterns
+
+### Sampling for Cost Attribution
+
+**Don't** sample away the spans that carry cost data. A dropped LLM or database span means missing tokens and missing dollars, so per-run cost totals become wrong.
+
+**Do** export all spans from the SDK and manage volume in the collector instead, for example with tail sampling that always keeps LLM spans (see [Collector Configuration](../integration/collector.md)).
+
+### Hardcoding Configuration
+
+**Don't** hardcode production values:
+
+```python
+# BAD - Hardcoded
+enable(
+    service_name="my-service",
+    otlp_endpoint="http://prod-collector.internal:4318",
+)
+```
+
+**Do** use environment variables:
+
+```python
+# GOOD - Environment-based
+enable(service_name=os.environ["OTEL_SERVICE_NAME"])
+
+# Or use YAML with interpolation
+# botanu.yaml
+# otlp:
+#   endpoint: ${COLLECTOR_ENDPOINT}
+```
+
+### Disabling Auto-Instrumentation Unnecessarily
+
+**Don't** disable auto-instrumentation without reason:
+
+```python
+# BAD - Missing automatic tracing
+enable(
+    service_name="my-service",
+    auto_instrument_packages=[],  # Why?
+)
+```
+
+**Do** keep defaults or be selective:
+
+```python
+# GOOD - Default instrumentation
+enable(service_name="my-service")
+
+# Or selective
+enable(
+    service_name="my-service",
+    auto_instrument_packages=["fastapi", "openai_v2", "sqlalchemy"],
+)
+```
+
+## Context Propagation Anti-Patterns
+
+### Losing Context in Async Code
+
+**Don't** hand work off to threads or executors without the current context:
+
+```python
+# BAD - Context lost
+@botanu_use_case("Parallel Processing")
+async def process():
+    loop = asyncio.get_running_loop()
+    # Worker threads don't inherit contextvars, so run_id is lost
+    await asyncio.gather(
+        loop.run_in_executor(None, task_one),
+        loop.run_in_executor(None, task_two),
+    )
+```
+
+**Do** keep parallel work in coroutines so context propagates:
+
+```python
+# GOOD - Context flows through asyncio
+@botanu_use_case("Parallel Processing")
+async def process():
+    # asyncio with contextvars works correctly
+    await asyncio.gather(
+        task_one(),  # Inherits context
+        task_two(),  # Inherits context
+    )
+```
+
+### Not Extracting Context in Consumers
+
+**Don't** ignore incoming context:
+
+```python
+# BAD - Context not extracted
+def process_message(message):
+    # run_id from producer is lost
+    handle_payload(message["payload"])
+```
+
+**Do** extract and use context:
+
+```python
+# GOOD - Context continues
+def process_message(message):
+    baggage = message.get("baggage", {})
+    ctx = RunContext.from_baggage(baggage)
+    if ctx:
+        with ctx.as_current():
+            handle_payload(message["payload"])
+```
+
+## Data Tracking Anti-Patterns
+
+### Not Tracking Data Operations
+
+**Don't** ignore database/storage costs:
+
+```python
+# BAD - Only LLM tracked
+@botanu_use_case("Analysis")
+async def analyze():
+    data = await snowflake.query(expensive_query)  # Not tracked!
+    with track_llm_call(...) as tracker:
+        result = await llm.complete(data)
+        tracker.set_tokens(...)
+```
+
+**Do** track all cost-generating operations:
+
+```python
+# GOOD - Complete cost picture
+@botanu_use_case("Analysis")
+async def analyze():
+    with track_db_operation(system="snowflake", operation="SELECT") as db:
+        data = await snowflake.query(expensive_query)
+        db.set_bytes_scanned(data.bytes_scanned)
+
+    with track_llm_call(...) as tracker:
+        result = await llm.complete(data)
+        tracker.set_tokens(...)
+```
+
+### Missing Bytes for Pay-Per-Scan
+
+**Don't** forget bytes for warehouses:
+
+```python
+# BAD - Missing cost driver
+with track_db_operation(system="bigquery", operation="SELECT") as db:
+    result = await bq.query(sql)
+    db.set_result(rows_returned=len(result))  # Rows don't determine cost!
+```
+
+**Do** include bytes scanned:
+
+```python
+# GOOD - Bytes scanned is the cost driver
+with track_db_operation(system="bigquery", operation="SELECT") as db:
+    result = await bq.query(sql)
+    db.set_bytes_scanned(result.bytes_processed)
+    db.set_result(rows_returned=len(result))
+```
+
+## Error Handling Anti-Patterns
+
+### Swallowing Errors
+
+**Don't** hide errors:
+
+```python
+# BAD - Error hidden
+with track_llm_call(...) as tracker:
+    try:
+        response = await llm.complete(...)
+    except Exception:
+        pass  # Silently fails - no error recorded
+```
+
+**Do** record and propagate errors:
+
+```python
+# GOOD - Error tracked and raised
+with track_llm_call(...) as tracker:
+    try:
+        response = await llm.complete(...)
+    except Exception as e:
+        tracker.set_error(e)
+        emit_outcome("failed", reason=type(e).__name__)
+        raise
+```
+
+### Ignoring Partial Successes
+
+**Don't** mark all-or-nothing:
+
+```python
+# BAD - All items fail if one fails
+@botanu_use_case("Batch")
+async def process_batch(items):
+    for item in items:
+        await process(item)  # If one fails, no outcome
+    emit_outcome("success", value_amount=len(items))
+```
+
+**Do** track partial success:
+
+```python
+# GOOD - Partial success recorded
+@botanu_use_case("Batch")
+async def process_batch(items):
+    processed = 0
+    failed = 0
+    for item in items:
+        try:
+            await process(item)
+            processed += 1
+        except Exception:
+            failed += 1
+
+    if failed == 0:
+        emit_outcome("success", value_type="items_processed", value_amount=processed)
+    elif processed > 0:
+        emit_outcome("partial", value_type="items_processed", value_amount=processed,
+                     reason=f"failed_{failed}_of_{len(items)}")
+    else:
+        emit_outcome("failed", reason="all_items_failed")
+```
+
+## Testing Anti-Patterns
+
+### Testing with Real Exporters
+
+**Don't** send telemetry during tests:
+
+```python
+# BAD - Tests hit real collector
+async def test_workflow():
+    enable(service_name="test")  # Sends to real endpoint!
+    await my_workflow()
+```
+
+**Do** use NoOp or in-memory exporters:
+
+```python
+# GOOD - Tests are isolated
+from opentelemetry import trace
+from opentelemetry.trace import NoOpTracerProvider
+
+def setup_test():
+    trace.set_tracer_provider(NoOpTracerProvider())
+
+async def test_workflow():
+    await my_workflow()  # No external calls
+```
+
+## See Also
+
+- [Best Practices](best-practices.md) - What to do
+- [Quickstart](../getting-started/quickstart.md) - Getting started guide
+- [Outcomes](../tracking/outcomes.md) - Outcome recording details
diff --git a/docs/patterns/best-practices.md b/docs/patterns/best-practices.md
new file mode 100644
index 0000000..26372d1
--- /dev/null
+++ b/docs/patterns/best-practices.md
@@ -0,0 +1,416 @@
+# Best Practices
+
+Patterns for effective cost attribution with Botanu SDK.
+
+## Run Design
+
+### One Run Per Business Outcome
+
+A run should represent a complete business transaction:
+
+```python
+# GOOD - One run for one business outcome
+@botanu_use_case("Customer Support")
+async def resolve_ticket(ticket_id: str):
+    context = await fetch_context(ticket_id)
+    response = await generate_response(context)
+    await send_response(ticket_id, response)
+    emit_outcome("success", value_type="tickets_resolved", value_amount=1)
+```
+
+```python
+# BAD - Multiple runs for one outcome
+@botanu_use_case("Fetch Context")
+async def fetch_context(ticket_id: str):
+    ...
+
+@botanu_use_case("Generate Response")  # Don't do this
+async def generate_response(context):
+    ...
+```
+
+### Use Descriptive Use Case Names
+
+Use cases appear in dashboards and queries.
Choose names carefully: + +```python +# GOOD - Clear, descriptive names +@botanu_use_case("Customer Support") +@botanu_use_case("Document Analysis") +@botanu_use_case("Lead Qualification") + +# BAD - Generic or technical names +@botanu_use_case("HandleRequest") +@botanu_use_case("Process") +@botanu_use_case("Main") +``` + +### Include Workflow Names + +Workflow names help distinguish different paths within a use case: + +```python +@botanu_use_case("Customer Support", workflow="ticket_resolution") +async def resolve_ticket(): + ... + +@botanu_use_case("Customer Support", workflow="escalation") +async def escalate_ticket(): + ... +``` + +## Outcome Recording + +### Always Record Outcomes + +Every run should have an explicit outcome: + +```python +@botanu_use_case("Data Processing") +async def process_data(data_id: str): + try: + result = await process(data_id) + emit_outcome("success", value_type="records_processed", value_amount=result.count) + return result + except ValidationError: + emit_outcome("failed", reason="validation_error") + raise + except TimeoutError: + emit_outcome("failed", reason="timeout") + raise +``` + +### Quantify Value When Possible + +Include value amounts for better ROI analysis: + +```python +# GOOD - Quantified outcomes +emit_outcome("success", value_type="emails_sent", value_amount=50) +emit_outcome("success", value_type="revenue_generated", value_amount=1299.99) +emit_outcome("success", value_type="documents_processed", value_amount=10) + +# LESS USEFUL - No quantity +emit_outcome("success") +``` + +### Use Consistent Value Types + +Standardize your value types across the organization: + +```python +# Define standard value types +class ValueTypes: + TICKETS_RESOLVED = "tickets_resolved" + DOCUMENTS_PROCESSED = "documents_processed" + LEADS_QUALIFIED = "leads_qualified" + EMAILS_SENT = "emails_sent" + REVENUE_GENERATED = "revenue_generated" + +# Use consistently +emit_outcome("success", value_type=ValueTypes.TICKETS_RESOLVED, value_amount=1) +``` + +### Include Reasons for Failures + +Always explain why something failed: + +```python +emit_outcome("failed", reason="rate_limit_exceeded") +emit_outcome("failed", reason="invalid_input") +emit_outcome("failed", reason="model_unavailable") +emit_outcome("failed", reason="context_too_long") +``` + +## LLM Tracking + +### Always Record Token Usage + +Tokens are the primary cost driver for LLMs: + +```python +with track_llm_call(provider="openai", model="gpt-4") as tracker: + response = await client.chat.completions.create(...) + # Always set tokens + tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + ) +``` + +### Record Provider Request IDs + +Request IDs enable reconciliation with provider invoices: + +```python +tracker.set_request_id( + provider_request_id=response.id, # From provider + client_request_id=uuid.uuid4().hex, # Your internal ID +) +``` + +### Track Retries + +Record attempt numbers for accurate cost per success: + +```python +for attempt in range(max_retries): + with track_llm_call(provider="openai", model="gpt-4") as tracker: + tracker.set_attempt(attempt + 1) + try: + response = await client.chat.completions.create(...) 
+ break + except RateLimitError: + if attempt == max_retries - 1: + raise + await asyncio.sleep(backoff) +``` + +### Use Correct Operation Types + +Specify the operation type for accurate categorization: + +```python +from botanu.tracking.llm import track_llm_call, ModelOperation + +# Chat completion +with track_llm_call(provider="openai", model="gpt-4", operation=ModelOperation.CHAT): + ... + +# Embeddings +with track_llm_call(provider="openai", model="text-embedding-3-small", operation=ModelOperation.EMBEDDINGS): + ... +``` + +## Data Tracking + +### Track All Cost-Generating Operations + +Include databases, storage, and messaging: + +```python +@botanu_use_case("ETL Pipeline") +async def run_etl(): + # Track warehouse query (billed by bytes scanned) + with track_db_operation(system="snowflake", operation="SELECT") as db: + db.set_bytes_scanned(result.bytes_scanned) + db.set_query_id(result.query_id) + + # Track storage operations (billed by requests + data) + with track_storage_operation(system="s3", operation="PUT") as storage: + storage.set_result(bytes_written=len(data)) + + # Track messaging (billed by message count) + with track_messaging_operation(system="sqs", operation="publish", destination="queue") as msg: + msg.set_result(message_count=batch_size) +``` + +### Include Bytes for Pay-Per-Scan Services + +For data warehouses billed by data scanned: + +```python +with track_db_operation(system="bigquery", operation="SELECT") as db: + result = await bq_client.query(sql) + db.set_bytes_scanned(result.total_bytes_processed) + db.set_result(rows_returned=result.num_rows) +``` + +## Context Propagation + +### Use Middleware for Web Services + +Extract context from incoming requests: + +```python +from fastapi import FastAPI +from botanu.sdk.middleware import BotanuMiddleware + +app = FastAPI() +app.add_middleware(BotanuMiddleware) +``` + +### Propagate Context in Message Queues + +Inject and extract context manually for async messaging: + +```python +# Producer +def publish_message(payload): + ctx = get_current_run_context() + message = { + "payload": payload, + "baggage": ctx.to_baggage_dict() if ctx else {} + } + queue.publish(message) + +# Consumer +def process_message(message): + baggage = message.get("baggage", {}) + ctx = RunContext.from_baggage(baggage) + with ctx.as_current(): + handle_payload(message["payload"]) +``` + +### Use Lean Mode for High-Traffic Systems + +Default lean mode minimizes header overhead: + +```python +# Lean mode: ~100 bytes of baggage +# Propagates: run_id, use_case + +# Full mode: ~300 bytes of baggage +# Propagates: run_id, use_case, workflow, environment, tenant_id, parent_run_id +``` + +## Configuration + +### Use Environment Variables in Production + +Keep configuration out of code: + +```bash +export OTEL_SERVICE_NAME=my-service +export OTEL_EXPORTER_OTLP_ENDPOINT=http://collector:4318 +export BOTANU_ENVIRONMENT=production +``` + +### Use YAML for Complex Configuration + +For multi-environment setups: + +```yaml +# config/production.yaml +service: + name: ${OTEL_SERVICE_NAME} + environment: production + +otlp: + endpoint: ${COLLECTOR_ENDPOINT} + +propagation: + mode: lean +``` + +## Multi-Tenant Systems + +### Always Include Tenant ID + +For accurate per-tenant cost attribution: + +```python +@botanu_use_case("Customer Support", tenant_id=request.tenant_id) +async def handle_ticket(request): + ... 
+``` + +### Use Business Context + +Add additional attribution dimensions: + +```python +set_business_context( + customer_id=request.customer_id, + team="engineering", + cost_center="R&D", + region="us-west-2", +) +``` + +## Error Handling + +### Record Errors Explicitly + +Don't lose error context: + +```python +with track_llm_call(provider="openai", model="gpt-4") as tracker: + try: + response = await client.chat.completions.create(...) + except openai.APIError as e: + tracker.set_error(e) # Records error type and message + raise +``` + +### Emit Outcomes for Errors + +Even failed runs should have outcomes: + +```python +@botanu_use_case("Data Processing") +async def process(data_id): + try: + await process_data(data_id) + emit_outcome("success", value_type="items_processed", value_amount=1) + except ValidationError: + emit_outcome("failed", reason="validation_error") + raise + except Exception as e: + emit_outcome("failed", reason=type(e).__name__) + raise +``` + +## Performance + +### Use Async Tracking + +For async applications, ensure tracking is non-blocking: + +```python +# The SDK uses span events, not separate API calls +# This is already non-blocking +with track_llm_call(provider="openai", model="gpt-4") as tracker: + response = await async_llm_call() + tracker.set_tokens(...) # Immediate, non-blocking +``` + +### Batch Database Tracking + +For batch operations, track at batch level: + +```python +# GOOD - Batch tracking +with track_db_operation(system="postgresql", operation="INSERT") as db: + await cursor.executemany(insert_sql, batch_of_1000_rows) + db.set_result(rows_affected=1000) + +# LESS EFFICIENT - Per-row tracking +for row in batch_of_1000_rows: + with track_db_operation(system="postgresql", operation="INSERT") as db: + await cursor.execute(insert_sql, row) + db.set_result(rows_affected=1) +``` + +## Testing + +### Mock Tracing in Tests + +Use the NoOp tracer for unit tests: + +```python +from opentelemetry import trace +from opentelemetry.trace import NoOpTracerProvider + +def setup_test_tracing(): + trace.set_tracer_provider(NoOpTracerProvider()) +``` + +### Test Outcome Recording + +Verify outcomes are emitted correctly: + +```python +from unittest.mock import patch + +def test_successful_outcome(): + with patch("botanu.sdk.span_helpers.emit_outcome") as mock_emit: + result = await handle_ticket("123") + mock_emit.assert_called_with("success", value_type="tickets_resolved", value_amount=1) +``` + +## See Also + +- [Anti-Patterns](anti-patterns.md) - What to avoid +- [Architecture](../concepts/architecture.md) - SDK design principles +- [Configuration](../getting-started/configuration.md) - Configuration options diff --git a/docs/tracking/data-tracking.md b/docs/tracking/data-tracking.md new file mode 100644 index 0000000..9c066a8 --- /dev/null +++ b/docs/tracking/data-tracking.md @@ -0,0 +1,412 @@ +# Data Tracking + +Track database, storage, and messaging operations for complete cost visibility. + +## Overview + +Data operations often contribute significantly to AI workflow costs. 
Botanu provides tracking for: + +- **Databases** - SQL, NoSQL, data warehouses +- **Object Storage** - S3, GCS, Azure Blob +- **Messaging** - SQS, Kafka, Pub/Sub + +## Database Tracking + +### Basic Usage + +```python +from botanu.tracking.data import track_db_operation + +with track_db_operation(system="postgresql", operation="SELECT") as db: + result = await cursor.execute("SELECT * FROM users WHERE active = true") + db.set_result(rows_returned=len(result)) +``` + +### DBTracker Methods + +#### set_result() + +Record query results: + +```python +db.set_result( + rows_returned=100, # For SELECT queries + rows_affected=5, # For INSERT/UPDATE/DELETE + bytes_read=10240, # Data read + bytes_written=2048, # Data written +) +``` + +#### set_table() + +Record table information: + +```python +db.set_table("users", schema="public") +``` + +#### set_query_id() + +For data warehouses with query IDs: + +```python +db.set_query_id("01abc-def-...") +``` + +#### set_bytes_scanned() + +For pay-per-query warehouses: + +```python +db.set_bytes_scanned(1073741824) # 1 GB +``` + +#### set_error() + +Record errors (automatically called on exceptions): + +```python +db.set_error(exception) +``` + +#### add_metadata() + +Add custom attributes: + +```python +db.add_metadata( + query_type="aggregation", + cache_hit=True, +) +``` + +### Database Operations + +Use `DBOperation` constants: + +```python +from botanu.tracking.data import track_db_operation, DBOperation + +with track_db_operation(system="postgresql", operation=DBOperation.SELECT): + ... + +with track_db_operation(system="postgresql", operation=DBOperation.INSERT): + ... +``` + +Available operations: + +| Constant | Description | +|----------|-------------| +| `SELECT` | Read queries | +| `INSERT` | Insert data | +| `UPDATE` | Update data | +| `DELETE` | Delete data | +| `UPSERT` | Insert or update | +| `MERGE` | Merge operations | +| `CREATE` | Create tables/indexes | +| `DROP` | Drop objects | +| `ALTER` | Alter schema | +| `INDEX` | Index operations | +| `TRANSACTION` | Transaction control | +| `BATCH` | Batch operations | + +### System Normalization + +Database systems are automatically normalized: + +| Input | Normalized | +|-------|------------| +| `postgresql`, `postgres`, `pg` | `postgresql` | +| `mysql` | `mysql` | +| `mongodb`, `mongo` | `mongodb` | +| `dynamodb` | `dynamodb` | +| `redis` | `redis` | +| `elasticsearch` | `elasticsearch` | +| `snowflake` | `snowflake` | +| `bigquery` | `bigquery` | +| `redshift` | `redshift` | + +## Storage Tracking + +### Basic Usage + +```python +from botanu.tracking.data import track_storage_operation + +with track_storage_operation(system="s3", operation="PUT") as storage: + await s3_client.put_object(Bucket="my-bucket", Key="file.txt", Body=data) + storage.set_result(bytes_written=len(data)) +``` + +### StorageTracker Methods + +#### set_result() + +Record operation results: + +```python +storage.set_result( + objects_count=10, # Number of objects + bytes_read=1048576, # Data downloaded + bytes_written=2097152, # Data uploaded +) +``` + +#### set_bucket() + +Record bucket name: + +```python +storage.set_bucket("my-data-bucket") +``` + +#### set_error() + +Record errors: + +```python +storage.set_error(exception) +``` + +#### add_metadata() + +Add custom attributes: + +```python +storage.add_metadata( + storage_class="GLACIER", + encryption="AES256", +) +``` + +### Storage Operations + +| Constant | Description | +|----------|-------------| +| `GET` | Download object | +| `PUT` | Upload object | +| `DELETE` 
| Delete object | +| `LIST` | List objects | +| `HEAD` | Get metadata | +| `COPY` | Copy object | +| `MULTIPART_UPLOAD` | Multipart upload | + +### System Normalization + +| Input | Normalized | +|-------|------------| +| `s3`, `aws_s3` | `s3` | +| `gcs`, `google_cloud_storage` | `gcs` | +| `blob`, `azure_blob` | `azure_blob` | +| `minio` | `minio` | + +## Messaging Tracking + +### Basic Usage + +```python +from botanu.tracking.data import track_messaging_operation + +with track_messaging_operation(system="sqs", operation="publish", destination="my-queue") as msg: + await sqs_client.send_message(QueueUrl=queue_url, MessageBody=message) + msg.set_result(message_count=1, bytes_transferred=len(message)) +``` + +### MessagingTracker Methods + +#### set_result() + +Record operation results: + +```python +msg.set_result( + message_count=10, + bytes_transferred=4096, +) +``` + +#### set_error() + +Record errors: + +```python +msg.set_error(exception) +``` + +#### add_metadata() + +Add custom attributes: + +```python +msg.add_metadata( + message_group_id="group-1", + deduplication_id="dedup-123", +) +``` + +### Messaging Operations + +| Constant | Description | +|----------|-------------| +| `publish` | Send message | +| `consume` | Receive and process message | +| `receive` | Receive message | +| `send` | Send message (alias for publish) | +| `subscribe` | Subscribe to topic | + +### System Normalization + +| Input | Normalized | +|-------|------------| +| `sqs`, `aws_sqs` | `sqs` | +| `sns` | `sns` | +| `kinesis` | `kinesis` | +| `pubsub`, `google_pubsub` | `pubsub` | +| `kafka` | `kafka` | +| `rabbitmq` | `rabbitmq` | +| `celery` | `celery` | + +## Standalone Helpers + +### set_data_metrics() + +Set data metrics on the current span: + +```python +from botanu.tracking.data import set_data_metrics + +set_data_metrics( + rows_returned=100, + rows_affected=5, + bytes_read=10240, + bytes_written=2048, + objects_count=10, +) +``` + +### set_warehouse_metrics() + +For data warehouse queries: + +```python +from botanu.tracking.data import set_warehouse_metrics + +set_warehouse_metrics( + query_id="01abc-def-...", + bytes_scanned=1073741824, + rows_returned=1000, + partitions_scanned=5, +) +``` + +## Example: Complete Data Pipeline + +```python +from botanu import botanu_use_case, emit_outcome +from botanu.tracking.data import ( + track_db_operation, + track_storage_operation, + track_messaging_operation, + DBOperation, +) +from botanu.tracking.llm import track_llm_call + +@botanu_use_case("ETL Pipeline") +async def process_batch(batch_id: str): + """Complete ETL pipeline with cost tracking.""" + + # 1. Read from data warehouse + with track_db_operation(system="snowflake", operation=DBOperation.SELECT) as db: + db.set_query_id(batch_id) + rows = await snowflake_client.execute( + "SELECT * FROM raw_data WHERE batch_id = %s", + batch_id + ) + db.set_result(rows_returned=len(rows)) + db.set_bytes_scanned(rows.bytes_scanned) + + # 2. Process with LLM + processed = [] + for row in rows: + with track_llm_call(provider="openai", model="gpt-4") as llm: + result = await analyze_row(row) + llm.set_tokens(input_tokens=result.input_tokens, output_tokens=result.output_tokens) + processed.append(result) + + # 3. 
Write to storage + with track_storage_operation(system="s3", operation="PUT") as storage: + storage.set_bucket("processed-data") + await s3_client.put_object( + Bucket="processed-data", + Key=f"batch/{batch_id}.json", + Body=json.dumps(processed) + ) + storage.set_result(bytes_written=len(json.dumps(processed))) + + # 4. Write to database + with track_db_operation(system="postgresql", operation=DBOperation.INSERT) as db: + await pg_client.executemany( + "INSERT INTO processed_data VALUES (%s, %s, %s)", + [(r.id, r.result, r.score) for r in processed] + ) + db.set_result(rows_affected=len(processed)) + + # 5. Publish completion event + with track_messaging_operation(system="sqs", operation="publish", destination="batch-complete") as msg: + await sqs_client.send_message( + QueueUrl=queue_url, + MessageBody=json.dumps({"batch_id": batch_id, "count": len(processed)}) + ) + msg.set_result(message_count=1) + + emit_outcome("success", value_type="batches_processed", value_amount=1) + return processed +``` + +## Span Attributes + +### Database Spans + +| Attribute | Description | +|-----------|-------------| +| `db.system` | Database system (normalized) | +| `db.operation` | Operation type | +| `db.name` | Database name | +| `db.collection.name` | Table/collection name | +| `botanu.vendor` | Vendor for cost attribution | +| `botanu.data.rows_returned` | Rows returned | +| `botanu.data.rows_affected` | Rows modified | +| `botanu.data.bytes_read` | Bytes read | +| `botanu.data.bytes_written` | Bytes written | +| `botanu.warehouse.query_id` | Warehouse query ID | +| `botanu.warehouse.bytes_scanned` | Bytes scanned | + +### Storage Spans + +| Attribute | Description | +|-----------|-------------| +| `botanu.storage.system` | Storage system | +| `botanu.storage.operation` | Operation type | +| `botanu.storage.bucket` | Bucket name | +| `botanu.vendor` | Vendor for cost attribution | +| `botanu.data.objects_count` | Objects processed | +| `botanu.data.bytes_read` | Bytes downloaded | +| `botanu.data.bytes_written` | Bytes uploaded | + +### Messaging Spans + +| Attribute | Description | +|-----------|-------------| +| `messaging.system` | Messaging system | +| `messaging.operation` | Operation type | +| `messaging.destination.name` | Queue/topic name | +| `botanu.vendor` | Vendor for cost attribution | +| `botanu.messaging.message_count` | Messages processed | +| `botanu.messaging.bytes_transferred` | Bytes transferred | + +## See Also + +- [LLM Tracking](llm-tracking.md) - AI model tracking +- [Outcomes](outcomes.md) - Recording business outcomes +- [Best Practices](../patterns/best-practices.md) - Tracking best practices diff --git a/docs/tracking/llm-tracking.md b/docs/tracking/llm-tracking.md new file mode 100644 index 0000000..138cd7f --- /dev/null +++ b/docs/tracking/llm-tracking.md @@ -0,0 +1,332 @@ +# LLM Tracking + +Track AI model usage for accurate cost attribution across providers. + +## Overview + +Botanu provides LLM tracking that aligns with [OpenTelemetry GenAI Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/). This ensures compatibility with standard observability tooling while enabling detailed cost analysis. 
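+
+The examples below assume the SDK has already been enabled once at process
+startup (see the quickstart); a minimal bootstrap:
+
+```python
+from botanu import enable
+
+enable()  # reads OTEL_SERVICE_NAME / OTEL_EXPORTER_OTLP_ENDPOINT and starts exporting spans
+```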
+ +## Basic Usage + +### Context Manager (Recommended) + +```python +from botanu.tracking.llm import track_llm_call + +with track_llm_call(provider="openai", model="gpt-4") as tracker: + response = await openai.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": "Hello"}] + ) + tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + ) + tracker.set_request_id(response.id) +``` + +### What Gets Recorded + +| Attribute | Example | Description | +|-----------|---------|-------------| +| `gen_ai.operation.name` | `chat` | Type of operation | +| `gen_ai.provider.name` | `openai` | Normalized provider name | +| `gen_ai.request.model` | `gpt-4` | Requested model | +| `gen_ai.response.model` | `gpt-4-0613` | Actual model used | +| `gen_ai.usage.input_tokens` | `150` | Input/prompt tokens | +| `gen_ai.usage.output_tokens` | `200` | Output/completion tokens | +| `gen_ai.response.id` | `chatcmpl-...` | Provider request ID | + +## LLMTracker Methods + +### set_tokens() + +Record token usage from the response: + +```python +tracker.set_tokens( + input_tokens=150, + output_tokens=200, + cached_tokens=50, # For providers with caching + cache_read_tokens=50, # Anthropic-style cache read + cache_write_tokens=100, # Anthropic-style cache write +) +``` + +### set_request_id() + +Record provider and client request IDs for billing reconciliation: + +```python +tracker.set_request_id( + provider_request_id=response.id, # From provider response + client_request_id="my-client-123", # Your tracking ID +) +``` + +### set_response_model() + +When the response uses a different model than requested: + +```python +tracker.set_response_model("gpt-4-0613") +``` + +### set_request_params() + +Record request parameters for analysis: + +```python +tracker.set_request_params( + temperature=0.7, + top_p=0.9, + max_tokens=1000, + stop_sequences=["END"], + frequency_penalty=0.5, + presence_penalty=0.3, +) +``` + +### set_streaming() + +Mark as a streaming request: + +```python +tracker.set_streaming(True) +``` + +### set_cache_hit() + +Mark as a cache hit (for semantic caching): + +```python +tracker.set_cache_hit(True) +``` + +### set_attempt() + +Track retry attempts: + +```python +tracker.set_attempt(2) # Second attempt +``` + +### set_finish_reason() + +Record the stop reason: + +```python +tracker.set_finish_reason("stop") # or "length", "content_filter", etc. +``` + +### set_error() + +Record errors (automatically called on exceptions): + +```python +try: + response = await client.chat(...) +except openai.RateLimitError as e: + tracker.set_error(e) + raise +``` + +### add_metadata() + +Add custom attributes: + +```python +tracker.add_metadata( + prompt_version="v2.1", + experiment_id="exp-123", +) +``` + +## Operation Types + +Use `ModelOperation` constants for the `operation` parameter: + +```python +from botanu.tracking.llm import track_llm_call, ModelOperation + +# Chat completion +with track_llm_call(provider="openai", model="gpt-4", operation=ModelOperation.CHAT): + ... + +# Embeddings +with track_llm_call(provider="openai", model="text-embedding-3-small", operation=ModelOperation.EMBEDDINGS): + ... + +# Text completion (legacy) +with track_llm_call(provider="openai", model="davinci", operation=ModelOperation.TEXT_COMPLETION): + ... 
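+
+# Image generation (model name here is illustrative)
+with track_llm_call(provider="openai", model="dall-e-3", operation=ModelOperation.IMAGE_GENERATION):
+    ...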
+``` + +Available operations: + +| Constant | Value | Use Case | +|----------|-------|----------| +| `CHAT` | `chat` | Chat completions (default) | +| `TEXT_COMPLETION` | `text_completion` | Legacy completions | +| `EMBEDDINGS` | `embeddings` | Embedding generation | +| `GENERATE_CONTENT` | `generate_content` | Generic content generation | +| `EXECUTE_TOOL` | `execute_tool` | Tool/function execution | +| `CREATE_AGENT` | `create_agent` | Agent creation | +| `INVOKE_AGENT` | `invoke_agent` | Agent invocation | +| `RERANK` | `rerank` | Reranking | +| `IMAGE_GENERATION` | `image_generation` | Image generation | +| `SPEECH_TO_TEXT` | `speech_to_text` | Transcription | +| `TEXT_TO_SPEECH` | `text_to_speech` | Speech synthesis | + +## Provider Normalization + +Provider names are automatically normalized: + +| Input | Normalized | +|-------|------------| +| `openai`, `OpenAI` | `openai` | +| `azure_openai`, `azure-openai` | `azure.openai` | +| `anthropic`, `claude` | `anthropic` | +| `bedrock`, `aws_bedrock` | `aws.bedrock` | +| `vertex`, `vertexai`, `gemini` | `gcp.vertex_ai` | +| `cohere` | `cohere` | +| `mistral`, `mistralai` | `mistral` | +| `together`, `togetherai` | `together` | +| `groq` | `groq` | + +## Tool/Function Tracking + +Track tool calls triggered by LLMs: + +```python +from botanu.tracking.llm import track_tool_call + +with track_tool_call(tool_name="search_database", tool_call_id="call_abc123") as tool: + results = await search_database(query) + tool.set_result( + success=True, + items_returned=len(results), + bytes_processed=1024, + ) +``` + +### ToolTracker Methods + +```python +# Set execution result +tool.set_result( + success=True, + items_returned=10, + bytes_processed=2048, +) + +# Set tool call ID from LLM response +tool.set_tool_call_id("call_abc123") + +# Record error +tool.set_error(exception) + +# Add custom metadata +tool.add_metadata(query_type="semantic") +``` + +## Standalone Helpers + +For cases where you can't use context managers: + +### set_llm_attributes() + +```python +from botanu.tracking.llm import set_llm_attributes + +set_llm_attributes( + provider="openai", + model="gpt-4", + operation="chat", + input_tokens=150, + output_tokens=200, + streaming=True, + provider_request_id="chatcmpl-...", +) +``` + +### set_token_usage() + +```python +from botanu.tracking.llm import set_token_usage + +set_token_usage( + input_tokens=150, + output_tokens=200, + cached_tokens=50, +) +``` + +## Decorator for Auto-Instrumentation + +For wrapping existing client methods: + +```python +from botanu.tracking.llm import llm_instrumented + +class MyOpenAIClient: + @llm_instrumented(provider="openai", tokens_from_response=True) + def chat(self, model: str, messages: list): + return openai.chat.completions.create(model=model, messages=messages) +``` + +## Metrics + +The SDK automatically records these metrics: + +| Metric | Type | Description | +|--------|------|-------------| +| `gen_ai.client.token.usage` | Histogram | Token counts by type | +| `gen_ai.client.operation.duration` | Histogram | Operation duration in seconds | +| `botanu.gen_ai.attempts` | Counter | Request attempts (including retries) | + +## Example: Multi-Provider Workflow + +```python +from botanu import botanu_use_case, emit_outcome +from botanu.tracking.llm import track_llm_call + +@botanu_use_case("Document Analysis") +async def analyze_with_fallback(document: str): + """Try Claude first, fall back to GPT-4.""" + + try: + with track_llm_call(provider="anthropic", model="claude-3-opus") as tracker: + 
tracker.set_attempt(1) + response = await anthropic_client.messages.create( + model="claude-3-opus-20240229", + messages=[{"role": "user", "content": document}] + ) + tracker.set_tokens( + input_tokens=response.usage.input_tokens, + output_tokens=response.usage.output_tokens, + ) + emit_outcome("success", value_type="analyses_completed", value_amount=1) + return response.content[0].text + + except anthropic.RateLimitError: + # Fallback to OpenAI + with track_llm_call(provider="openai", model="gpt-4") as tracker: + tracker.set_attempt(2) + response = await openai_client.chat.completions.create( + model="gpt-4", + messages=[{"role": "user", "content": document}] + ) + tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + ) + emit_outcome("success", value_type="analyses_completed", value_amount=1) + return response.choices[0].message.content +``` + +## See Also + +- [Auto-Instrumentation](../integration/auto-instrumentation.md) - Automatic LLM tracking +- [Data Tracking](data-tracking.md) - Database and storage tracking +- [Outcomes](outcomes.md) - Recording business outcomes diff --git a/docs/tracking/outcomes.md b/docs/tracking/outcomes.md new file mode 100644 index 0000000..0e974ae --- /dev/null +++ b/docs/tracking/outcomes.md @@ -0,0 +1,363 @@ +# Outcomes + +Record business outcomes to enable cost-per-outcome analysis. + +## Overview + +Outcomes connect infrastructure costs to business value. By recording what was achieved (tickets resolved, documents processed, leads qualified), you can calculate the true ROI of your AI workflows. + +## Basic Usage + +```python +from botanu import botanu_use_case, emit_outcome + +@botanu_use_case("Customer Support") +async def handle_ticket(ticket_id: str): + # ... process ticket ... 
+ + # Record the business outcome + emit_outcome("success", value_type="tickets_resolved", value_amount=1) +``` + +## emit_outcome() Parameters + +```python +emit_outcome( + status: str, # Required: "success", "partial", "failed" + value_type: str = None, # What was achieved + value_amount: float = None, # How much + confidence: float = None, # Confidence score (0.0-1.0) + reason: str = None, # Why (especially for failures) +) +``` + +### status + +The outcome status: + +| Status | Description | Use Case | +|--------|-------------|----------| +| `success` | Fully achieved goal | Ticket resolved, document processed | +| `partial` | Partially achieved | 3 of 5 items processed | +| `failed` | Did not achieve goal | Error, timeout, rejection | + +### value_type + +A descriptive label for what was achieved: + +```python +emit_outcome("success", value_type="tickets_resolved", value_amount=1) +emit_outcome("success", value_type="documents_processed", value_amount=5) +emit_outcome("success", value_type="leads_qualified", value_amount=1) +emit_outcome("success", value_type="revenue_generated", value_amount=499.99) +``` + +### value_amount + +The quantified value: + +```python +# Count +emit_outcome("success", value_type="emails_sent", value_amount=100) + +# Revenue +emit_outcome("success", value_type="order_value", value_amount=1299.99) + +# Score +emit_outcome("success", value_type="satisfaction_score", value_amount=4.5) +``` + +### confidence + +For probabilistic outcomes: + +```python +emit_outcome( + "success", + value_type="intent_classified", + value_amount=1, + confidence=0.92, +) +``` + +### reason + +Explain the outcome (especially for failures): + +```python +emit_outcome("failed", reason="rate_limit_exceeded") +emit_outcome("failed", reason="invalid_input") +emit_outcome("partial", reason="timeout_partial_results", value_amount=3) +``` + +## Outcome Patterns + +### Success with Value + +```python +@botanu_use_case("Order Processing") +async def process_order(order_id: str): + order = await fetch_order(order_id) + await fulfill_order(order) + + emit_outcome( + "success", + value_type="orders_fulfilled", + value_amount=1, + ) +``` + +### Success with Revenue + +```python +@botanu_use_case("Sales Bot") +async def handle_inquiry(inquiry_id: str): + result = await process_sale(inquiry_id) + + if result.sale_completed: + emit_outcome( + "success", + value_type="revenue_generated", + value_amount=result.order_total, + ) + else: + emit_outcome( + "partial", + value_type="leads_qualified", + value_amount=1, + ) +``` + +### Partial Success + +```python +@botanu_use_case("Batch Processing") +async def process_batch(items: list): + processed = 0 + for item in items: + try: + await process_item(item) + processed += 1 + except Exception: + continue + + if processed == len(items): + emit_outcome("success", value_type="items_processed", value_amount=processed) + elif processed > 0: + emit_outcome( + "partial", + value_type="items_processed", + value_amount=processed, + reason=f"processed_{processed}_of_{len(items)}", + ) + else: + emit_outcome("failed", reason="no_items_processed") +``` + +### Failure with Reason + +```python +@botanu_use_case("Document Analysis") +async def analyze_document(doc_id: str): + try: + document = await fetch_document(doc_id) + if not document: + emit_outcome("failed", reason="document_not_found") + return None + + result = await analyze(document) + emit_outcome("success", value_type="documents_analyzed", value_amount=1) + return result + + except RateLimitError: + 
emit_outcome("failed", reason="rate_limit_exceeded") + raise + except TimeoutError: + emit_outcome("failed", reason="analysis_timeout") + raise +``` + +### Classification with Confidence + +```python +@botanu_use_case("Intent Classification") +async def classify_intent(message: str): + result = await classifier.predict(message) + + emit_outcome( + "success", + value_type="intents_classified", + value_amount=1, + confidence=result.confidence, + ) + + return result.intent +``` + +## Automatic Outcomes + +The `@botanu_use_case` decorator automatically emits outcomes: + +```python +@botanu_use_case("My Use Case", auto_outcome_on_success=True) # Default +async def my_function(): + # If no exception and no explicit emit_outcome, emits "success" + return result +``` + +If an exception is raised, it automatically emits `"failed"` with the exception class as the reason. + +To disable: + +```python +@botanu_use_case("My Use Case", auto_outcome_on_success=False) +async def my_function(): + # Must call emit_outcome explicitly + emit_outcome("success") +``` + +## @botanu_outcome Decorator + +For sub-functions within a use case: + +```python +from botanu import botanu_use_case, botanu_outcome + +@botanu_use_case("Data Pipeline") +async def run_pipeline(): + await step_one() + await step_two() + +@botanu_outcome() +async def step_one(): + # Emits "success" on completion, "failed" on exception + await process_data() + +@botanu_outcome(success="data_extracted", failed="extraction_failed") +async def step_two(): + # Custom outcome labels + await extract_data() +``` + +## Span Attributes + +Outcomes are recorded as span attributes: + +| Attribute | Description | +|-----------|-------------| +| `botanu.outcome` | Status (success/partial/failed) | +| `botanu.outcome.value_type` | What was achieved | +| `botanu.outcome.value_amount` | Quantified value | +| `botanu.outcome.confidence` | Confidence score | +| `botanu.outcome.reason` | Reason for outcome | + +## Span Events + +An event is also emitted for timeline visibility: + +```python +# Event: botanu.outcome_emitted +# Attributes: +# status: "success" +# value_type: "tickets_resolved" +# value_amount: 1 +``` + +## Cost-Per-Outcome Analysis + +With outcomes recorded, you can calculate: + +```sql +-- Cost per successful ticket resolution +SELECT + AVG(total_cost) as avg_cost_per_resolution +FROM runs +WHERE use_case = 'Customer Support' + AND outcome_status = 'success' + AND outcome_value_type = 'tickets_resolved'; + +-- ROI by use case +SELECT + use_case, + SUM(outcome_value_amount * value_per_unit) as total_value, + SUM(total_cost) as total_cost, + (SUM(outcome_value_amount * value_per_unit) - SUM(total_cost)) / SUM(total_cost) as roi +FROM runs +GROUP BY use_case; +``` + +## Best Practices + +### 1. Always Record Outcomes + +Every use case should emit an outcome: + +```python +@botanu_use_case("My Use Case") +async def my_function(): + try: + result = await do_work() + emit_outcome("success", value_type="items_processed", value_amount=result.count) + return result + except Exception as e: + emit_outcome("failed", reason=type(e).__name__) + raise +``` + +### 2. 
Use Consistent Value Types + +Define standard value types for your organization: + +```python +# Good - consistent naming +emit_outcome("success", value_type="tickets_resolved", value_amount=1) +emit_outcome("success", value_type="documents_processed", value_amount=1) + +# Bad - inconsistent +emit_outcome("success", value_type="ticket_done", value_amount=1) +emit_outcome("success", value_type="doc processed", value_amount=1) +``` + +### 3. Quantify When Possible + +Include amounts for better analysis: + +```python +# Good - quantified +emit_outcome("success", value_type="emails_sent", value_amount=50) + +# Less useful - no amount +emit_outcome("success") +``` + +### 4. Include Reasons for Failures + +Always explain why something failed: + +```python +emit_outcome("failed", reason="api_rate_limit") +emit_outcome("failed", reason="invalid_input_format") +emit_outcome("failed", reason="model_unavailable") +``` + +### 5. One Outcome Per Run + +Emit only one outcome per use case execution: + +```python +@botanu_use_case("Process Items") +async def process_items(items): + successful = 0 + for item in items: + if await process(item): + successful += 1 + + # One outcome at the end + emit_outcome("success", value_type="items_processed", value_amount=successful) +``` + +## See Also + +- [Run Context](../concepts/run-context.md) - Understanding runs +- [LLM Tracking](llm-tracking.md) - Tracking LLM costs +- [Best Practices](../patterns/best-practices.md) - More patterns diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..224eb49 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,242 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +# --------------------------------------------------------------------------- +# Project metadata (PEP 621) +# --------------------------------------------------------------------------- +[project] +name = "botanu" +dynamic = ["version"] +description = "OpenTelemetry-native run-level cost attribution for AI workflows" +readme = "README.md" +license = "Apache-2.0" +requires-python = ">=3.9" +authors = [ + { name = "The Botanu Authors", email = "oss@botanu.ai" }, +] +keywords = [ + "opentelemetry", + "tracing", + "observability", + "ai", + "llm", + "cost-attribution", + "mlops", +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: System :: Monitoring", + "Typing :: Typed", +] + +# All dependencies included in base install. OTel instrumentation packages +# are lightweight shims (~few KB each) that silently no-op when the target +# library is not installed, so there is zero bloat for unused instrumentations. 
+dependencies = [ + # ── OTel core + SDK + exporter ───────────────────────────────── + "opentelemetry-api >= 1.20.0", + "opentelemetry-sdk >= 1.20.0", + "opentelemetry-exporter-otlp-proto-http >= 1.20.0", + "opentelemetry-instrumentation >= 0.41b0", + + # ── HTTP clients ─────────────────────────────────────────────── + "opentelemetry-instrumentation-httpx >= 0.41b0", + "opentelemetry-instrumentation-requests >= 0.41b0", + "opentelemetry-instrumentation-urllib3 >= 0.41b0", + "opentelemetry-instrumentation-urllib >= 0.41b0", + "opentelemetry-instrumentation-aiohttp-client >= 0.41b0", + "opentelemetry-instrumentation-aiohttp-server >= 0.41b0", + + # ── Web frameworks ───────────────────────────────────────────── + "opentelemetry-instrumentation-fastapi >= 0.41b0", + "opentelemetry-instrumentation-flask >= 0.41b0", + "opentelemetry-instrumentation-django >= 0.41b0", + "opentelemetry-instrumentation-starlette >= 0.41b0", + "opentelemetry-instrumentation-falcon >= 0.41b0", + "opentelemetry-instrumentation-pyramid >= 0.41b0", + "opentelemetry-instrumentation-tornado >= 0.41b0", + + # ── gRPC ─────────────────────────────────────────────────────── + "opentelemetry-instrumentation-grpc >= 0.41b0", + + # ── Databases ────────────────────────────────────────────────── + "opentelemetry-instrumentation-sqlalchemy >= 0.41b0", + "opentelemetry-instrumentation-psycopg2 >= 0.41b0", + "opentelemetry-instrumentation-psycopg >= 0.41b0", + "opentelemetry-instrumentation-asyncpg >= 0.41b0", + "opentelemetry-instrumentation-aiopg >= 0.41b0", + "opentelemetry-instrumentation-pymongo >= 0.41b0", + "opentelemetry-instrumentation-redis >= 0.41b0", + "opentelemetry-instrumentation-mysql >= 0.41b0", + "opentelemetry-instrumentation-mysqlclient >= 0.41b0", + "opentelemetry-instrumentation-pymysql >= 0.41b0", + "opentelemetry-instrumentation-sqlite3 >= 0.41b0", + "opentelemetry-instrumentation-elasticsearch >= 0.41b0", + "opentelemetry-instrumentation-cassandra >= 0.41b0", + "opentelemetry-instrumentation-tortoiseorm >= 0.41b0", + "opentelemetry-instrumentation-pymemcache >= 0.41b0", + + # ── Messaging / Task queues ──────────────────────────────────── + "opentelemetry-instrumentation-celery >= 0.41b0", + "opentelemetry-instrumentation-kafka-python >= 0.41b0", + "opentelemetry-instrumentation-confluent-kafka >= 0.41b0", + "opentelemetry-instrumentation-aiokafka >= 0.41b0", + "opentelemetry-instrumentation-pika >= 0.41b0", + "opentelemetry-instrumentation-aio-pika >= 0.41b0", + + # ── AWS ──────────────────────────────────────────────────────── + "opentelemetry-instrumentation-botocore >= 0.41b0", + "opentelemetry-instrumentation-boto3sqs >= 0.41b0", + + # ── GenAI / AI ───────────────────────────────────────────────── + "opentelemetry-instrumentation-openai-v2 >= 2.0b0", + "opentelemetry-instrumentation-anthropic >= 0.1b0", + "opentelemetry-instrumentation-vertexai >= 0.1b0", + "opentelemetry-instrumentation-google-generativeai >= 0.1b0", + "opentelemetry-instrumentation-langchain >= 0.1b0", + "opentelemetry-instrumentation-ollama >= 0.1b0", + "opentelemetry-instrumentation-crewai >= 0.1b0", + + # ── Runtime / Concurrency ────────────────────────────────────── + "opentelemetry-instrumentation-logging >= 0.41b0", + "opentelemetry-instrumentation-threading >= 0.41b0", +] + +[project.urls] +Homepage = "https://github.com/botanu-ai/botanu-sdk-python" +Documentation = "https://docs.botanu.ai" +Repository = "https://github.com/botanu-ai/botanu-sdk-python" +Changelog = 
"https://github.com/botanu-ai/botanu-sdk-python/blob/main/CHANGELOG.md" +Issues = "https://github.com/botanu-ai/botanu-sdk-python/issues" + +# --------------------------------------------------------------------------- +# Optional extras (dev only — base install includes everything) +# --------------------------------------------------------------------------- +[project.optional-dependencies] +dev = [ + "pytest >= 7.4.0", + "pytest-asyncio >= 0.21.0", + "pytest-cov >= 4.1.0", + "coverage[toml] >= 7.0", + "httpx >= 0.24.0", + "starlette >= 0.27.0, < 0.30.0; python_version < '3.10'", + "starlette >= 0.27.0; python_version >= '3.10'", + "ruff >= 0.4.0", + "mypy >= 1.7.0", + "pre-commit >= 3.5.0", +] + +# --------------------------------------------------------------------------- +# Hatch — build targets & versioning +# --------------------------------------------------------------------------- +[tool.hatch.version] +source = "vcs" + +[tool.hatch.version.raw-options] +version_scheme = "guess-next-dev" +local_scheme = "no-local-version" + +[tool.hatch.build.targets.sdist] +include = ["src/botanu/**", "LICENSE", "NOTICE", "README.md"] + +[tool.hatch.build.targets.wheel] +packages = ["src/botanu"] + +# --------------------------------------------------------------------------- +# Ruff (linter + formatter) +# --------------------------------------------------------------------------- +[tool.ruff] +line-length = 120 +target-version = "py39" +src = ["src"] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "UP", # pyupgrade + "S", # flake8-bandit (security) + "RUF", # ruff-specific +] +ignore = [ + "E501", # line too long — handled by formatter + "S101", # assert in tests is fine + "S110", # try-except-pass is intentional in resource detection + "UP006", # dict vs Dict — keep Dict[] for 3.9 compat + "UP007", # X | Y syntax — keep Optional[] for 3.9 compat + "UP035", # typing.Dict deprecated — keep for 3.9 compat + "UP045", # X | None vs Optional — keep Optional[] for 3.9 compat + "RUF002", # ambiguous dash — intentional in docstrings + "RUF022", # __all__ not sorted — grouped logically +] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["S101", "S106"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +line-ending = "auto" + +# --------------------------------------------------------------------------- +# mypy +# --------------------------------------------------------------------------- +[tool.mypy] +python_version = "3.9" +warn_return_any = false +warn_unused_configs = true +ignore_missing_imports = true +strict = false +# OTel SDK types are not always precise; runtime behavior is correct +disable_error_code = ["arg-type", "attr-defined", "operator", "misc"] + +# --------------------------------------------------------------------------- +# pytest +# --------------------------------------------------------------------------- +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] +addopts = [ + "--strict-markers", + "--tb=short", +] +markers = [ + "integration: marks tests that require external services", +] + +# --------------------------------------------------------------------------- +# coverage +# --------------------------------------------------------------------------- +[tool.coverage.run] +source = ["botanu"] +branch = true + +[tool.coverage.report] +show_missing = true +fail_under = 70 +exclude_lines = [ + "pragma: no cover", + "if TYPE_CHECKING:", + 
"if __name__ == .__main__.", +] +# Exclude integration-heavy modules that require full OTel SDK setup +omit = [ + "src/botanu/sdk/bootstrap.py", + "src/botanu/sdk/middleware.py", +] diff --git a/src/botanu/__init__.py b/src/botanu/__init__.py new file mode 100644 index 0000000..527714b --- /dev/null +++ b/src/botanu/__init__.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Botanu SDK - OpenTelemetry-native cost attribution for AI workflows. + +Quick Start:: + + from botanu import enable, botanu_use_case, emit_outcome + + enable() # reads config from OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT env vars + + @botanu_use_case(name="Customer Support") + async def handle_request(data): + result = await process(data) + emit_outcome("success", value_type="tickets_resolved", value_amount=1) + return result +""" + +from __future__ import annotations + +from botanu._version import __version__ + +# Run context model +from botanu.models.run_context import RunContext, RunOutcome, RunStatus + +# Bootstrap +from botanu.sdk.bootstrap import ( + disable, + enable, + is_enabled, +) + +# Configuration +from botanu.sdk.config import BotanuConfig + +# Context helpers (core — no SDK dependency) +from botanu.sdk.context import ( + get_baggage, + get_current_span, + get_run_id, + get_use_case, + set_baggage, +) + +# Decorators (primary integration point) +from botanu.sdk.decorators import botanu_outcome, botanu_use_case, use_case + +# Span helpers +from botanu.sdk.span_helpers import emit_outcome, set_business_context + +__all__ = [ + "__version__", + # Bootstrap + "enable", + "disable", + "is_enabled", + # Configuration + "BotanuConfig", + # Decorators + "botanu_use_case", + "use_case", + "botanu_outcome", + # Span helpers + "emit_outcome", + "set_business_context", + "get_current_span", + # Context + "get_run_id", + "get_use_case", + "set_baggage", + "get_baggage", + # Run context + "RunContext", + "RunStatus", + "RunOutcome", +] diff --git a/src/botanu/_version.py b/src/botanu/_version.py new file mode 100644 index 0000000..e7fea48 --- /dev/null +++ b/src/botanu/_version.py @@ -0,0 +1,13 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Dynamic version from package metadata (set by hatch-vcs at build time).""" + +from __future__ import annotations + +try: + from importlib.metadata import version + + __version__: str = version("botanu") +except Exception: + __version__ = "0.0.0.dev0" diff --git a/src/botanu/models/__init__.py b/src/botanu/models/__init__.py new file mode 100644 index 0000000..2fa20c3 --- /dev/null +++ b/src/botanu/models/__init__.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Botanu data models.""" + +from __future__ import annotations + +from botanu.models.run_context import RunContext, RunOutcome, RunStatus + +__all__ = ["RunContext", "RunOutcome", "RunStatus"] diff --git a/src/botanu/models/run_context.py b/src/botanu/models/run_context.py new file mode 100644 index 0000000..264801f --- /dev/null +++ b/src/botanu/models/run_context.py @@ -0,0 +1,320 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Run Context - The core data model for Botanu runs. 
+ +A "Run" is orthogonal to tracing: +- Trace context (W3C): ties distributed spans together (trace_id, span_id) +- Run context (Botanu): ties business execution together (run_id, use_case, outcome) + +Invariant: A run can span multiple traces (retries, async fanout). +The run_id must remain stable across those boundaries. +""" + +from __future__ import annotations + +import os +import time +from dataclasses import dataclass, field +from datetime import datetime, timezone +from enum import Enum +from typing import Dict, Optional, Union + + +def generate_run_id() -> str: + """Generate a UUIDv7-style sortable run ID. + + UUIDv7 provides: + - Sortable by time (first 48 bits are millisecond timestamp) + - Globally unique + - Compatible with UUID format + + Uses ``os.urandom()`` for ~2x faster generation than ``secrets``. + """ + timestamp_ms = int(time.time() * 1000) + + uuid_bytes = bytearray(16) + uuid_bytes[0] = (timestamp_ms >> 40) & 0xFF + uuid_bytes[1] = (timestamp_ms >> 32) & 0xFF + uuid_bytes[2] = (timestamp_ms >> 24) & 0xFF + uuid_bytes[3] = (timestamp_ms >> 16) & 0xFF + uuid_bytes[4] = (timestamp_ms >> 8) & 0xFF + uuid_bytes[5] = timestamp_ms & 0xFF + + random_bytes = os.urandom(10) + uuid_bytes[6] = 0x70 | (random_bytes[0] & 0x0F) + uuid_bytes[7] = random_bytes[1] + uuid_bytes[8] = 0x80 | (random_bytes[2] & 0x3F) + uuid_bytes[9:16] = random_bytes[3:10] + + hex_str = uuid_bytes.hex() + return f"{hex_str[:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:]}" + + +class RunStatus(str, Enum): + """Run outcome status.""" + + SUCCESS = "success" + FAILURE = "failure" + PARTIAL = "partial" + TIMEOUT = "timeout" + CANCELED = "canceled" + + +@dataclass +class RunOutcome: + """Outcome attached at run completion.""" + + status: RunStatus + reason_code: Optional[str] = None + error_class: Optional[str] = None + value_type: Optional[str] = None + value_amount: Optional[float] = None + confidence: Optional[float] = None + + +@dataclass +class RunContext: + """Canonical run context data model. + + Propagated via W3C Baggage and stored as span attributes. + + Retry model: + Each attempt gets a NEW run_id for clean cost accounting. + ``root_run_id`` stays stable across all attempts. 
+ """ + + run_id: str + use_case: str + environment: str + workflow: Optional[str] = None + workflow_version: Optional[str] = None + tenant_id: Optional[str] = None + parent_run_id: Optional[str] = None + root_run_id: Optional[str] = None + attempt: int = 1 + retry_of_run_id: Optional[str] = None + start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + deadline: Optional[float] = None + cancelled: bool = False + cancelled_at: Optional[float] = None + outcome: Optional[RunOutcome] = None + + def __post_init__(self) -> None: + if self.root_run_id is None: + object.__setattr__(self, "root_run_id", self.run_id) + + # ------------------------------------------------------------------ + # Factory + # ------------------------------------------------------------------ + + @classmethod + def create( + cls, + use_case: str, + workflow: Optional[str] = None, + workflow_version: Optional[str] = None, + environment: Optional[str] = None, + tenant_id: Optional[str] = None, + parent_run_id: Optional[str] = None, + root_run_id: Optional[str] = None, + attempt: int = 1, + retry_of_run_id: Optional[str] = None, + deadline_seconds: Optional[float] = None, + ) -> RunContext: + """Create a new RunContext with auto-generated run_id.""" + env = environment or os.getenv("BOTANU_ENVIRONMENT") or os.getenv("DEPLOYMENT_ENVIRONMENT") or "production" + run_id = generate_run_id() + deadline = None + if deadline_seconds is not None: + deadline = time.time() + deadline_seconds + + return cls( + run_id=run_id, + use_case=use_case, + environment=env, + workflow=workflow, + workflow_version=workflow_version, + tenant_id=tenant_id, + parent_run_id=parent_run_id, + root_run_id=root_run_id or run_id, + attempt=attempt, + retry_of_run_id=retry_of_run_id, + deadline=deadline, + ) + + @classmethod + def create_retry(cls, previous: RunContext) -> RunContext: + """Create a new RunContext for a retry attempt.""" + return cls.create( + use_case=previous.use_case, + workflow=previous.workflow, + workflow_version=previous.workflow_version, + environment=previous.environment, + tenant_id=previous.tenant_id, + parent_run_id=previous.parent_run_id, + root_run_id=previous.root_run_id, + attempt=previous.attempt + 1, + retry_of_run_id=previous.run_id, + ) + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + def is_past_deadline(self) -> bool: + if self.deadline is None: + return False + return time.time() > self.deadline + + def is_cancelled(self) -> bool: + return self.cancelled or self.is_past_deadline() + + def request_cancellation(self, reason: str = "user") -> None: + self.cancelled = True + self.cancelled_at = time.time() + + def remaining_time_seconds(self) -> Optional[float]: + if self.deadline is None: + return None + return max(0.0, self.deadline - time.time()) + + def complete( + self, + status: RunStatus, + reason_code: Optional[str] = None, + error_class: Optional[str] = None, + value_type: Optional[str] = None, + value_amount: Optional[float] = None, + confidence: Optional[float] = None, + ) -> None: + self.outcome = RunOutcome( + status=status, + reason_code=reason_code, + error_class=error_class, + value_type=value_type, + value_amount=value_amount, + confidence=confidence, + ) + + @property + def duration_ms(self) -> Optional[float]: + if self.outcome is None: + return None + return (datetime.now(timezone.utc) - self.start_time).total_seconds() * 1000 + + # 
------------------------------------------------------------------ + # Serialisation + # ------------------------------------------------------------------ + + def to_baggage_dict(self, lean_mode: Optional[bool] = None) -> Dict[str, str]: + """Convert to dict for W3C Baggage propagation.""" + if lean_mode is None: + env_mode = os.getenv("BOTANU_PROPAGATION_MODE", "lean") + lean_mode = env_mode != "full" + + baggage: Dict[str, str] = { + "botanu.run_id": self.run_id, + "botanu.use_case": self.use_case, + } + if lean_mode: + return baggage + + baggage["botanu.environment"] = self.environment + if self.workflow: + baggage["botanu.workflow"] = self.workflow + if self.tenant_id: + baggage["botanu.tenant_id"] = self.tenant_id + if self.parent_run_id: + baggage["botanu.parent_run_id"] = self.parent_run_id + if self.root_run_id and self.root_run_id != self.run_id: + baggage["botanu.root_run_id"] = self.root_run_id + if self.attempt > 1: + baggage["botanu.attempt"] = str(self.attempt) + if self.retry_of_run_id: + baggage["botanu.retry_of_run_id"] = self.retry_of_run_id + if self.deadline is not None: + baggage["botanu.deadline"] = str(int(self.deadline * 1000)) + if self.cancelled: + baggage["botanu.cancelled"] = "true" + return baggage + + def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]: + """Convert to dict for span attributes.""" + attrs: Dict[str, Union[str, float, int, bool]] = { + "botanu.run_id": self.run_id, + "botanu.use_case": self.use_case, + "botanu.environment": self.environment, + "botanu.run.start_time": self.start_time.isoformat(), + } + if self.workflow: + attrs["botanu.workflow"] = self.workflow + if self.workflow_version: + attrs["botanu.workflow.version"] = self.workflow_version + if self.tenant_id: + attrs["botanu.tenant_id"] = self.tenant_id + if self.parent_run_id: + attrs["botanu.parent_run_id"] = self.parent_run_id + attrs["botanu.root_run_id"] = self.root_run_id or self.run_id + attrs["botanu.attempt"] = self.attempt + if self.retry_of_run_id: + attrs["botanu.retry_of_run_id"] = self.retry_of_run_id + if self.deadline is not None: + attrs["botanu.run.deadline_ts"] = self.deadline + if self.cancelled: + attrs["botanu.run.cancelled"] = True + if self.cancelled_at: + attrs["botanu.run.cancelled_at"] = self.cancelled_at + if self.outcome: + attrs["botanu.outcome.status"] = self.outcome.status.value + if self.outcome.reason_code: + attrs["botanu.outcome.reason_code"] = self.outcome.reason_code + if self.outcome.error_class: + attrs["botanu.outcome.error_class"] = self.outcome.error_class + if self.outcome.value_type: + attrs["botanu.outcome.value_type"] = self.outcome.value_type + if self.outcome.value_amount is not None: + attrs["botanu.outcome.value_amount"] = self.outcome.value_amount + if self.outcome.confidence is not None: + attrs["botanu.outcome.confidence"] = self.outcome.confidence + if self.duration_ms is not None: + attrs["botanu.run.duration_ms"] = self.duration_ms + return attrs + + @classmethod + def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext]: + """Reconstruct RunContext from baggage dict.""" + run_id = baggage.get("botanu.run_id") + use_case = baggage.get("botanu.use_case") + if not run_id or not use_case: + return None + + attempt_str = baggage.get("botanu.attempt", "1") + try: + attempt = int(attempt_str) + except ValueError: + attempt = 1 + + deadline: Optional[float] = None + deadline_str = baggage.get("botanu.deadline") + if deadline_str: + try: + deadline = float(deadline_str) / 1000.0 + except ValueError: + 
pass + + cancelled = baggage.get("botanu.cancelled", "").lower() == "true" + + return cls( + run_id=run_id, + use_case=use_case, + environment=baggage.get("botanu.environment", "unknown"), + workflow=baggage.get("botanu.workflow"), + tenant_id=baggage.get("botanu.tenant_id"), + parent_run_id=baggage.get("botanu.parent_run_id"), + root_run_id=baggage.get("botanu.root_run_id") or run_id, + attempt=attempt, + retry_of_run_id=baggage.get("botanu.retry_of_run_id"), + deadline=deadline, + cancelled=cancelled, + ) diff --git a/src/botanu/processors/__init__.py b/src/botanu/processors/__init__.py new file mode 100644 index 0000000..680a413 --- /dev/null +++ b/src/botanu/processors/__init__.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Botanu span processors. + +Only :class:`RunContextEnricher` is needed in the SDK. +All other processing should happen in the OTel Collector. +""" + +from botanu.processors.enricher import RunContextEnricher + +__all__ = ["RunContextEnricher"] diff --git a/src/botanu/processors/enricher.py b/src/botanu/processors/enricher.py new file mode 100644 index 0000000..85b3f78 --- /dev/null +++ b/src/botanu/processors/enricher.py @@ -0,0 +1,81 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""RunContextEnricher — the only span processor needed in the SDK. + +Why this MUST be in SDK (not collector): +- Baggage is process-local (not sent over the wire). +- Only the SDK can read baggage and write it to span attributes. +- The collector only sees spans after they're exported. + +All heavy processing should happen in the OTel Collector: +- PII redaction → ``redactionprocessor`` +- Cardinality limits → ``attributesprocessor`` +- Vendor detection → ``transformprocessor`` +""" + +from __future__ import annotations + +import logging +from typing import ClassVar, List, Optional + +from opentelemetry import baggage, context +from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor +from opentelemetry.trace import Span + +logger = logging.getLogger(__name__) + + +class RunContextEnricher(SpanProcessor): + """Enriches ALL spans with run context from baggage. + + This ensures that every span (including auto-instrumented ones) + gets ``botanu.run_id``, ``botanu.use_case``, etc. attributes. + + Without this processor, only the root ``botanu.run`` span would + have these attributes. + + In ``lean_mode`` (default), only ``run_id`` and ``use_case`` are + propagated to minimise per-span overhead. 
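+
+    Example (manual registration; ``enable()`` normally adds this processor for you)::
+
+        from opentelemetry.sdk.trace import TracerProvider
+
+        provider = TracerProvider()
+        provider.add_span_processor(RunContextEnricher(lean_mode=True))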
+ """ + + BAGGAGE_KEYS_FULL: ClassVar[List[str]] = [ + "botanu.run_id", + "botanu.use_case", + "botanu.workflow", + "botanu.environment", + "botanu.tenant_id", + "botanu.parent_run_id", + ] + + BAGGAGE_KEYS_LEAN: ClassVar[List[str]] = [ + "botanu.run_id", + "botanu.use_case", + ] + + def __init__(self, lean_mode: bool = True) -> None: + self._lean_mode = lean_mode + self._baggage_keys = self.BAGGAGE_KEYS_LEAN if lean_mode else self.BAGGAGE_KEYS_FULL + + def on_start( + self, + span: Span, + parent_context: Optional[context.Context] = None, + ) -> None: + """Called when a span starts — enrich with run context from baggage.""" + ctx = parent_context or context.get_current() + + for key in self._baggage_keys: + value = baggage.get_baggage(key, ctx) + if value: + if not span.attributes or key not in span.attributes: + span.set_attribute(key, value) + + def on_end(self, span: ReadableSpan) -> None: + pass + + def shutdown(self) -> None: + pass + + def force_flush(self, timeout_millis: int = 30000) -> bool: + return True diff --git a/src/botanu/py.typed b/src/botanu/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/src/botanu/resources/__init__.py b/src/botanu/resources/__init__.py new file mode 100644 index 0000000..474c051 --- /dev/null +++ b/src/botanu/resources/__init__.py @@ -0,0 +1,8 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Botanu resource detection.""" + +from botanu.resources.detector import detect_all_resources, get_resource_attributes + +__all__ = ["detect_all_resources", "get_resource_attributes"] diff --git a/src/botanu/resources/detector.py b/src/botanu/resources/detector.py new file mode 100644 index 0000000..1a6bf50 --- /dev/null +++ b/src/botanu/resources/detector.py @@ -0,0 +1,366 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Resource Detector — auto-detect execution environment for cost attribution. 
+ +Detects attributes from: +- Kubernetes (``k8s.*``) +- Cloud providers (``cloud.*``, ``aws.*``, ``gcp.*``, ``azure.*``) +- Host / VM (``host.*``, ``os.*``) +- Container (``container.*``) +- Serverless / FaaS (``faas.*``) +- Process (``process.*``) +""" + +from __future__ import annotations + +import os +import platform +import socket +import sys +from functools import lru_cache +from typing import Any, Dict, Optional + +# ========================================================================= +# Environment Variable Mappings +# ========================================================================= + +K8S_ENV_MAPPINGS: Dict[str, Optional[str]] = { + "KUBERNETES_SERVICE_HOST": None, + "HOSTNAME": "k8s.pod.name", + "K8S_POD_NAME": "k8s.pod.name", + "K8S_POD_UID": "k8s.pod.uid", + "K8S_NAMESPACE": "k8s.namespace.name", + "K8S_NODE_NAME": "k8s.node.name", + "K8S_CLUSTER_NAME": "k8s.cluster.name", + "K8S_DEPLOYMENT_NAME": "k8s.deployment.name", + "K8S_STATEFULSET_NAME": "k8s.statefulset.name", + "K8S_CONTAINER_NAME": "k8s.container.name", +} + +AWS_ENV_MAPPINGS: Dict[str, Optional[str]] = { + "AWS_REGION": "cloud.region", + "AWS_DEFAULT_REGION": "cloud.region", + "AWS_ACCOUNT_ID": "cloud.account.id", + "ECS_CONTAINER_METADATA_URI": None, + "ECS_CLUSTER": "aws.ecs.cluster.name", + "ECS_TASK_ARN": "aws.ecs.task.arn", + "ECS_TASK_DEFINITION_FAMILY": "aws.ecs.task.family", + "AWS_LAMBDA_FUNCTION_NAME": "faas.name", + "AWS_LAMBDA_FUNCTION_VERSION": "faas.version", + "AWS_LAMBDA_LOG_GROUP_NAME": "aws.lambda.log_group", + "AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "faas.max_memory", +} + +GCP_ENV_MAPPINGS: Dict[str, Optional[str]] = { + "GOOGLE_CLOUD_PROJECT": "cloud.account.id", + "GCLOUD_PROJECT": "cloud.account.id", + "GCP_PROJECT": "cloud.account.id", + "GOOGLE_CLOUD_REGION": "cloud.region", + "K_SERVICE": "faas.name", + "K_REVISION": "faas.version", + "K_CONFIGURATION": "gcp.cloud_run.configuration", + "FUNCTION_NAME": "faas.name", + "FUNCTION_TARGET": "faas.trigger", + "FUNCTION_SIGNATURE_TYPE": "gcp.function.signature_type", +} + +AZURE_ENV_MAPPINGS: Dict[str, Optional[str]] = { + "AZURE_SUBSCRIPTION_ID": "cloud.account.id", + "AZURE_RESOURCE_GROUP": "azure.resource_group", + "WEBSITE_SITE_NAME": "faas.name", + "FUNCTIONS_EXTENSION_VERSION": "azure.functions.version", + "WEBSITE_INSTANCE_ID": "faas.instance", + "REGION_NAME": "cloud.region", +} + + +# ========================================================================= +# Detection Functions +# ========================================================================= + + +def detect_kubernetes() -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + if not os.environ.get("KUBERNETES_SERVICE_HOST"): + return attrs + + for env_var, attr_name in K8S_ENV_MAPPINGS.items(): + value = os.environ.get(env_var) + if attr_name and value: + attrs[attr_name] = value + + if "k8s.pod.name" not in attrs: + hostname = os.environ.get("HOSTNAME", socket.gethostname()) + if hostname: + attrs["k8s.pod.name"] = hostname + + namespace_file = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" + if "k8s.namespace.name" not in attrs and os.path.exists(namespace_file): + try: + with open(namespace_file) as fh: + attrs["k8s.namespace.name"] = fh.read().strip() + except OSError: + pass + + return attrs + + +def detect_cloud_provider() -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + + if _is_aws(): + attrs["cloud.provider"] = "aws" + for env_var, attr_name in AWS_ENV_MAPPINGS.items(): + value = os.environ.get(env_var) + if attr_name and value: + attrs[attr_name] = 
value + + if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"): + attrs["faas.id"] = ( + f"arn:aws:lambda:{attrs.get('cloud.region', 'unknown')}:" + f"{attrs.get('cloud.account.id', 'unknown')}:" + f"function:{os.environ['AWS_LAMBDA_FUNCTION_NAME']}" + ) + + az = _get_aws_availability_zone() + if az: + attrs["cloud.availability_zone"] = az + if "cloud.region" not in attrs: + attrs["cloud.region"] = az[:-1] + + elif _is_gcp(): + attrs["cloud.provider"] = "gcp" + for env_var, attr_name in GCP_ENV_MAPPINGS.items(): + value = os.environ.get(env_var) + if attr_name and value: + attrs[attr_name] = value + if os.environ.get("K_SERVICE"): + attrs["faas.trigger"] = "http" + elif os.environ.get("FUNCTION_NAME"): + attrs["faas.trigger"] = os.environ.get("FUNCTION_TRIGGER_TYPE", "unknown") + + elif _is_azure(): + attrs["cloud.provider"] = "azure" + for env_var, attr_name in AZURE_ENV_MAPPINGS.items(): + value = os.environ.get(env_var) + if attr_name and value: + attrs[attr_name] = value + + return attrs + + +def _is_aws() -> bool: + indicators = [ + "AWS_REGION", + "AWS_DEFAULT_REGION", + "AWS_LAMBDA_FUNCTION_NAME", + "ECS_CONTAINER_METADATA_URI", + "AWS_EXECUTION_ENV", + ] + return any(os.environ.get(var) for var in indicators) + + +def _is_gcp() -> bool: + indicators = [ + "GOOGLE_CLOUD_PROJECT", + "GCLOUD_PROJECT", + "GCP_PROJECT", + "K_SERVICE", + "FUNCTION_NAME", + ] + return any(os.environ.get(var) for var in indicators) + + +def _is_azure() -> bool: + indicators = [ + "WEBSITE_SITE_NAME", + "AZURE_FUNCTIONS_ENVIRONMENT", + "AZURE_SUBSCRIPTION_ID", + ] + return any(os.environ.get(var) for var in indicators) + + +def _get_aws_availability_zone() -> Optional[str]: + """Get AWS availability zone from EC2 instance metadata. + + Uses IMDS (Instance Metadata Service) which is only accessible from within EC2. 
+ Configure via environment variables: + - AWS_EC2_METADATA_SERVICE_ENDPOINT: Override the metadata endpoint + - AWS_EC2_METADATA_DISABLED: Set to 'true' to disable metadata calls + """ + if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"): + return None + + # Respect AWS SDK standard env vars for disabling/configuring metadata + if os.environ.get("AWS_EC2_METADATA_DISABLED", "").lower() == "true": + return None + + # Use AWS SDK standard endpoint override, or default to standard IMDS address + endpoint = os.environ.get("AWS_EC2_METADATA_SERVICE_ENDPOINT", "http://169.254.169.254") + if not endpoint or not endpoint.startswith(("http://", "https://")): + return None + + try: + import urllib.request + + url = f"{endpoint}/latest/meta-data/placement/availability-zone" + req = urllib.request.Request(url, headers={"Accept": "text/plain"}) # noqa: S310 + with urllib.request.urlopen(req, timeout=0.5) as resp: # noqa: S310 + return resp.read().decode("utf-8").strip() + except Exception: + return None + + +def detect_host() -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + try: + hostname = socket.gethostname() + if hostname: + attrs["host.name"] = hostname + except Exception: + pass + + host_id = os.environ.get("HOST_ID") or os.environ.get("INSTANCE_ID") + if host_id: + attrs["host.id"] = host_id + elif "host.name" in attrs: + attrs["host.id"] = attrs["host.name"] + + attrs["os.type"] = sys.platform + attrs["host.arch"] = platform.machine() + return attrs + + +def detect_container() -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + container_id = _get_container_id() + if container_id: + attrs["container.id"] = container_id + + if os.path.exists("/.dockerenv"): + attrs["container.runtime"] = "docker" + elif os.environ.get("KUBERNETES_SERVICE_HOST"): + attrs["container.runtime"] = "containerd" + return attrs + + +def _get_container_id() -> Optional[str]: + container_id = os.environ.get("CONTAINER_ID") or os.environ.get("HOSTNAME") + + cgroup_path = "/proc/self/cgroup" + if os.path.exists(cgroup_path): + try: + with open(cgroup_path) as fh: + for line in fh: + if "docker" in line or "kubepods" in line: + parts = line.strip().split("/") + if parts: + last = parts[-1] + if last.startswith("cri-containerd-"): + last = last[15:] + if len(last) >= 12: + return last[:64] + except OSError: + pass + + return container_id if container_id and len(container_id) >= 12 else None + + +def detect_process() -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + attrs["process.pid"] = os.getpid() + attrs["process.runtime.name"] = "python" + attrs["process.runtime.version"] = sys.version.split()[0] + if sys.argv: + attrs["process.command"] = sys.argv[0][:200] + return attrs + + +def detect_serverless() -> Dict[str, Any]: + attrs: Dict[str, Any] = {} + + if os.environ.get("AWS_LAMBDA_FUNCTION_NAME"): + attrs["faas.name"] = os.environ["AWS_LAMBDA_FUNCTION_NAME"] + version = os.environ.get("AWS_LAMBDA_FUNCTION_VERSION") + if version: + attrs["faas.version"] = version + memory = os.environ.get("AWS_LAMBDA_FUNCTION_MEMORY_SIZE") + if memory: + attrs["faas.max_memory"] = int(memory) * 1024 * 1024 + + elif os.environ.get("K_SERVICE"): + attrs["faas.name"] = os.environ["K_SERVICE"] + revision = os.environ.get("K_REVISION") + if revision: + attrs["faas.version"] = revision + + elif os.environ.get("FUNCTION_NAME"): + attrs["faas.name"] = os.environ["FUNCTION_NAME"] + target = os.environ.get("FUNCTION_TARGET") + if target: + attrs["faas.trigger"] = target + + elif os.environ.get("WEBSITE_SITE_NAME"): + attrs["faas.name"] = 
os.environ["WEBSITE_SITE_NAME"] + instance = os.environ.get("WEBSITE_INSTANCE_ID") + if instance: + attrs["faas.instance"] = instance + + return attrs + + +# ========================================================================= +# Main Detection +# ========================================================================= + + +@lru_cache(maxsize=1) +def detect_all_resources() -> Dict[str, Any]: + """Detect all environment resource attributes. + + Results are cached (environment doesn't change during runtime). + """ + attrs: Dict[str, Any] = {} + attrs.update(detect_host()) + attrs.update(detect_process()) + attrs.update(detect_container()) + attrs.update(detect_cloud_provider()) + attrs.update(detect_kubernetes()) + attrs.update(detect_serverless()) + + if "service.instance.id" not in attrs: + container_id = attrs.get("container.id") + if container_id: + attrs["service.instance.id"] = container_id[:12] + elif pod_name := attrs.get("k8s.pod.name"): + attrs["service.instance.id"] = pod_name + elif host_id := attrs.get("host.id"): + attrs["service.instance.id"] = host_id + + return attrs + + +def get_resource_attributes( + include_host: bool = True, + include_process: bool = True, + include_container: bool = True, + include_cloud: bool = True, + include_k8s: bool = True, + include_faas: bool = True, +) -> Dict[str, Any]: + """Get resource attributes with selective detection.""" + attrs: Dict[str, Any] = {} + if include_host: + attrs.update(detect_host()) + if include_process: + attrs.update(detect_process()) + if include_container: + attrs.update(detect_container()) + if include_cloud: + attrs.update(detect_cloud_provider()) + if include_k8s: + attrs.update(detect_kubernetes()) + if include_faas: + attrs.update(detect_serverless()) + return attrs diff --git a/src/botanu/sdk/__init__.py b/src/botanu/sdk/__init__.py new file mode 100644 index 0000000..2a6229d --- /dev/null +++ b/src/botanu/sdk/__init__.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Botanu SDK core components.""" + +from __future__ import annotations + +from botanu.sdk.bootstrap import disable, enable, get_config, is_enabled +from botanu.sdk.config import BotanuConfig +from botanu.sdk.context import ( + get_baggage, + get_current_span, + get_run_id, + get_use_case, + get_workflow, + set_baggage, +) +from botanu.sdk.decorators import botanu_outcome, botanu_use_case, use_case +from botanu.sdk.span_helpers import emit_outcome, set_business_context + +__all__ = [ + "BotanuConfig", + "botanu_outcome", + "botanu_use_case", + "disable", + "emit_outcome", + "enable", + "get_baggage", + "get_config", + "get_current_span", + "get_run_id", + "get_use_case", + "get_workflow", + "is_enabled", + "set_baggage", + "set_business_context", + "use_case", +] diff --git a/src/botanu/sdk/bootstrap.py b/src/botanu/sdk/bootstrap.py new file mode 100644 index 0000000..879bffd --- /dev/null +++ b/src/botanu/sdk/bootstrap.py @@ -0,0 +1,381 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Botanu Bootstrap — one-switch enablement for OTEL auto-instrumentation. + +This is the "Botanu OTel Distribution" — a curated bundle that: + +1. Configures OTEL SDK with OTLP exporter +2. Enables OTEL auto-instrumentation for popular libraries +3. Adds :class:`~botanu.processors.enricher.RunContextEnricher` + (propagates ``run_id`` to all spans) +4. 
Sets up W3C TraceContext + Baggage propagators
+
+Usage::
+
+    from botanu import enable
+    enable()  # reads OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT from env
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import threading
+from typing import TYPE_CHECKING, List, Optional
+
+if TYPE_CHECKING:
+    from botanu.sdk.config import BotanuConfig
+
+logger = logging.getLogger(__name__)
+
+_lock = threading.RLock()
+_initialized = False
+_current_config: Optional[BotanuConfig] = None
+
+
+def enable(
+    service_name: Optional[str] = None,
+    otlp_endpoint: Optional[str] = None,
+    environment: Optional[str] = None,
+    auto_instrumentation: bool = True,
+    propagators: Optional[List[str]] = None,
+    log_level: str = "INFO",
+    config: Optional[BotanuConfig] = None,
+    config_file: Optional[str] = None,
+) -> bool:
+    """Enable Botanu SDK with OTEL auto-instrumentation.
+
+    This is the ONE function customers need to call to get full observability.
+
+    Args:
+        service_name: Service name.
+        otlp_endpoint: OTLP collector endpoint.
+        environment: Deployment environment.
+        auto_instrumentation: Enable OTEL auto-instrumentation (default: ``True``).
+        propagators: List of propagators. Currently unused — the SDK always
+            installs ``tracecontext`` + ``baggage``.
+        log_level: Logging level (default: ``"INFO"``).
+        config: Full :class:`BotanuConfig` (overrides individual params).
+        config_file: Path to YAML config file.
+
+    Returns:
+        ``True`` if successfully initialized, ``False`` if already initialized.
+    """
+    global _initialized, _current_config
+
+    with _lock:
+        if _initialized:
+            logger.warning("Botanu SDK already initialized")
+            return False
+
+        logging.basicConfig(level=getattr(logging, log_level.upper(), logging.INFO))
+
+        from botanu.sdk.config import BotanuConfig as ConfigClass
+
+        if config is not None:
+            cfg = config
+        elif config_file is not None:
+            cfg = ConfigClass.from_yaml(config_file)
+        else:
+            cfg = ConfigClass.from_file_or_env()
+
+        if service_name is not None:
+            cfg.service_name = service_name
+        if otlp_endpoint is not None:
+            cfg.otlp_endpoint = otlp_endpoint
+        if environment is not None:
+            cfg.deployment_environment = environment
+
+        _current_config = cfg
+
+        traces_endpoint = cfg.otlp_endpoint
+        if traces_endpoint and not traces_endpoint.endswith("/v1/traces"):
+            traces_endpoint = f"{traces_endpoint.rstrip('/')}/v1/traces"
+
+        otel_sampler_env = os.getenv("OTEL_TRACES_SAMPLER")
+        if otel_sampler_env and otel_sampler_env != "always_on":
+            logger.warning(
+                "OTEL_TRACES_SAMPLER=%s is set, but Botanu enforces ALWAYS_ON: every span is recorded and none are dropped.",
+                otel_sampler_env,
+            )
+
+        logger.info(
+            "Initializing Botanu SDK: service=%s, env=%s, endpoint=%s",
+            cfg.service_name,
+            cfg.deployment_environment,
+            traces_endpoint,
+        )
+
+        try:
+            from opentelemetry import trace
+            from opentelemetry.baggage.propagation import W3CBaggagePropagator
+            from opentelemetry.propagate import set_global_textmap
+            from opentelemetry.propagators.composite import CompositePropagator
+            from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
+        except ImportError as exc:
+            logger.error("Missing opentelemetry-api. Install with: pip install botanu")
+            raise ImportError("opentelemetry-api is required. 
Install with: pip install botanu") from exc + + try: + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + from opentelemetry.sdk.resources import Resource + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + from opentelemetry.sdk.trace.sampling import ALWAYS_ON + except ImportError as exc: + logger.error("Missing OTel SDK dependencies. Install with: pip install botanu") + raise ImportError("OTel SDK and exporter required for enable(). Install with: pip install botanu") from exc + + try: + from botanu._version import __version__ + from botanu.processors import RunContextEnricher + from botanu.resources.detector import detect_all_resources + + resource_attrs = { + "service.name": cfg.service_name, + "deployment.environment": cfg.deployment_environment, + "telemetry.sdk.name": "botanu", + "telemetry.sdk.version": __version__, + } + if cfg.service_version: + resource_attrs["service.version"] = cfg.service_version + if cfg.service_namespace: + resource_attrs["service.namespace"] = cfg.service_namespace + + if cfg.auto_detect_resources: + detected = detect_all_resources() + for key, value in detected.items(): + if key not in resource_attrs: + resource_attrs[key] = value + if detected: + logger.debug("Auto-detected resources: %s", list(detected.keys())) + + resource = Resource.create(resource_attrs) + + existing = trace.get_tracer_provider() + if isinstance(existing, TracerProvider): + provider = existing + logger.info("Reusing existing TracerProvider — adding Botanu processors") + else: + provider = TracerProvider(resource=resource, sampler=ALWAYS_ON) + trace.set_tracer_provider(provider) + + lean_mode = cfg.propagation_mode == "lean" + provider.add_span_processor(RunContextEnricher(lean_mode=lean_mode)) + + exporter = OTLPSpanExporter( + endpoint=traces_endpoint, + headers=cfg.otlp_headers or {}, + ) + provider.add_span_processor( + BatchSpanProcessor( + exporter, + max_export_batch_size=cfg.max_export_batch_size, + max_queue_size=cfg.max_queue_size, + schedule_delay_millis=cfg.schedule_delay_millis, + export_timeout_millis=cfg.export_timeout_millis, + ) + ) + + set_global_textmap( + CompositePropagator( + [ + TraceContextTextMapPropagator(), + W3CBaggagePropagator(), + ] + ) + ) + + logger.info("Botanu SDK tracing initialized") + + if auto_instrumentation: + _enable_auto_instrumentation() + + _initialized = True + return True + + except Exception as exc: + logger.error("Failed to initialize Botanu SDK: %s", exc, exc_info=True) + return False + + +def _enable_auto_instrumentation() -> None: + """Enable OTEL auto-instrumentation for common libraries. + + Each instrumentation is optional — if the underlying library or + instrumentation package isn't installed, it is silently skipped. 
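+    Any other error raised during instrumentation is collected and reported via ``logger.warning``.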
+ """ + enabled: List[str] = [] + failed: List[tuple[str, str]] = [] + + # ── HTTP clients ────────────────────────────────────────────── + _try_instrument(enabled, failed, "httpx", "opentelemetry.instrumentation.httpx", "HTTPXClientInstrumentor") + _try_instrument(enabled, failed, "requests", "opentelemetry.instrumentation.requests", "RequestsInstrumentor") + _try_instrument(enabled, failed, "urllib3", "opentelemetry.instrumentation.urllib3", "URLLib3Instrumentor") + _try_instrument(enabled, failed, "urllib", "opentelemetry.instrumentation.urllib", "URLLibInstrumentor") + _try_instrument( + enabled, failed, "aiohttp_client", "opentelemetry.instrumentation.aiohttp_client", "AioHttpClientInstrumentor" + ) + _try_instrument( + enabled, failed, "aiohttp_server", "opentelemetry.instrumentation.aiohttp_server", "AioHttpServerInstrumentor" + ) + + # ── Web frameworks ──────────────────────────────────────────── + _try_instrument(enabled, failed, "fastapi", "opentelemetry.instrumentation.fastapi", "FastAPIInstrumentor") + _try_instrument(enabled, failed, "flask", "opentelemetry.instrumentation.flask", "FlaskInstrumentor") + _try_instrument(enabled, failed, "django", "opentelemetry.instrumentation.django", "DjangoInstrumentor") + _try_instrument(enabled, failed, "starlette", "opentelemetry.instrumentation.starlette", "StarletteInstrumentor") + _try_instrument(enabled, failed, "falcon", "opentelemetry.instrumentation.falcon", "FalconInstrumentor") + _try_instrument(enabled, failed, "pyramid", "opentelemetry.instrumentation.pyramid", "PyramidInstrumentor") + _try_instrument(enabled, failed, "tornado", "opentelemetry.instrumentation.tornado", "TornadoInstrumentor") + + # ── Databases ───────────────────────────────────────────────── + _try_instrument(enabled, failed, "sqlalchemy", "opentelemetry.instrumentation.sqlalchemy", "SQLAlchemyInstrumentor") + _try_instrument(enabled, failed, "psycopg2", "opentelemetry.instrumentation.psycopg2", "Psycopg2Instrumentor") + _try_instrument(enabled, failed, "psycopg", "opentelemetry.instrumentation.psycopg", "PsycopgInstrumentor") + _try_instrument(enabled, failed, "asyncpg", "opentelemetry.instrumentation.asyncpg", "AsyncPGInstrumentor") + _try_instrument(enabled, failed, "aiopg", "opentelemetry.instrumentation.aiopg", "AiopgInstrumentor") + _try_instrument(enabled, failed, "pymongo", "opentelemetry.instrumentation.pymongo", "PymongoInstrumentor") + _try_instrument(enabled, failed, "redis", "opentelemetry.instrumentation.redis", "RedisInstrumentor") + _try_instrument(enabled, failed, "mysql", "opentelemetry.instrumentation.mysql", "MySQLInstrumentor") + _try_instrument( + enabled, failed, "mysqlclient", "opentelemetry.instrumentation.mysqlclient", "MySQLClientInstrumentor" + ) + _try_instrument(enabled, failed, "pymysql", "opentelemetry.instrumentation.pymysql", "PyMySQLInstrumentor") + _try_instrument(enabled, failed, "sqlite3", "opentelemetry.instrumentation.sqlite3", "SQLite3Instrumentor") + _try_instrument( + enabled, failed, "elasticsearch", "opentelemetry.instrumentation.elasticsearch", "ElasticsearchInstrumentor" + ) + _try_instrument(enabled, failed, "cassandra", "opentelemetry.instrumentation.cassandra", "CassandraInstrumentor") + _try_instrument( + enabled, failed, "tortoise_orm", "opentelemetry.instrumentation.tortoiseorm", "TortoiseORMInstrumentor" + ) + + # ── Caching ─────────────────────────────────────────────────── + _try_instrument(enabled, failed, "pymemcache", "opentelemetry.instrumentation.pymemcache", "PymemcacheInstrumentor") + + # ── 
Messaging / Task queues ─────────────────────────────────── + _try_instrument(enabled, failed, "celery", "opentelemetry.instrumentation.celery", "CeleryInstrumentor") + _try_instrument(enabled, failed, "kafka-python", "opentelemetry.instrumentation.kafka_python", "KafkaInstrumentor") + _try_instrument( + enabled, + failed, + "confluent-kafka", + "opentelemetry.instrumentation.confluent_kafka", + "ConfluentKafkaInstrumentor", + ) + _try_instrument(enabled, failed, "aiokafka", "opentelemetry.instrumentation.aiokafka", "AioKafkaInstrumentor") + _try_instrument(enabled, failed, "pika", "opentelemetry.instrumentation.pika", "PikaInstrumentor") + _try_instrument(enabled, failed, "aio-pika", "opentelemetry.instrumentation.aio_pika", "AioPikaInstrumentor") + + # ── AWS ─────────────────────────────────────────────────────── + _try_instrument(enabled, failed, "botocore", "opentelemetry.instrumentation.botocore", "BotocoreInstrumentor") + _try_instrument(enabled, failed, "boto3sqs", "opentelemetry.instrumentation.boto3sqs", "Boto3SQSInstrumentor") + + # ── gRPC ────────────────────────────────────────────────────── + _try_instrument_grpc(enabled, failed) + + # ── GenAI / AI ──────────────────────────────────────────────── + _try_instrument(enabled, failed, "openai", "opentelemetry.instrumentation.openai_v2", "OpenAIInstrumentor") + _try_instrument(enabled, failed, "anthropic", "opentelemetry.instrumentation.anthropic", "AnthropicInstrumentor") + _try_instrument(enabled, failed, "vertexai", "opentelemetry.instrumentation.vertexai", "VertexAIInstrumentor") + _try_instrument( + enabled, + failed, + "google_genai", + "opentelemetry.instrumentation.google_generativeai", + "GoogleGenerativeAIInstrumentor", + ) + _try_instrument(enabled, failed, "langchain", "opentelemetry.instrumentation.langchain", "LangchainInstrumentor") + _try_instrument(enabled, failed, "ollama", "opentelemetry.instrumentation.ollama", "OllamaInstrumentor") + _try_instrument(enabled, failed, "crewai", "opentelemetry.instrumentation.crewai", "CrewAIInstrumentor") + + # ── Runtime / Concurrency ───────────────────────────────────── + _try_instrument(enabled, failed, "logging", "opentelemetry.instrumentation.logging", "LoggingInstrumentor") + _try_instrument(enabled, failed, "threading", "opentelemetry.instrumentation.threading", "ThreadingInstrumentor") + _try_instrument(enabled, failed, "asyncio", "opentelemetry.instrumentation.asyncio", "AsyncioInstrumentor") + + if enabled: + logger.info("Auto-instrumentation enabled: %s", ", ".join(enabled)) + if failed: + for name, error in failed: + logger.warning("Auto-instrumentation failed for %s: %s", name, error) + + +def _try_instrument( + enabled: List[str], + failed: List[tuple[str, str]], + name: str, + module_path: str, + class_name: str, +) -> None: + """Try to import and instrument a single library.""" + try: + import importlib + + mod = importlib.import_module(module_path) + instrumentor_cls = getattr(mod, class_name) + instrumentor_cls().instrument() + enabled.append(name) + except ImportError: + pass + except Exception as exc: + failed.append((name, str(exc))) + + +def _try_instrument_grpc( + enabled: List[str], + failed: List[tuple[str, str]], +) -> None: + """Try to instrument gRPC (client + server).""" + try: + from opentelemetry.instrumentation.grpc import ( + GrpcInstrumentorClient, + GrpcInstrumentorServer, + ) + + GrpcInstrumentorClient().instrument() + GrpcInstrumentorServer().instrument() + enabled.append("grpc") + except ImportError: + pass + except Exception as exc: + 
failed.append(("grpc", str(exc))) + + +def is_enabled() -> bool: + """Check if Botanu SDK is initialized.""" + return _initialized + + +def get_config() -> Optional[BotanuConfig]: + """Get the current Botanu configuration.""" + return _current_config + + +def disable() -> None: + """Disable Botanu SDK and shutdown OTEL. + + Call on application shutdown for clean exit. + """ + global _initialized, _current_config + + with _lock: + if not _initialized: + return + + try: + from opentelemetry import trace + + provider = trace.get_tracer_provider() + if hasattr(provider, "force_flush"): + provider.force_flush(timeout_millis=5000) + if hasattr(provider, "shutdown"): + provider.shutdown() + + _initialized = False + _current_config = None + logger.info("Botanu SDK shutdown complete") + + except Exception as exc: + logger.error("Error during Botanu SDK shutdown: %s", exc) diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py new file mode 100644 index 0000000..10b6646 --- /dev/null +++ b/src/botanu/sdk/config.py @@ -0,0 +1,330 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Configuration for Botanu SDK. + +The SDK is intentionally minimal on the hot path. Heavy processing happens in +the OpenTelemetry Collector, not in the application: + +- **SDK responsibility**: Generate run_id, propagate minimal context (run_id, use_case) +- **Collector responsibility**: PII redaction, vendor detection, attribute enrichment + +Configuration precedence (highest to lowest): +1. Code arguments (explicit values passed to BotanuConfig) +2. Environment variables (BOTANU_*, OTEL_*) +3. YAML config file (botanu.yaml or specified path) +4. Built-in defaults +""" + +from __future__ import annotations + +import logging +import os +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional + +logger = logging.getLogger(__name__) + + +@dataclass +class BotanuConfig: + """Configuration for Botanu SDK and OpenTelemetry. + + The SDK is a thin wrapper on OpenTelemetry. PII redaction, cardinality + limits, and vendor enrichment are handled by the OTel Collector — not here. + + Typically configured via environment variables (no hardcoded values):: + + >>> # Reads from OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT, etc. + >>> config = BotanuConfig() + + >>> # Or load from YAML + >>> config = BotanuConfig.from_yaml("config/botanu.yaml") + """ + + # Service identification + service_name: Optional[str] = None + service_version: Optional[str] = None + service_namespace: Optional[str] = None + deployment_environment: Optional[str] = None + + # Resource detection + auto_detect_resources: bool = True + + # OTLP exporter configuration + otlp_endpoint: Optional[str] = None + otlp_headers: Optional[Dict[str, str]] = None + + # Span export configuration + # Large queue prevents span loss under burst traffic. + # At ~1KB/span, 65536 spans ≈ 64MB memory ceiling. 
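+    # These defaults can be overridden at runtime via the BOTANU_MAX_QUEUE_SIZE,
+    # BOTANU_MAX_EXPORT_BATCH_SIZE, and BOTANU_EXPORT_TIMEOUT_MILLIS environment
+    # variables (see __post_init__).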
+ max_export_batch_size: int = 512 + max_queue_size: int = 65536 + schedule_delay_millis: int = 5000 + export_timeout_millis: int = 30000 + + # Propagation mode: "lean" (run_id + use_case only) or "full" (all context) + propagation_mode: str = "lean" + + # Auto-instrumentation packages to enable + auto_instrument_packages: List[str] = field( + default_factory=lambda: [ + # HTTP clients + "requests", + "httpx", + "urllib3", + "aiohttp_client", + # Web frameworks + "fastapi", + "flask", + "django", + "starlette", + # Databases + "sqlalchemy", + "psycopg2", + "asyncpg", + "pymongo", + "redis", + # Messaging + "celery", + "kafka_python", + # gRPC + "grpc", + # GenAI / AI + "openai_v2", + "anthropic", + "vertexai", + "google_genai", + "langchain", + # Runtime + "logging", + ] + ) + + # Config file path (for tracking where config was loaded from) + _config_file: Optional[str] = field(default=None, repr=False) + + def __post_init__(self) -> None: + """Apply environment variable defaults. + + Precedence: BOTANU_* > OTEL_* > defaults + """ + if self.service_name is None: + self.service_name = os.getenv( + "BOTANU_SERVICE_NAME", + os.getenv("OTEL_SERVICE_NAME", "unknown_service"), + ) + + if self.service_version is None: + self.service_version = os.getenv("OTEL_SERVICE_VERSION") + + if self.service_namespace is None: + self.service_namespace = os.getenv("OTEL_SERVICE_NAMESPACE") + + env_auto_detect = os.getenv("BOTANU_AUTO_DETECT_RESOURCES") + if env_auto_detect is not None: + self.auto_detect_resources = env_auto_detect.lower() in ("true", "1", "yes") + + if self.deployment_environment is None: + self.deployment_environment = os.getenv( + "BOTANU_ENVIRONMENT", + os.getenv("OTEL_DEPLOYMENT_ENVIRONMENT", "production"), + ) + + if self.otlp_endpoint is None: + # Check BOTANU_COLLECTOR_ENDPOINT first, then OTEL_* vars + botanu_endpoint = os.getenv("BOTANU_COLLECTOR_ENDPOINT") + if botanu_endpoint: + self.otlp_endpoint = botanu_endpoint + else: + env_endpoint = os.getenv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT") + if env_endpoint: + self.otlp_endpoint = env_endpoint + else: + base = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318") + self.otlp_endpoint = base + + env_propagation_mode = os.getenv("BOTANU_PROPAGATION_MODE") + if env_propagation_mode and env_propagation_mode in ("lean", "full"): + self.propagation_mode = env_propagation_mode + + # Export tuning via env vars + env_queue_size = os.getenv("BOTANU_MAX_QUEUE_SIZE") + if env_queue_size: + try: + self.max_queue_size = int(env_queue_size) + except ValueError: + pass + + env_batch_size = os.getenv("BOTANU_MAX_EXPORT_BATCH_SIZE") + if env_batch_size: + try: + self.max_export_batch_size = int(env_batch_size) + except ValueError: + pass + + env_export_timeout = os.getenv("BOTANU_EXPORT_TIMEOUT_MILLIS") + if env_export_timeout: + try: + self.export_timeout_millis = int(env_export_timeout) + except ValueError: + pass + + # ------------------------------------------------------------------ + # YAML loading + # ------------------------------------------------------------------ + + @classmethod + def from_yaml(cls, path: Optional[str] = None) -> BotanuConfig: + """Load configuration from a YAML file. + + Supports environment variable interpolation using ``${VAR_NAME}`` syntax. + + Args: + path: Path to YAML config file. + + Raises: + FileNotFoundError: If config file doesn't exist. + ValueError: If YAML is malformed. 
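+
+        Example (illustrative file; the keys mirror the layout read by ``_from_dict``)::
+
+            service:
+              name: ${OTEL_SERVICE_NAME:-my-service}
+              environment: ${BOTANU_ENVIRONMENT:-production}
+            otlp:
+              endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://localhost:4318}
+            propagation:
+              mode: lean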
+ """ + if path is None: + raise FileNotFoundError("No config file path provided") + + resolved = Path(path) + if not resolved.exists(): + raise FileNotFoundError(f"Config file not found: {resolved}") + + try: + import yaml # type: ignore[import-untyped] + except ImportError as err: + raise ImportError("PyYAML required for YAML config. Install with: pip install pyyaml") from err + + with open(resolved) as fh: + raw_content = fh.read() + + content = _interpolate_env_vars(raw_content) + + try: + data = yaml.safe_load(content) + except yaml.YAMLError as exc: + raise ValueError(f"Invalid YAML in {resolved}: {exc}") from exc + + if data is None: + data = {} + + return cls._from_dict(data, config_file=str(resolved)) + + @classmethod + def from_file_or_env(cls, path: Optional[str] = None) -> BotanuConfig: + """Load config from file if exists, otherwise use environment variables. + + Search order: + 1. Explicit *path* argument + 2. ``BOTANU_CONFIG_FILE`` env var + 3. ``./botanu.yaml`` + 4. ``./config/botanu.yaml`` + 5. Falls back to env-only config + """ + search_paths: List[Path] = [] + + if path: + search_paths.append(Path(path)) + + env_path = os.getenv("BOTANU_CONFIG_FILE") + if env_path: + search_paths.append(Path(env_path)) + + search_paths.extend( + [ + Path("botanu.yaml"), + Path("botanu.yml"), + Path("config/botanu.yaml"), + Path("config/botanu.yml"), + ] + ) + + for candidate in search_paths: + if candidate.exists(): + logger.info("Loading config from: %s", candidate) + return cls.from_yaml(str(candidate)) + + logger.debug("No config file found, using environment variables only") + return cls() + + @classmethod + def _from_dict( + cls, + data: Dict[str, Any], + config_file: Optional[str] = None, + ) -> BotanuConfig: + """Create config from dictionary (parsed YAML).""" + service = data.get("service", {}) + otlp = data.get("otlp", {}) + export = data.get("export", {}) + propagation = data.get("propagation", {}) + resource = data.get("resource", {}) + auto_packages = data.get("auto_instrument_packages") + + return cls( + service_name=service.get("name"), + service_version=service.get("version"), + service_namespace=service.get("namespace"), + deployment_environment=service.get("environment"), + auto_detect_resources=resource.get("auto_detect", True), + otlp_endpoint=otlp.get("endpoint"), + otlp_headers=otlp.get("headers"), + max_export_batch_size=export.get("batch_size", 512), + max_queue_size=export.get("queue_size", 65536), + schedule_delay_millis=export.get("delay_ms", 5000), + export_timeout_millis=export.get("export_timeout_ms", 30000), + propagation_mode=propagation.get("mode", "lean"), + auto_instrument_packages=(auto_packages if auto_packages else BotanuConfig().auto_instrument_packages), + _config_file=config_file, + ) + + def to_dict(self) -> Dict[str, Any]: + """Export configuration as dictionary.""" + return { + "service": { + "name": self.service_name, + "version": self.service_version, + "namespace": self.service_namespace, + "environment": self.deployment_environment, + }, + "resource": { + "auto_detect": self.auto_detect_resources, + }, + "otlp": { + "endpoint": self.otlp_endpoint, + "headers": self.otlp_headers, + }, + "export": { + "batch_size": self.max_export_batch_size, + "queue_size": self.max_queue_size, + "delay_ms": self.schedule_delay_millis, + "export_timeout_ms": self.export_timeout_millis, + }, + "propagation": { + "mode": self.propagation_mode, + }, + "auto_instrument_packages": self.auto_instrument_packages, + } + + +def _interpolate_env_vars(content: str) -> 
str: + """Interpolate ``${VAR_NAME}`` and ``${VAR_NAME:-default}`` in *content*.""" + pattern = re.compile(r"\$\{([A-Za-z_][A-Za-z0-9_]*)(?::-([^}]*))?\}") + + def _replace(match: re.Match) -> str: # type: ignore[type-arg] + var_name = match.group(1) + default = match.group(2) + value = os.getenv(var_name) + if value is not None: + return value + if default is not None: + return default + return match.group(0) + + return pattern.sub(_replace, content) diff --git a/src/botanu/sdk/context.py b/src/botanu/sdk/context.py new file mode 100644 index 0000000..5a75e3f --- /dev/null +++ b/src/botanu/sdk/context.py @@ -0,0 +1,78 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Context and baggage helpers for Botanu SDK. + +Uses OpenTelemetry Context and Baggage for propagation. +""" + +from __future__ import annotations + +from typing import Optional, cast + +from opentelemetry import baggage, trace +from opentelemetry.context import attach, get_current + + +def set_baggage(key: str, value: str) -> object: + """Set a baggage value and attach the new context. + + Baggage is automatically propagated across service boundaries via + W3C Baggage header. + + .. warning:: + + Each call pushes a new context onto the stack. The returned token + **must** be passed to ``opentelemetry.context.detach()`` when the + scope ends, otherwise the context stack grows unboundedly (memory + leak in long-running processes). + + For setting multiple keys, prefer building the context manually + and attaching once — see ``decorators.py`` for the pattern. + + Args: + key: Baggage key (e.g., ``"botanu.run_id"``). + value: Baggage value. + + Returns: + Token for detaching the context later. + """ + ctx = baggage.set_baggage(key, value, context=get_current()) + return attach(ctx) + + +def get_baggage(key: str) -> Optional[str]: + """Get a baggage value from the current context. + + Args: + key: Baggage key (e.g., ``"botanu.run_id"``). + + Returns: + Baggage value or ``None`` if not set. + """ + value = baggage.get_baggage(key, context=get_current()) + return cast(Optional[str], value) + + +def get_current_span() -> trace.Span: + """Get the current active span. + + Returns: + Current span (may be non-recording if no span is active). + """ + return trace.get_current_span() + + +def get_run_id() -> Optional[str]: + """Get the current ``run_id`` from baggage.""" + return get_baggage("botanu.run_id") + + +def get_use_case() -> Optional[str]: + """Get the current ``use_case`` from baggage.""" + return get_baggage("botanu.use_case") + + +def get_workflow() -> Optional[str]: + """Get the current ``workflow`` from baggage.""" + return get_baggage("botanu.workflow") diff --git a/src/botanu/sdk/decorators.py b/src/botanu/sdk/decorators.py new file mode 100644 index 0000000..4bffa6c --- /dev/null +++ b/src/botanu/sdk/decorators.py @@ -0,0 +1,294 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Decorators for automatic run span creation and context propagation. + +The ``@botanu_use_case`` decorator is the primary integration point. 
+It creates a "run span" that: +- Generates a UUIDv7 run_id +- Emits ``run.started`` and ``run.completed`` events +- Propagates run context via W3C Baggage +- Records outcome at completion +""" + +from __future__ import annotations + +import functools +import hashlib +import inspect +from collections.abc import Mapping +from datetime import datetime, timezone +from typing import Any, Callable, Dict, Optional, TypeVar, Union + +from opentelemetry import baggage as otel_baggage +from opentelemetry import trace +from opentelemetry.context import attach, detach, get_current +from opentelemetry.trace import SpanKind, Status, StatusCode + +from botanu.models.run_context import RunContext, RunStatus +from botanu.sdk.context import get_baggage + +T = TypeVar("T") + +tracer = trace.get_tracer("botanu_sdk") + + +def _compute_workflow_version(func: Callable[..., Any]) -> str: + try: + source = inspect.getsource(func) + code_hash = hashlib.sha256(source.encode()).hexdigest() + return f"v:{code_hash[:12]}" + except (OSError, TypeError): + return "v:unknown" + + +def _get_parent_run_id() -> Optional[str]: + return get_baggage("botanu.run_id") + + +def botanu_use_case( + name: str, + workflow: Optional[str] = None, + *, + environment: Optional[str] = None, + tenant_id: Optional[str] = None, + auto_outcome_on_success: bool = True, + span_kind: SpanKind = SpanKind.SERVER, +) -> Callable[[Callable[..., T]], Callable[..., T]]: + """Decorator to create a run span with automatic context propagation. + + This is the primary integration point. It: + + 1. Creates a UUIDv7 ``run_id`` (sortable, globally unique) + 2. Creates a ``botanu.run`` span as the root of the run + 3. Emits ``run.started`` event + 4. Propagates run context via W3C Baggage + 5. On completion: emits ``run.completed`` event with outcome + + Args: + name: Use case name (low cardinality, e.g. ``"Customer Support"``). + workflow: Workflow name (defaults to function qualified name). + environment: Deployment environment. + tenant_id: Tenant identifier for multi-tenant apps. + auto_outcome_on_success: Emit ``"success"`` if no exception. + span_kind: OpenTelemetry span kind (default: ``SERVER``). 
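+
+        If the decorated function runs while another run is already active, that
+        run's ``run_id`` (read from baggage) is recorded as the new run's
+        ``parent_run_id``.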
+ + Example:: + + @botanu_use_case("Customer Support") + async def handle_ticket(ticket_id: str): + result = await process_ticket(ticket_id) + emit_outcome("success", value_type="tickets_resolved", value_amount=1) + return result + """ + + def decorator(func: Callable[..., T]) -> Callable[..., T]: + workflow_name = workflow or func.__qualname__ + workflow_version = _compute_workflow_version(func) + is_async = inspect.iscoroutinefunction(func) + + @functools.wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> T: + parent_run_id = _get_parent_run_id() + run_ctx = RunContext.create( + use_case=name, + workflow=workflow_name, + workflow_version=workflow_version, + environment=environment, + tenant_id=tenant_id, + parent_run_id=parent_run_id, + ) + + with tracer.start_as_current_span( + name=f"botanu.run/{name}", + kind=span_kind, + ) as span: + for key, value in run_ctx.to_span_attributes().items(): + span.set_attribute(key, value) + + span.add_event( + "botanu.run.started", + attributes={ + "run_id": run_ctx.run_id, + "use_case": run_ctx.use_case, + "workflow": workflow_name, + }, + ) + + ctx = get_current() + for key, value in run_ctx.to_baggage_dict().items(): + ctx = otel_baggage.set_baggage(key, value, context=ctx) + baggage_token = attach(ctx) + + try: + result = await func(*args, **kwargs) + + span_attrs = getattr(span, "attributes", None) + existing_outcome = ( + span_attrs.get("botanu.outcome.status") if isinstance(span_attrs, Mapping) else None + ) + + if existing_outcome is None and auto_outcome_on_success: + run_ctx.complete(RunStatus.SUCCESS) + + span.set_status(Status(StatusCode.OK)) + _emit_run_completed(span, run_ctx, RunStatus.SUCCESS) + return result + + except Exception as exc: + span.set_status(Status(StatusCode.ERROR, str(exc))) + span.record_exception(exc) + run_ctx.complete(RunStatus.FAILURE, error_class=exc.__class__.__name__) + _emit_run_completed( + span, + run_ctx, + RunStatus.FAILURE, + error_class=exc.__class__.__name__, + ) + raise + finally: + detach(baggage_token) + + @functools.wraps(func) + def sync_wrapper(*args: Any, **kwargs: Any) -> T: + parent_run_id = _get_parent_run_id() + run_ctx = RunContext.create( + use_case=name, + workflow=workflow_name, + workflow_version=workflow_version, + environment=environment, + tenant_id=tenant_id, + parent_run_id=parent_run_id, + ) + + with tracer.start_as_current_span( + name=f"botanu.run/{name}", + kind=span_kind, + ) as span: + for key, value in run_ctx.to_span_attributes().items(): + span.set_attribute(key, value) + + span.add_event( + "botanu.run.started", + attributes={ + "run_id": run_ctx.run_id, + "use_case": run_ctx.use_case, + "workflow": workflow_name, + }, + ) + + ctx = get_current() + for key, value in run_ctx.to_baggage_dict().items(): + ctx = otel_baggage.set_baggage(key, value, context=ctx) + baggage_token = attach(ctx) + + try: + result = func(*args, **kwargs) + + span_attrs = getattr(span, "attributes", None) + existing_outcome = ( + span_attrs.get("botanu.outcome.status") if isinstance(span_attrs, Mapping) else None + ) + + if existing_outcome is None and auto_outcome_on_success: + run_ctx.complete(RunStatus.SUCCESS) + + span.set_status(Status(StatusCode.OK)) + _emit_run_completed(span, run_ctx, RunStatus.SUCCESS) + return result + + except Exception as exc: + span.set_status(Status(StatusCode.ERROR, str(exc))) + span.record_exception(exc) + run_ctx.complete(RunStatus.FAILURE, error_class=exc.__class__.__name__) + _emit_run_completed( + span, + run_ctx, + RunStatus.FAILURE, + 
error_class=exc.__class__.__name__, + ) + raise + finally: + detach(baggage_token) + + if is_async: + return async_wrapper # type: ignore[return-value] + return sync_wrapper # type: ignore[return-value] + + return decorator + + +def _emit_run_completed( + span: trace.Span, + run_ctx: RunContext, + status: RunStatus, + error_class: Optional[str] = None, +) -> None: + duration_ms = (datetime.now(timezone.utc) - run_ctx.start_time).total_seconds() * 1000 + + event_attrs: Dict[str, Union[str, float]] = { + "run_id": run_ctx.run_id, + "use_case": run_ctx.use_case, + "status": status.value, + "duration_ms": duration_ms, + } + if error_class: + event_attrs["error_class"] = error_class + if run_ctx.outcome and run_ctx.outcome.value_type: + event_attrs["value_type"] = run_ctx.outcome.value_type + if run_ctx.outcome and run_ctx.outcome.value_amount is not None: + event_attrs["value_amount"] = run_ctx.outcome.value_amount + + span.add_event("botanu.run.completed", attributes=event_attrs) + + span.set_attribute("botanu.outcome.status", status.value) + span.set_attribute("botanu.run.duration_ms", duration_ms) + + +use_case = botanu_use_case + + +def botanu_outcome( + success: Optional[str] = None, + partial: Optional[str] = None, + failed: Optional[str] = None, +) -> Callable[[Callable[..., T]], Callable[..., T]]: + """Decorator to automatically emit outcomes based on function result. + + This is a convenience decorator for sub-functions within a use case. + It does NOT create a new run — use ``@botanu_use_case`` for that. + """ + from botanu.sdk.span_helpers import emit_outcome + + def decorator(func: Callable[..., T]) -> Callable[..., T]: + is_async = inspect.iscoroutinefunction(func) + + @functools.wraps(func) + async def async_wrapper(*args: Any, **kwargs: Any) -> T: + try: + result = await func(*args, **kwargs) + span = trace.get_current_span() + if not span.attributes or "botanu.outcome.status" not in span.attributes: + emit_outcome("success") + return result + except Exception as exc: + emit_outcome("failed", reason=exc.__class__.__name__) + raise + + @functools.wraps(func) + def sync_wrapper(*args: Any, **kwargs: Any) -> T: + try: + result = func(*args, **kwargs) + span = trace.get_current_span() + if not span.attributes or "botanu.outcome.status" not in span.attributes: + emit_outcome("success") + return result + except Exception as exc: + emit_outcome("failed", reason=exc.__class__.__name__) + raise + + if is_async: + return async_wrapper # type: ignore[return-value] + return sync_wrapper # type: ignore[return-value] + + return decorator diff --git a/src/botanu/sdk/middleware.py b/src/botanu/sdk/middleware.py new file mode 100644 index 0000000..83eb742 --- /dev/null +++ b/src/botanu/sdk/middleware.py @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""FastAPI / Starlette middleware for span enrichment. + +This middleware works alongside OpenTelemetry's FastAPIInstrumentor to enrich +spans with Botanu-specific context. +""" + +from __future__ import annotations + +import uuid +from typing import Optional + +from opentelemetry import baggage as otel_baggage +from opentelemetry import trace +from opentelemetry.context import attach, detach, get_current +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response + + +class BotanuMiddleware(BaseHTTPMiddleware): + """FastAPI middleware to enrich spans with Botanu context. 
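+
+    Context is read from W3C Baggage first, then from ``x-botanu-*`` request
+    headers; missing values fall back to the constructor arguments or, for
+    ``run_id``, an auto-generated UUID.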
+ + This middleware should be used **after** OpenTelemetry's + ``FastAPIInstrumentor``. It extracts Botanu context from incoming + requests and enriches the current span with Botanu attributes. + + Example:: + + from fastapi import FastAPI + from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor + from botanu.sdk.middleware import BotanuMiddleware + + app = FastAPI() + FastAPIInstrumentor.instrument_app(app) + app.add_middleware( + BotanuMiddleware, + use_case="customer_support", + workflow="ticket_api", + ) + """ + + def __init__( + self, + app: object, + *, + use_case: str, + workflow: Optional[str] = None, + auto_generate_run_id: bool = True, + ) -> None: + super().__init__(app) # type: ignore[arg-type] + self.use_case = use_case + self.workflow = workflow or use_case + self.auto_generate_run_id = auto_generate_run_id + + async def dispatch(self, request: Request, call_next: object) -> Response: # type: ignore[override] + """Process request and enrich span with Botanu context.""" + span = trace.get_current_span() + + run_id = otel_baggage.get_baggage("botanu.run_id") + if not run_id: + run_id = request.headers.get("x-botanu-run-id") + + if not run_id and self.auto_generate_run_id: + run_id = str(uuid.uuid4()) + + use_case = ( + otel_baggage.get_baggage("botanu.use_case") or request.headers.get("x-botanu-use-case") or self.use_case + ) + workflow = ( + otel_baggage.get_baggage("botanu.workflow") or request.headers.get("x-botanu-workflow") or self.workflow + ) + customer_id = otel_baggage.get_baggage("botanu.customer_id") or request.headers.get("x-botanu-customer-id") + + if run_id: + span.set_attribute("botanu.run_id", run_id) + span.set_attribute("botanu.use_case", use_case) + span.set_attribute("botanu.workflow", workflow) + if customer_id: + span.set_attribute("botanu.customer_id", customer_id) + + span.set_attribute("http.route", request.url.path) + span.set_attribute("http.method", request.method) + + ctx = get_current() + if run_id: + ctx = otel_baggage.set_baggage("botanu.run_id", run_id, context=ctx) + ctx = otel_baggage.set_baggage("botanu.use_case", use_case, context=ctx) + ctx = otel_baggage.set_baggage("botanu.workflow", workflow, context=ctx) + if customer_id: + ctx = otel_baggage.set_baggage("botanu.customer_id", customer_id, context=ctx) + + baggage_token = attach(ctx) + try: + response = await call_next(request) # type: ignore[misc] + finally: + detach(baggage_token) + + if run_id: + response.headers["x-botanu-run-id"] = run_id + response.headers["x-botanu-use-case"] = use_case + response.headers["x-botanu-workflow"] = workflow + + return response diff --git a/src/botanu/sdk/span_helpers.py b/src/botanu/sdk/span_helpers.py new file mode 100644 index 0000000..f7388ff --- /dev/null +++ b/src/botanu/sdk/span_helpers.py @@ -0,0 +1,92 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Helper functions for working with OpenTelemetry spans. + +These functions add Botanu-specific attributes to the current span. +""" + +from __future__ import annotations + +from typing import Optional + +from opentelemetry import trace + + +def emit_outcome( + status: str, + *, + value_type: Optional[str] = None, + value_amount: Optional[float] = None, + confidence: Optional[float] = None, + reason: Optional[str] = None, +) -> None: + """Emit an outcome for the current span. + + Sets span attributes for outcome tracking and ROI calculation. + + Args: + status: Outcome status (``"success"``, ``"partial"``, ``"failed"``). 
value_type: Type of business value (e.g., ``"tickets_resolved"``).
+        value_amount: Quantified value amount.
+        confidence: Confidence score (0.0–1.0).
+        reason: Optional reason for the outcome.
+
+    Example::
+
+        >>> emit_outcome("success", value_type="tickets_resolved", value_amount=1)
+        >>> emit_outcome("failed", reason="missing_context")
+    """
+    span = trace.get_current_span()
+
+    # Record the status under both keys: ``botanu.outcome`` as the flat attribute,
+    # and ``botanu.outcome.status`` so the ``@botanu_use_case`` / ``@botanu_outcome``
+    # decorators (which check that key) detect a user-emitted outcome and skip the
+    # automatic one.
+    span.set_attribute("botanu.outcome", status)
+    span.set_attribute("botanu.outcome.status", status)
+
+    if value_type:
+        span.set_attribute("botanu.outcome.value_type", value_type)
+
+    if value_amount is not None:
+        span.set_attribute("botanu.outcome.value_amount", value_amount)
+
+    if confidence is not None:
+        span.set_attribute("botanu.outcome.confidence", confidence)
+
+    if reason:
+        span.set_attribute("botanu.outcome.reason", reason)
+
+    event_attrs: dict[str, object] = {"status": status}
+    if value_type:
+        event_attrs["value_type"] = value_type
+    if value_amount is not None:
+        event_attrs["value_amount"] = value_amount
+
+    span.add_event("botanu.outcome_emitted", event_attrs)
+
+
+def set_business_context(
+    *,
+    customer_id: Optional[str] = None,
+    team: Optional[str] = None,
+    cost_center: Optional[str] = None,
+    region: Optional[str] = None,
+) -> None:
+    """Set business context attributes on the current span.
+
+    Args:
+        customer_id: Customer identifier for multi-tenant attribution.
+        team: Team or department.
+        cost_center: Cost center for financial tracking.
+        region: Geographic region.
+    """
+    span = trace.get_current_span()
+
+    if customer_id:
+        span.set_attribute("botanu.customer_id", customer_id)
+
+    if team:
+        span.set_attribute("botanu.team", team)
+
+    if cost_center:
+        span.set_attribute("botanu.cost_center", cost_center)
+
+    if region:
+        span.set_attribute("botanu.region", region)
diff --git a/src/botanu/tracking/__init__.py b/src/botanu/tracking/__init__.py
new file mode 100644
index 0000000..5933aa6
--- /dev/null
+++ b/src/botanu/tracking/__init__.py
@@ -0,0 +1,77 @@
+# SPDX-FileCopyrightText: 2026 The Botanu Authors
+# SPDX-License-Identifier: Apache-2.0
+
+"""Botanu tracking components. 
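+
+The trackers record their metrics as span attributes on the active trace; the
+attempt ledger additionally emits durable log events.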
+ +Provides tracking for different operation types: +- LLM/GenAI model calls +- Database, storage, and messaging operations +- Attempt ledger for durable cost tracking +""" + +from __future__ import annotations + +from botanu.tracking.data import ( + DBOperation, + MessagingOperation, + StorageOperation, + set_data_metrics, + set_warehouse_metrics, + track_db_operation, + track_messaging_operation, + track_storage_operation, +) +from botanu.tracking.ledger import ( + AttemptLedger, + AttemptStatus, + LedgerEventType, + get_ledger, + record_attempt_ended, + record_attempt_started, + record_llm_attempted, + record_tool_attempted, + set_ledger, +) +from botanu.tracking.llm import ( + BotanuAttributes, + GenAIAttributes, + LLMTracker, + ModelOperation, + ToolTracker, + set_llm_attributes, + set_token_usage, + track_llm_call, + track_tool_call, +) + +__all__ = [ + # LLM tracking + "track_llm_call", + "track_tool_call", + "set_llm_attributes", + "set_token_usage", + "ModelOperation", + "GenAIAttributes", + "BotanuAttributes", + "LLMTracker", + "ToolTracker", + # Data tracking + "track_db_operation", + "track_storage_operation", + "track_messaging_operation", + "set_data_metrics", + "set_warehouse_metrics", + "DBOperation", + "StorageOperation", + "MessagingOperation", + # Attempt ledger + "AttemptLedger", + "get_ledger", + "set_ledger", + "record_attempt_started", + "record_attempt_ended", + "record_llm_attempted", + "record_tool_attempted", + "LedgerEventType", + "AttemptStatus", +] diff --git a/src/botanu/tracking/data.py b/src/botanu/tracking/data.py new file mode 100644 index 0000000..5a58f57 --- /dev/null +++ b/src/botanu/tracking/data.py @@ -0,0 +1,488 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Data Tracking — Track database, storage, and messaging operations. 
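+
+System names are normalized via the ``DB_SYSTEMS``, ``STORAGE_SYSTEMS``, and
+``MESSAGING_SYSTEMS`` maps, and each tracked span carries a ``botanu.vendor``
+attribute with the normalized value.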
+ +Usage:: + + from botanu.tracking.data import track_db_operation, track_storage_operation + + with track_db_operation(system="postgresql", operation="SELECT") as db: + result = cursor.execute("SELECT * FROM users WHERE active = true") + db.set_result(rows_returned=len(result)) +""" + +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Dict, Generator, Optional + +from opentelemetry import trace +from opentelemetry.trace import Span, SpanKind, Status, StatusCode + +# ========================================================================= +# System Normalization Maps +# ========================================================================= + +DB_SYSTEMS: Dict[str, str] = { + "postgresql": "postgresql", + "postgres": "postgresql", + "pg": "postgresql", + "mysql": "mysql", + "mariadb": "mariadb", + "mssql": "mssql", + "sqlserver": "mssql", + "oracle": "oracle", + "sqlite": "sqlite", + "mongodb": "mongodb", + "mongo": "mongodb", + "dynamodb": "dynamodb", + "cassandra": "cassandra", + "couchdb": "couchdb", + "firestore": "firestore", + "cosmosdb": "cosmosdb", + "redis": "redis", + "memcached": "memcached", + "elasticache": "elasticache", + "elasticsearch": "elasticsearch", + "opensearch": "opensearch", + "snowflake": "snowflake", + "bigquery": "bigquery", + "redshift": "redshift", + "databricks": "databricks", + "athena": "athena", + "synapse": "synapse", + "influxdb": "influxdb", + "timescaledb": "timescaledb", + "neo4j": "neo4j", + "neptune": "neptune", +} + +STORAGE_SYSTEMS: Dict[str, str] = { + "s3": "s3", + "aws_s3": "s3", + "gcs": "gcs", + "google_cloud_storage": "gcs", + "blob": "azure_blob", + "azure_blob": "azure_blob", + "minio": "minio", + "ceph": "ceph", + "nfs": "nfs", + "efs": "efs", +} + +MESSAGING_SYSTEMS: Dict[str, str] = { + "sqs": "sqs", + "aws_sqs": "sqs", + "sns": "sns", + "kinesis": "kinesis", + "eventbridge": "eventbridge", + "pubsub": "pubsub", + "google_pubsub": "pubsub", + "servicebus": "servicebus", + "azure_servicebus": "servicebus", + "eventhub": "eventhub", + "kafka": "kafka", + "rabbitmq": "rabbitmq", + "nats": "nats", + "redis_pubsub": "redis_pubsub", + "celery": "celery", +} + + +class DBOperation: + SELECT = "SELECT" + INSERT = "INSERT" + UPDATE = "UPDATE" + DELETE = "DELETE" + UPSERT = "UPSERT" + MERGE = "MERGE" + CREATE = "CREATE" + DROP = "DROP" + ALTER = "ALTER" + INDEX = "INDEX" + TRANSACTION = "TRANSACTION" + BATCH = "BATCH" + + +class StorageOperation: + GET = "GET" + PUT = "PUT" + DELETE = "DELETE" + LIST = "LIST" + HEAD = "HEAD" + COPY = "COPY" + MULTIPART_UPLOAD = "MULTIPART_UPLOAD" + + +class MessagingOperation: + PUBLISH = "publish" + CONSUME = "consume" + RECEIVE = "receive" + SEND = "send" + SUBSCRIBE = "subscribe" + + +# ========================================================================= +# Database Tracker +# ========================================================================= + + +@dataclass +class DBTracker: + """Tracks database operations.""" + + system: str + operation: str + span: Optional[Span] = field(default=None, repr=False) + start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + rows_returned: int = 0 + rows_affected: int = 0 + bytes_read: int = 0 + bytes_written: int = 0 + + def set_result( + self, + rows_returned: int = 0, + rows_affected: int = 0, + bytes_read: int = 0, + bytes_written: int = 0, + ) -> DBTracker: + self.rows_returned = rows_returned + 
self.rows_affected = rows_affected + self.bytes_read = bytes_read + self.bytes_written = bytes_written + if self.span: + if rows_returned > 0: + self.span.set_attribute("botanu.data.rows_returned", rows_returned) + if rows_affected > 0: + self.span.set_attribute("botanu.data.rows_affected", rows_affected) + if bytes_read > 0: + self.span.set_attribute("botanu.data.bytes_read", bytes_read) + if bytes_written > 0: + self.span.set_attribute("botanu.data.bytes_written", bytes_written) + return self + + def set_table(self, table_name: str, schema: Optional[str] = None) -> DBTracker: + if self.span: + self.span.set_attribute("db.collection.name", table_name) + if schema: + self.span.set_attribute("db.schema", schema) + return self + + def set_query_id(self, query_id: str) -> DBTracker: + if self.span: + self.span.set_attribute("botanu.warehouse.query_id", query_id) + return self + + def set_bytes_scanned(self, bytes_scanned: int) -> DBTracker: + self.bytes_read = bytes_scanned + if self.span: + self.span.set_attribute("botanu.warehouse.bytes_scanned", bytes_scanned) + return self + + def set_error(self, error: Exception) -> DBTracker: + if self.span: + self.span.set_status(Status(StatusCode.ERROR, str(error))) + self.span.set_attribute("botanu.data.error", type(error).__name__) + self.span.record_exception(error) + return self + + def add_metadata(self, **kwargs: Any) -> DBTracker: + if self.span: + for key, value in kwargs.items(): + attr_key = key if key.startswith("botanu.") else f"botanu.data.{key}" + self.span.set_attribute(attr_key, value) + return self + + def _finalize(self) -> None: + if not self.span: + return + duration_ms = (datetime.now(timezone.utc) - self.start_time).total_seconds() * 1000 + self.span.set_attribute("botanu.data.duration_ms", duration_ms) + + +@contextmanager +def track_db_operation( + system: str, + operation: str, + database: Optional[str] = None, + **kwargs: Any, +) -> Generator[DBTracker, None, None]: + """Track a database operation. + + Args: + system: Database system (postgresql, mysql, mongodb, …). + operation: Type of operation (SELECT, INSERT, …). + database: Database name (optional). 
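+        kwargs: Extra attributes, recorded as ``botanu.data.<key>``.
+
+    Example (values are illustrative)::
+
+        with track_db_operation("snowflake", "SELECT", database="analytics") as db:
+            db.set_query_id("01a2b3c4")
+            db.set_bytes_scanned(10_485_760)
+            db.set_result(rows_returned=42)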
+ """ + tracer = trace.get_tracer("botanu.data") + normalized_system = DB_SYSTEMS.get(system.lower(), system.lower()) + + with tracer.start_as_current_span( + name=f"db.{normalized_system}.{operation.lower()}", + kind=SpanKind.CLIENT, + ) as span: + span.set_attribute("db.system", normalized_system) + span.set_attribute("db.operation", operation.upper()) + span.set_attribute("botanu.vendor", normalized_system) + if database: + span.set_attribute("db.name", database) + for key, value in kwargs.items(): + span.set_attribute(f"botanu.data.{key}", value) + + tracker = DBTracker(system=normalized_system, operation=operation, span=span) + try: + yield tracker + except Exception as exc: + tracker.set_error(exc) + raise + finally: + tracker._finalize() + + +# ========================================================================= +# Storage Tracker +# ========================================================================= + + +@dataclass +class StorageTracker: + """Tracks storage operations.""" + + system: str + operation: str + span: Optional[Span] = field(default=None, repr=False) + start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + objects_count: int = 0 + bytes_read: int = 0 + bytes_written: int = 0 + + def set_result( + self, + objects_count: int = 0, + bytes_read: int = 0, + bytes_written: int = 0, + ) -> StorageTracker: + self.objects_count = objects_count + self.bytes_read = bytes_read + self.bytes_written = bytes_written + if self.span: + if objects_count > 0: + self.span.set_attribute("botanu.data.objects_count", objects_count) + if bytes_read > 0: + self.span.set_attribute("botanu.data.bytes_read", bytes_read) + if bytes_written > 0: + self.span.set_attribute("botanu.data.bytes_written", bytes_written) + return self + + def set_bucket(self, bucket: str) -> StorageTracker: + if self.span: + self.span.set_attribute("botanu.storage.bucket", bucket) + return self + + def set_error(self, error: Exception) -> StorageTracker: + if self.span: + self.span.set_status(Status(StatusCode.ERROR, str(error))) + self.span.set_attribute("botanu.storage.error", type(error).__name__) + self.span.record_exception(error) + return self + + def add_metadata(self, **kwargs: Any) -> StorageTracker: + if self.span: + for key, value in kwargs.items(): + attr_key = key if key.startswith("botanu.") else f"botanu.storage.{key}" + self.span.set_attribute(attr_key, value) + return self + + def _finalize(self) -> None: + if not self.span: + return + duration_ms = (datetime.now(timezone.utc) - self.start_time).total_seconds() * 1000 + self.span.set_attribute("botanu.storage.duration_ms", duration_ms) + + +@contextmanager +def track_storage_operation( + system: str, + operation: str, + **kwargs: Any, +) -> Generator[StorageTracker, None, None]: + """Track a storage operation. + + Args: + system: Storage system (s3, gcs, azure_blob, …). + operation: Type of operation (GET, PUT, DELETE, …). 
+ """ + tracer = trace.get_tracer("botanu.storage") + normalized_system = STORAGE_SYSTEMS.get(system.lower(), system.lower()) + + with tracer.start_as_current_span( + name=f"storage.{normalized_system}.{operation.lower()}", + kind=SpanKind.CLIENT, + ) as span: + span.set_attribute("botanu.storage.system", normalized_system) + span.set_attribute("botanu.storage.operation", operation.upper()) + span.set_attribute("botanu.vendor", normalized_system) + for key, value in kwargs.items(): + span.set_attribute(f"botanu.storage.{key}", value) + + tracker = StorageTracker(system=normalized_system, operation=operation, span=span) + try: + yield tracker + except Exception as exc: + tracker.set_error(exc) + raise + finally: + tracker._finalize() + + +# ========================================================================= +# Messaging Tracker +# ========================================================================= + + +@dataclass +class MessagingTracker: + """Tracks messaging operations.""" + + system: str + operation: str + destination: str + span: Optional[Span] = field(default=None, repr=False) + start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + message_count: int = 0 + bytes_transferred: int = 0 + + def set_result( + self, + message_count: int = 0, + bytes_transferred: int = 0, + ) -> MessagingTracker: + self.message_count = message_count + self.bytes_transferred = bytes_transferred + if self.span: + if message_count > 0: + self.span.set_attribute("botanu.messaging.message_count", message_count) + if bytes_transferred > 0: + self.span.set_attribute("botanu.messaging.bytes_transferred", bytes_transferred) + return self + + def set_error(self, error: Exception) -> MessagingTracker: + if self.span: + self.span.set_status(Status(StatusCode.ERROR, str(error))) + self.span.set_attribute("botanu.messaging.error", type(error).__name__) + self.span.record_exception(error) + return self + + def add_metadata(self, **kwargs: Any) -> MessagingTracker: + if self.span: + for key, value in kwargs.items(): + attr_key = key if key.startswith("botanu.") else f"botanu.messaging.{key}" + self.span.set_attribute(attr_key, value) + return self + + def _finalize(self) -> None: + if not self.span: + return + duration_ms = (datetime.now(timezone.utc) - self.start_time).total_seconds() * 1000 + self.span.set_attribute("botanu.messaging.duration_ms", duration_ms) + + +@contextmanager +def track_messaging_operation( + system: str, + operation: str, + destination: str, + **kwargs: Any, +) -> Generator[MessagingTracker, None, None]: + """Track a messaging operation. + + Args: + system: Messaging system (sqs, kafka, pubsub, …). + operation: Type of operation (publish, consume, …). + destination: Queue/topic name. 
+ """ + tracer = trace.get_tracer("botanu.messaging") + normalized_system = MESSAGING_SYSTEMS.get(system.lower(), system.lower()) + span_kind = SpanKind.PRODUCER if operation in ("publish", "send") else SpanKind.CONSUMER + + with tracer.start_as_current_span( + name=f"messaging.{normalized_system}.{operation.lower()}", + kind=span_kind, + ) as span: + span.set_attribute("messaging.system", normalized_system) + span.set_attribute("messaging.operation", operation.lower()) + span.set_attribute("messaging.destination.name", destination) + span.set_attribute("botanu.vendor", normalized_system) + for key, value in kwargs.items(): + span.set_attribute(f"botanu.messaging.{key}", value) + + tracker = MessagingTracker( + system=normalized_system, + operation=operation, + destination=destination, + span=span, + ) + try: + yield tracker + except Exception as exc: + tracker.set_error(exc) + raise + finally: + tracker._finalize() + + +# ========================================================================= +# Standalone Helpers +# ========================================================================= + + +def set_data_metrics( + rows_returned: int = 0, + rows_affected: int = 0, + bytes_read: int = 0, + bytes_written: int = 0, + objects_count: int = 0, + span: Optional[Span] = None, +) -> None: + """Set data operation metrics on the current span.""" + target_span = span or trace.get_current_span() + if not target_span or not target_span.is_recording(): + return + + if rows_returned > 0: + target_span.set_attribute("botanu.data.rows_returned", rows_returned) + if rows_affected > 0: + target_span.set_attribute("botanu.data.rows_affected", rows_affected) + if bytes_read > 0: + target_span.set_attribute("botanu.data.bytes_read", bytes_read) + if bytes_written > 0: + target_span.set_attribute("botanu.data.bytes_written", bytes_written) + if objects_count > 0: + target_span.set_attribute("botanu.data.objects_count", objects_count) + + +def set_warehouse_metrics( + query_id: str, + bytes_scanned: int, + rows_returned: int = 0, + partitions_scanned: int = 0, + span: Optional[Span] = None, +) -> None: + """Set data warehouse query metrics on the current span.""" + target_span = span or trace.get_current_span() + if not target_span or not target_span.is_recording(): + return + + target_span.set_attribute("botanu.warehouse.query_id", query_id) + target_span.set_attribute("botanu.warehouse.bytes_scanned", bytes_scanned) + if rows_returned > 0: + target_span.set_attribute("botanu.data.rows_returned", rows_returned) + if partitions_scanned > 0: + target_span.set_attribute("botanu.warehouse.partitions_scanned", partitions_scanned) diff --git a/src/botanu/tracking/ledger.py b/src/botanu/tracking/ledger.py new file mode 100644 index 0000000..3fe982a --- /dev/null +++ b/src/botanu/tracking/ledger.py @@ -0,0 +1,420 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Attempt Ledger — durable event log for invisible cost tracking. + +An append-only event log that is NEVER sampled and survives crashes. +Uses OTel Logs API to emit structured events. 
+ +Event Types: +- ``attempt.started``: Run/attempt began +- ``llm.attempted``: LLM call attempt (with tokens, cost) +- ``tool.attempted``: Tool execution attempt +- ``attempt.ended``: Run/attempt completed +- ``cancellation.requested``: Cancellation was requested +- ``zombie.detected``: Work continued after timeout +""" + +from __future__ import annotations + +import logging +import os +import time +from dataclasses import dataclass, field +from enum import Enum +from functools import lru_cache +from typing import Any, Dict, Optional + +from opentelemetry import trace + +logger = logging.getLogger(__name__) + + +class LedgerEventType(str, Enum): + ATTEMPT_STARTED = "attempt.started" + ATTEMPT_ENDED = "attempt.ended" + LLM_ATTEMPTED = "llm.attempted" + TOOL_ATTEMPTED = "tool.attempted" + CANCEL_REQUESTED = "cancellation.requested" + CANCEL_ACKNOWLEDGED = "cancellation.acknowledged" + ZOMBIE_DETECTED = "zombie.detected" + REDELIVERY_DETECTED = "redelivery.detected" + + +class AttemptStatus(str, Enum): + SUCCESS = "success" + ERROR = "error" + TIMEOUT = "timeout" + CANCELLED = "cancelled" + RATE_LIMITED = "rate_limited" + + +@dataclass +class AttemptLedger: + """Durable event ledger for cost tracking. + + Emits structured log records that are never sampled, providing a + reliable source of truth for attempt counts, token costs, and zombie work. + """ + + service_name: str = field( + default_factory=lambda: os.getenv("OTEL_SERVICE_NAME", "unknown"), + ) + otlp_endpoint: Optional[str] = field(default=None) + _logger: Any = field(default=None, init=False, repr=False) + _initialized: bool = field(default=False, init=False) + + def __post_init__(self) -> None: + self._initialize_logger() + + def _initialize_logger(self) -> None: + try: + from opentelemetry._logs import get_logger_provider, set_logger_provider + from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter + from opentelemetry.sdk._logs import LoggerProvider + from opentelemetry.sdk._logs.export import BatchLogRecordProcessor + + provider = get_logger_provider() + + endpoint = self.otlp_endpoint + if not endpoint: + traces_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT") + if traces_endpoint: + endpoint = f"{traces_endpoint.rstrip('/')}/v1/logs" + else: + endpoint = "http://localhost:4318/v1/logs" + + if provider is None or not hasattr(provider, "get_logger"): + new_provider = LoggerProvider() + exporter = OTLPLogExporter(endpoint=endpoint) + new_provider.add_log_record_processor(BatchLogRecordProcessor(exporter)) + set_logger_provider(new_provider) + provider = new_provider + + self._logger = provider.get_logger("botanu.attempt_ledger") + self._initialized = True + logger.debug("AttemptLedger initialized with endpoint: %s", endpoint) + + except Exception as exc: + logger.warning("Failed to initialize AttemptLedger: %s", exc) + self._initialized = False + + def _get_trace_context(self) -> Dict[str, str]: + span = trace.get_current_span() + ctx = span.get_span_context() if span else None + if ctx and ctx.is_valid: + return { + "trace_id": format(ctx.trace_id, "032x"), + "span_id": format(ctx.span_id, "016x"), + } + return {} + + def _emit( + self, + event_type: LedgerEventType, + severity: Any, + attributes: Dict[str, Any], + ) -> None: + if not self._initialized or not self._logger: + return + + try: + from opentelemetry.sdk._logs import LogRecord + + attrs = { + "event.name": event_type.value, + "service.name": self.service_name, + "timestamp_ms": int(time.time() * 1000), + **self._get_trace_context(), + 
**attributes, + } + + self._logger.emit( + LogRecord( + timestamp=int(time.time_ns()), + severity_number=severity, + severity_text=severity.name, + body=event_type.value, + attributes=attrs, + ) + ) + except Exception as exc: + logger.debug("Failed to emit ledger event: %s", exc) + + # ----------------------------------------------------------------- + # Attempt Lifecycle + # ----------------------------------------------------------------- + + def attempt_started( + self, + run_id: str, + use_case: str, + attempt: int = 1, + root_run_id: Optional[str] = None, + workflow: Optional[str] = None, + tenant_id: Optional[str] = None, + deadline_ts: Optional[float] = None, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.ATTEMPT_STARTED, + SeverityNumber.INFO, + { + "botanu.run_id": run_id, + "botanu.use_case": use_case, + "botanu.attempt": attempt, + "botanu.root_run_id": root_run_id or run_id, + "botanu.workflow": workflow, + "botanu.tenant_id": tenant_id, + "botanu.deadline_ts": deadline_ts, + }, + ) + + def attempt_ended( + self, + run_id: str, + status: str, + duration_ms: Optional[float] = None, + error_class: Optional[str] = None, + reason_code: Optional[str] = None, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.ATTEMPT_ENDED, + SeverityNumber.INFO if status == "success" else SeverityNumber.WARN, + { + "botanu.run_id": run_id, + "status": status, + "duration_ms": duration_ms, + "error_class": error_class, + "reason_code": reason_code, + }, + ) + + # ----------------------------------------------------------------- + # LLM Attempt Events + # ----------------------------------------------------------------- + + def llm_attempted( + self, + run_id: str, + provider: str, + model: str, + operation: str = "chat", + attempt_number: int = 1, + input_tokens: int = 0, + output_tokens: int = 0, + cached_tokens: int = 0, + duration_ms: Optional[float] = None, + status: str = "success", + error_class: Optional[str] = None, + provider_request_id: Optional[str] = None, + estimated_cost_usd: Optional[float] = None, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.LLM_ATTEMPTED, + SeverityNumber.INFO if status == "success" else SeverityNumber.WARN, + { + "botanu.run_id": run_id, + "gen_ai.provider.name": provider, + "gen_ai.request.model": model, + "gen_ai.operation.name": operation, + "botanu.attempt": attempt_number, + "gen_ai.usage.input_tokens": input_tokens, + "gen_ai.usage.output_tokens": output_tokens, + "botanu.usage.cached_tokens": cached_tokens, + "duration_ms": duration_ms, + "status": status, + "error_class": error_class, + "gen_ai.response.id": provider_request_id, + "botanu.cost.estimated_usd": estimated_cost_usd, + }, + ) + + def tool_attempted( + self, + run_id: str, + tool_name: str, + tool_call_id: Optional[str] = None, + attempt_number: int = 1, + duration_ms: Optional[float] = None, + status: str = "success", + error_class: Optional[str] = None, + items_returned: int = 0, + bytes_processed: int = 0, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.TOOL_ATTEMPTED, + SeverityNumber.INFO if status == "success" else SeverityNumber.WARN, + { + "botanu.run_id": run_id, + "gen_ai.tool.name": tool_name, + "gen_ai.tool.call.id": tool_call_id, + "botanu.attempt": attempt_number, + "duration_ms": duration_ms, + "status": status, + "error_class": error_class, + "items_returned": items_returned, + 
"bytes_processed": bytes_processed, + }, + ) + + # ----------------------------------------------------------------- + # Cancellation & Zombie Detection + # ----------------------------------------------------------------- + + def cancel_requested( + self, + run_id: str, + reason: str = "user", + requested_at_ms: Optional[float] = None, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.CANCEL_REQUESTED, + SeverityNumber.WARN, + { + "botanu.run_id": run_id, + "cancellation.reason": reason, + "cancellation.requested_at_ms": requested_at_ms or int(time.time() * 1000), + }, + ) + + def cancel_acknowledged( + self, + run_id: str, + acknowledged_by: str, + latency_ms: Optional[float] = None, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.CANCEL_ACKNOWLEDGED, + SeverityNumber.INFO, + { + "botanu.run_id": run_id, + "cancellation.acknowledged_by": acknowledged_by, + "cancellation.latency_ms": latency_ms, + }, + ) + + def zombie_detected( + self, + run_id: str, + deadline_ts: float, + actual_end_ts: float, + zombie_duration_ms: float, + component: str, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.ZOMBIE_DETECTED, + SeverityNumber.ERROR, + { + "botanu.run_id": run_id, + "deadline_ts": deadline_ts, + "actual_end_ts": actual_end_ts, + "zombie_duration_ms": zombie_duration_ms, + "zombie_component": component, + }, + ) + + def redelivery_detected( + self, + run_id: str, + queue_name: str, + delivery_count: int, + original_message_id: Optional[str] = None, + ) -> None: + from opentelemetry._logs import SeverityNumber + + self._emit( + LedgerEventType.REDELIVERY_DETECTED, + SeverityNumber.WARN, + { + "botanu.run_id": run_id, + "queue.name": queue_name, + "delivery_count": delivery_count, + "original_message_id": original_message_id, + }, + ) + + # ----------------------------------------------------------------- + # Lifecycle + # ----------------------------------------------------------------- + + def flush(self, timeout_ms: int = 5000) -> bool: + if not self._initialized: + return True + try: + from opentelemetry._logs import get_logger_provider + + provider = get_logger_provider() + if hasattr(provider, "force_flush"): + return provider.force_flush(timeout_ms) + return True + except Exception as exc: + logger.debug("Failed to flush AttemptLedger: %s", exc) + return False + + def shutdown(self) -> None: + if not self._initialized: + return + try: + from opentelemetry._logs import get_logger_provider + + provider = get_logger_provider() + if hasattr(provider, "shutdown"): + provider.shutdown() + except Exception as exc: + logger.debug("Failed to shutdown AttemptLedger: %s", exc) + + +# ========================================================================= +# Global ledger +# ========================================================================= + +_global_ledger: Optional[AttemptLedger] = None + + +@lru_cache(maxsize=1) +def _create_default_ledger() -> AttemptLedger: + """Create default ledger instance (thread-safe via lru_cache).""" + return AttemptLedger() + + +def get_ledger() -> AttemptLedger: + """Get the global attempt ledger instance (thread-safe).""" + if _global_ledger is not None: + return _global_ledger + return _create_default_ledger() + + +def set_ledger(ledger: AttemptLedger) -> None: + """Set the global attempt ledger instance.""" + global _global_ledger + _global_ledger = ledger + + +def record_attempt_started(**kwargs: Any) -> None: + 
get_ledger().attempt_started(**kwargs) + + +def record_attempt_ended(**kwargs: Any) -> None: + get_ledger().attempt_ended(**kwargs) + + +def record_llm_attempted(**kwargs: Any) -> None: + get_ledger().llm_attempted(**kwargs) + + +def record_tool_attempted(**kwargs: Any) -> None: + get_ledger().tool_attempted(**kwargs) diff --git a/src/botanu/tracking/llm.py b/src/botanu/tracking/llm.py new file mode 100644 index 0000000..9ddccc4 --- /dev/null +++ b/src/botanu/tracking/llm.py @@ -0,0 +1,688 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""LLM/Model Tracking — Track AI model usage for cost attribution. + +Aligned with OpenTelemetry GenAI Semantic Conventions: +https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/ + +Usage:: + + from botanu.tracking.llm import track_llm_call, track_tool_call + + with track_llm_call(provider="openai", model="gpt-4") as tracker: + response = openai.chat.completions.create(...) + tracker.set_tokens( + input_tokens=response.usage.prompt_tokens, + output_tokens=response.usage.completion_tokens, + ) + tracker.set_request_id(response.id) +""" + +from __future__ import annotations + +import functools +from contextlib import contextmanager +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any, Dict, Generator, List, Optional + +from opentelemetry import metrics, trace +from opentelemetry.trace import Span, SpanKind, Status, StatusCode + +# ========================================================================= +# OTel GenAI Semantic Convention Attribute Names +# ========================================================================= + + +class GenAIAttributes: + """OpenTelemetry GenAI Semantic Convention attribute names.""" + + OPERATION_NAME = "gen_ai.operation.name" + PROVIDER_NAME = "gen_ai.provider.name" + REQUEST_MODEL = "gen_ai.request.model" + RESPONSE_MODEL = "gen_ai.response.model" + USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" + USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + REQUEST_TEMPERATURE = "gen_ai.request.temperature" + REQUEST_TOP_P = "gen_ai.request.top_p" + REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" + REQUEST_STOP_SEQUENCES = "gen_ai.request.stop_sequences" + REQUEST_FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty" + REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty" + RESPONSE_ID = "gen_ai.response.id" + RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons" + TOOL_NAME = "gen_ai.tool.name" + TOOL_CALL_ID = "gen_ai.tool.call.id" + ERROR_TYPE = "error.type" + + +class BotanuAttributes: + """Botanu-specific attributes for cost attribution.""" + + PROVIDER_REQUEST_ID = "botanu.provider.request_id" + CLIENT_REQUEST_ID = "botanu.provider.client_request_id" + TOKENS_CACHED = "botanu.usage.cached_tokens" + TOKENS_CACHED_READ = "botanu.usage.cache_read_tokens" + TOKENS_CACHED_WRITE = "botanu.usage.cache_write_tokens" + STREAMING = "botanu.request.streaming" + CACHE_HIT = "botanu.request.cache_hit" + ATTEMPT_NUMBER = "botanu.request.attempt" + TOOL_SUCCESS = "botanu.tool.success" + TOOL_ITEMS_RETURNED = "botanu.tool.items_returned" + TOOL_BYTES_PROCESSED = "botanu.tool.bytes_processed" + TOOL_DURATION_MS = "botanu.tool.duration_ms" + VENDOR = "botanu.vendor" + + +# ========================================================================= +# Provider name mapping +# ========================================================================= + +LLM_PROVIDERS: Dict[str, str] = { + "openai": "openai", + 
"azure_openai": "azure.openai", + "azure-openai": "azure.openai", + "azureopenai": "azure.openai", + "anthropic": "anthropic", + "claude": "anthropic", + "bedrock": "aws.bedrock", + "aws_bedrock": "aws.bedrock", + "amazon_bedrock": "aws.bedrock", + "vertex": "gcp.vertex_ai", + "vertexai": "gcp.vertex_ai", + "vertex_ai": "gcp.vertex_ai", + "gcp_vertex": "gcp.vertex_ai", + "gemini": "gcp.vertex_ai", + "google": "gcp.vertex_ai", + "cohere": "cohere", + "mistral": "mistral", + "mistralai": "mistral", + "together": "together", + "togetherai": "together", + "groq": "groq", + "replicate": "replicate", + "ollama": "ollama", + "huggingface": "huggingface", + "hf": "huggingface", + "fireworks": "fireworks", + "perplexity": "perplexity", +} + + +class ModelOperation: + """GenAI operation types per OTel semconv.""" + + CHAT = "chat" + TEXT_COMPLETION = "text_completion" + EMBEDDINGS = "embeddings" + GENERATE_CONTENT = "generate_content" + EXECUTE_TOOL = "execute_tool" + CREATE_AGENT = "create_agent" + INVOKE_AGENT = "invoke_agent" + RERANK = "rerank" + IMAGE_GENERATION = "image_generation" + IMAGE_EDIT = "image_edit" + SPEECH_TO_TEXT = "speech_to_text" + TEXT_TO_SPEECH = "text_to_speech" + MODERATION = "moderation" + + # Aliases + COMPLETION = "text_completion" + EMBEDDING = "embeddings" + FUNCTION_CALL = "execute_tool" + TOOL_USE = "execute_tool" + + +# ========================================================================= +# GenAI Metrics +# ========================================================================= + +_meter = metrics.get_meter("botanu.gen_ai") + +_token_usage_histogram = _meter.create_histogram( + name="gen_ai.client.token.usage", + description="Number of input and output tokens used", + unit="{token}", +) + +_operation_duration_histogram = _meter.create_histogram( + name="gen_ai.client.operation.duration", + description="GenAI operation duration", + unit="s", +) + +_attempt_counter = _meter.create_counter( + name="botanu.gen_ai.attempts", + description="Number of request attempts (including retries)", + unit="{attempt}", +) + + +def _record_token_metrics( + provider: str, + model: str, + operation: str, + input_tokens: int, + output_tokens: int, + error_type: Optional[str] = None, +) -> None: + base_attrs: Dict[str, str] = { + GenAIAttributes.OPERATION_NAME: operation, + GenAIAttributes.PROVIDER_NAME: provider, + GenAIAttributes.REQUEST_MODEL: model, + } + if error_type: + base_attrs[GenAIAttributes.ERROR_TYPE] = error_type + + if input_tokens > 0: + _token_usage_histogram.record( + input_tokens, + {**base_attrs, "gen_ai.token.type": "input"}, + ) + if output_tokens > 0: + _token_usage_histogram.record( + output_tokens, + {**base_attrs, "gen_ai.token.type": "output"}, + ) + + +def _record_duration_metric( + provider: str, + model: str, + operation: str, + duration_seconds: float, + error_type: Optional[str] = None, +) -> None: + attrs: Dict[str, str] = { + GenAIAttributes.OPERATION_NAME: operation, + GenAIAttributes.PROVIDER_NAME: provider, + GenAIAttributes.REQUEST_MODEL: model, + } + if error_type: + attrs[GenAIAttributes.ERROR_TYPE] = error_type + + _operation_duration_histogram.record(duration_seconds, attrs) + + +# ========================================================================= +# LLM Tracker +# ========================================================================= + + +@dataclass +class LLMTracker: + """Context manager for tracking LLM calls with OTel GenAI semconv.""" + + provider: str + model: str + operation: str = ModelOperation.CHAT + span: Optional[Span] 
= field(default=None, repr=False) + start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + input_tokens: int = 0 + output_tokens: int = 0 + cached_tokens: int = 0 + cache_read_tokens: int = 0 + cache_write_tokens: int = 0 + + provider_request_id: Optional[str] = None + client_request_id: Optional[str] = None + response_model: Optional[str] = None + finish_reason: Optional[str] = None + is_streaming: bool = False + cache_hit: bool = False + attempt_number: int = 1 + error_type: Optional[str] = None + + def set_tokens( + self, + input_tokens: int = 0, + output_tokens: int = 0, + cached_tokens: int = 0, + cache_read_tokens: int = 0, + cache_write_tokens: int = 0, + ) -> LLMTracker: + """Set token counts from model response.""" + self.input_tokens = input_tokens + self.output_tokens = output_tokens + self.cached_tokens = cached_tokens or cache_read_tokens + self.cache_read_tokens = cache_read_tokens + self.cache_write_tokens = cache_write_tokens + + if self.span: + self.span.set_attribute(GenAIAttributes.USAGE_INPUT_TOKENS, input_tokens) + self.span.set_attribute(GenAIAttributes.USAGE_OUTPUT_TOKENS, output_tokens) + if self.cached_tokens > 0: + self.span.set_attribute(BotanuAttributes.TOKENS_CACHED, self.cached_tokens) + if cache_read_tokens > 0: + self.span.set_attribute(BotanuAttributes.TOKENS_CACHED_READ, cache_read_tokens) + if cache_write_tokens > 0: + self.span.set_attribute(BotanuAttributes.TOKENS_CACHED_WRITE, cache_write_tokens) + return self + + def set_request_id( + self, + provider_request_id: Optional[str] = None, + client_request_id: Optional[str] = None, + ) -> LLMTracker: + """Set provider request IDs for billing reconciliation.""" + if provider_request_id: + self.provider_request_id = provider_request_id + if self.span: + self.span.set_attribute(GenAIAttributes.RESPONSE_ID, provider_request_id) + self.span.set_attribute(BotanuAttributes.PROVIDER_REQUEST_ID, provider_request_id) + if client_request_id: + self.client_request_id = client_request_id + if self.span: + self.span.set_attribute(BotanuAttributes.CLIENT_REQUEST_ID, client_request_id) + return self + + def set_response_model(self, model: str) -> LLMTracker: + """Set the actual model used in the response.""" + self.response_model = model + if self.span: + self.span.set_attribute(GenAIAttributes.RESPONSE_MODEL, model) + return self + + def set_finish_reason(self, reason: str) -> LLMTracker: + """Set the finish/stop reason from the response.""" + self.finish_reason = reason + if self.span: + self.span.set_attribute(GenAIAttributes.RESPONSE_FINISH_REASONS, [reason]) + return self + + def set_streaming(self, is_streaming: bool = True) -> LLMTracker: + """Mark request as streaming.""" + self.is_streaming = is_streaming + if self.span: + self.span.set_attribute(BotanuAttributes.STREAMING, is_streaming) + return self + + def set_cache_hit(self, cache_hit: bool = True) -> LLMTracker: + """Mark as cache hit.""" + self.cache_hit = cache_hit + if self.span: + self.span.set_attribute(BotanuAttributes.CACHE_HIT, cache_hit) + return self + + def set_attempt(self, attempt_number: int) -> LLMTracker: + """Set the attempt number (for retry tracking).""" + self.attempt_number = attempt_number + if self.span: + self.span.set_attribute(BotanuAttributes.ATTEMPT_NUMBER, attempt_number) + return self + + def set_request_params( + self, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + max_tokens: Optional[int] = None, + stop_sequences: Optional[List[str]] = None, + frequency_penalty: 
Optional[float] = None, + presence_penalty: Optional[float] = None, + ) -> LLMTracker: + """Set request parameters per OTel GenAI semconv.""" + if self.span: + if temperature is not None: + self.span.set_attribute(GenAIAttributes.REQUEST_TEMPERATURE, temperature) + if top_p is not None: + self.span.set_attribute(GenAIAttributes.REQUEST_TOP_P, top_p) + if max_tokens is not None: + self.span.set_attribute(GenAIAttributes.REQUEST_MAX_TOKENS, max_tokens) + if stop_sequences is not None: + self.span.set_attribute(GenAIAttributes.REQUEST_STOP_SEQUENCES, stop_sequences) + if frequency_penalty is not None: + self.span.set_attribute(GenAIAttributes.REQUEST_FREQUENCY_PENALTY, frequency_penalty) + if presence_penalty is not None: + self.span.set_attribute(GenAIAttributes.REQUEST_PRESENCE_PENALTY, presence_penalty) + return self + + def set_error(self, error: Exception) -> LLMTracker: + """Record an error from the LLM call.""" + self.error_type = type(error).__name__ + if self.span: + self.span.set_status(Status(StatusCode.ERROR, str(error))) + self.span.set_attribute(GenAIAttributes.ERROR_TYPE, self.error_type) + self.span.record_exception(error) + return self + + def add_metadata(self, **kwargs: Any) -> LLMTracker: + """Add custom metadata to the span.""" + if self.span: + for key, value in kwargs.items(): + attr_key = key if key.startswith(("botanu.", "gen_ai.")) else f"botanu.{key}" + self.span.set_attribute(attr_key, value) + return self + + def _finalize(self) -> None: + if not self.span: + return + + duration_seconds = (datetime.now(timezone.utc) - self.start_time).total_seconds() + + _record_token_metrics( + provider=self.provider, + model=self.model, + operation=self.operation, + input_tokens=self.input_tokens, + output_tokens=self.output_tokens, + error_type=self.error_type, + ) + _record_duration_metric( + provider=self.provider, + model=self.model, + operation=self.operation, + duration_seconds=duration_seconds, + error_type=self.error_type, + ) + _attempt_counter.add( + 1, + { + GenAIAttributes.PROVIDER_NAME: self.provider, + GenAIAttributes.REQUEST_MODEL: self.model, + GenAIAttributes.OPERATION_NAME: self.operation, + "status": "error" if self.error_type else "success", + }, + ) + + +@contextmanager +def track_llm_call( + provider: str, + model: str, + operation: str = ModelOperation.CHAT, + client_request_id: Optional[str] = None, + **kwargs: Any, +) -> Generator[LLMTracker, None, None]: + """Context manager for tracking LLM/model calls with OTel GenAI semconv. + + Args: + provider: LLM provider (openai, anthropic, bedrock, vertex, …). + model: Model name/ID (gpt-4, claude-3-opus, …). + operation: Type of operation (chat, embeddings, text_completion, …). + client_request_id: Optional client-generated request ID. + **kwargs: Additional span attributes. + + Yields: + :class:`LLMTracker` instance. 
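+
+    Example (illustrative only; ``client`` is a placeholder for an
+    OpenAI-style SDK client and is not part of Botanu)::
+
+        with track_llm_call(provider="openai", model="gpt-4") as llm:
+            llm.set_request_params(temperature=0.2, max_tokens=512)
+            response = client.chat.completions.create(...)
+            llm.set_tokens(
+                input_tokens=response.usage.prompt_tokens,
+                output_tokens=response.usage.completion_tokens,
+            )
+            llm.set_request_id(provider_request_id=response.id)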
+ """ + tracer = trace.get_tracer("botanu.gen_ai") + normalized_provider = LLM_PROVIDERS.get(provider.lower(), provider.lower()) + span_name = f"{operation} {model}" + + with tracer.start_as_current_span(name=span_name, kind=SpanKind.CLIENT) as span: + span.set_attribute(GenAIAttributes.OPERATION_NAME, operation) + span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized_provider) + span.set_attribute(GenAIAttributes.REQUEST_MODEL, model) + span.set_attribute(BotanuAttributes.VENDOR, normalized_provider) + + for key, value in kwargs.items(): + attr_key = key if key.startswith(("botanu.", "gen_ai.")) else f"botanu.{key}" + span.set_attribute(attr_key, value) + + tracker = LLMTracker( + provider=normalized_provider, + model=model, + operation=operation, + span=span, + ) + if client_request_id: + tracker.set_request_id(client_request_id=client_request_id) + + try: + yield tracker + except Exception as exc: + tracker.set_error(exc) + raise + finally: + tracker._finalize() + + +# ========================================================================= +# Tool/Function Call Tracker +# ========================================================================= + +_tool_duration_histogram = _meter.create_histogram( + name="botanu.tool.duration", + description="Tool execution duration", + unit="s", +) + +_tool_counter = _meter.create_counter( + name="botanu.tool.executions", + description="Number of tool executions", + unit="{execution}", +) + + +@dataclass +class ToolTracker: + """Context manager for tracking tool/function calls.""" + + tool_name: str + tool_call_id: Optional[str] = None + provider: Optional[str] = None + span: Optional[Span] = field(default=None, repr=False) + start_time: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + success: bool = True + items_returned: int = 0 + bytes_processed: int = 0 + error_type: Optional[str] = None + + def set_result( + self, + success: bool = True, + items_returned: int = 0, + bytes_processed: int = 0, + ) -> ToolTracker: + """Set tool execution result.""" + self.success = success + self.items_returned = items_returned + self.bytes_processed = bytes_processed + if self.span: + self.span.set_attribute(BotanuAttributes.TOOL_SUCCESS, success) + if items_returned > 0: + self.span.set_attribute(BotanuAttributes.TOOL_ITEMS_RETURNED, items_returned) + if bytes_processed > 0: + self.span.set_attribute(BotanuAttributes.TOOL_BYTES_PROCESSED, bytes_processed) + return self + + def set_tool_call_id(self, tool_call_id: str) -> ToolTracker: + """Set the tool call ID from the LLM response.""" + self.tool_call_id = tool_call_id + if self.span: + self.span.set_attribute(GenAIAttributes.TOOL_CALL_ID, tool_call_id) + return self + + def set_error(self, error: Exception) -> ToolTracker: + """Record tool execution error.""" + self.success = False + self.error_type = type(error).__name__ + if self.span: + self.span.set_status(Status(StatusCode.ERROR, str(error))) + self.span.set_attribute(GenAIAttributes.ERROR_TYPE, self.error_type) + self.span.record_exception(error) + return self + + def add_metadata(self, **kwargs: Any) -> ToolTracker: + """Add custom metadata to the span.""" + if self.span: + for key, value in kwargs.items(): + attr_key = key if key.startswith(("botanu.", "gen_ai.")) else f"botanu.tool.{key}" + self.span.set_attribute(attr_key, value) + return self + + def _finalize(self) -> None: + if not self.span: + return + duration_seconds = (datetime.now(timezone.utc) - self.start_time).total_seconds() + 
self.span.set_attribute(BotanuAttributes.TOOL_DURATION_MS, duration_seconds * 1000) + + attrs: Dict[str, str] = { + GenAIAttributes.TOOL_NAME: self.tool_name, + "status": "error" if self.error_type else "success", + } + if self.provider: + attrs[GenAIAttributes.PROVIDER_NAME] = self.provider + + _tool_duration_histogram.record(duration_seconds, attrs) + _tool_counter.add(1, attrs) + + +@contextmanager +def track_tool_call( + tool_name: str, + tool_call_id: Optional[str] = None, + provider: Optional[str] = None, + **kwargs: Any, +) -> Generator[ToolTracker, None, None]: + """Context manager for tracking tool/function calls. + + Args: + tool_name: Name of the tool/function. + tool_call_id: Tool call ID from the LLM response. + provider: Tool provider if external (e.g., ``"tavily"``). + **kwargs: Additional span attributes. + + Yields: + :class:`ToolTracker` instance. + """ + tracer = trace.get_tracer("botanu.gen_ai") + span_name = f"execute_tool {tool_name}" + + with tracer.start_as_current_span(name=span_name, kind=SpanKind.INTERNAL) as span: + span.set_attribute(GenAIAttributes.OPERATION_NAME, ModelOperation.EXECUTE_TOOL) + span.set_attribute(GenAIAttributes.TOOL_NAME, tool_name) + + if tool_call_id: + span.set_attribute(GenAIAttributes.TOOL_CALL_ID, tool_call_id) + if provider: + normalized = LLM_PROVIDERS.get(provider.lower(), provider.lower()) + span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized) + span.set_attribute(BotanuAttributes.VENDOR, normalized) + + for key, value in kwargs.items(): + attr_key = key if key.startswith(("botanu.", "gen_ai.")) else f"botanu.tool.{key}" + span.set_attribute(attr_key, value) + + tracker = ToolTracker( + tool_name=tool_name, + tool_call_id=tool_call_id, + provider=provider, + span=span, + ) + + try: + yield tracker + except Exception as exc: + tracker.set_error(exc) + raise + finally: + tracker._finalize() + + +# ========================================================================= +# Standalone Helpers +# ========================================================================= + + +def set_llm_attributes( + provider: str, + model: str, + operation: str = ModelOperation.CHAT, + input_tokens: int = 0, + output_tokens: int = 0, + cached_tokens: int = 0, + streaming: bool = False, + provider_request_id: Optional[str] = None, + span: Optional[Span] = None, +) -> None: + """Set LLM attributes on the current span using OTel GenAI semconv.""" + target_span = span or trace.get_current_span() + if not target_span or not target_span.is_recording(): + return + + normalized_provider = LLM_PROVIDERS.get(provider.lower(), provider.lower()) + + target_span.set_attribute(GenAIAttributes.OPERATION_NAME, operation) + target_span.set_attribute(GenAIAttributes.PROVIDER_NAME, normalized_provider) + target_span.set_attribute(GenAIAttributes.REQUEST_MODEL, model) + target_span.set_attribute(BotanuAttributes.VENDOR, normalized_provider) + + if input_tokens > 0: + target_span.set_attribute(GenAIAttributes.USAGE_INPUT_TOKENS, input_tokens) + if output_tokens > 0: + target_span.set_attribute(GenAIAttributes.USAGE_OUTPUT_TOKENS, output_tokens) + if cached_tokens > 0: + target_span.set_attribute(BotanuAttributes.TOKENS_CACHED, cached_tokens) + if streaming: + target_span.set_attribute(BotanuAttributes.STREAMING, True) + if provider_request_id: + target_span.set_attribute(GenAIAttributes.RESPONSE_ID, provider_request_id) + target_span.set_attribute(BotanuAttributes.PROVIDER_REQUEST_ID, provider_request_id) + + _record_token_metrics( + provider=normalized_provider, + 
model=model, + operation=operation, + input_tokens=input_tokens, + output_tokens=output_tokens, + ) + + +def set_token_usage( + input_tokens: int, + output_tokens: int, + cached_tokens: int = 0, + span: Optional[Span] = None, +) -> None: + """Set token usage on the current span using OTel GenAI semconv.""" + target_span = span or trace.get_current_span() + if not target_span or not target_span.is_recording(): + return + + target_span.set_attribute(GenAIAttributes.USAGE_INPUT_TOKENS, input_tokens) + target_span.set_attribute(GenAIAttributes.USAGE_OUTPUT_TOKENS, output_tokens) + + if cached_tokens > 0: + target_span.set_attribute(BotanuAttributes.TOKENS_CACHED, cached_tokens) + + +def llm_instrumented( + provider: str, + model_param: str = "model", + tokens_from_response: bool = True, +) -> Any: + """Decorator to auto-instrument LLM client methods. + + Args: + provider: LLM provider name. + model_param: Name of the parameter containing the model name. + tokens_from_response: Whether to extract tokens from ``response.usage``. + """ + + def decorator(func: Any) -> Any: + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + model = kwargs.get(model_param) or (args[1] if len(args) > 1 else "unknown") + + with track_llm_call(provider, model) as tracker: + if kwargs.get("stream"): + tracker.set_streaming(True) + + response = func(*args, **kwargs) + + if tokens_from_response and hasattr(response, "usage"): + usage = response.usage + tracker.set_tokens( + input_tokens=getattr(usage, "prompt_tokens", 0) or getattr(usage, "input_tokens", 0), + output_tokens=getattr(usage, "completion_tokens", 0) or getattr(usage, "output_tokens", 0), + ) + + return response + + return wrapper + + return decorator diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e6ae60f --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..202e424 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Shared test fixtures for Botanu SDK tests.""" + +from __future__ import annotations + +import pytest +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter +from opentelemetry.sdk.trace.sampling import ALWAYS_ON + +# Module-level provider and exporter to avoid "cannot override" warnings +_provider: TracerProvider = None +_exporter: InMemorySpanExporter = None + + +def _get_or_create_provider() -> tuple[TracerProvider, InMemorySpanExporter]: + """Get or create the global test provider.""" + global _provider, _exporter + + if _provider is None: + _provider = TracerProvider(sampler=ALWAYS_ON) + _exporter = InMemorySpanExporter() + _provider.add_span_processor(SimpleSpanProcessor(_exporter)) + trace.set_tracer_provider(_provider) + + return _provider, _exporter + + +@pytest.fixture(autouse=True) +def reset_tracing(): + """Reset tracing state before each test.""" + _, exporter = _get_or_create_provider() + exporter.clear() + yield + exporter.clear() + + +@pytest.fixture +def tracer_provider(): + """Get the test TracerProvider.""" + provider, _ = _get_or_create_provider() + return provider + + +@pytest.fixture +def memory_exporter(): + 
"""Get the in-memory span exporter for testing.""" + _, exporter = _get_or_create_provider() + return exporter + + +@pytest.fixture +def tracer(tracer_provider): + """Get a tracer instance.""" + return trace.get_tracer("test-tracer") diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..e6ae60f --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e6ae60f --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 diff --git a/tests/unit/test_bootstrap.py b/tests/unit/test_bootstrap.py new file mode 100644 index 0000000..ac0a2c9 --- /dev/null +++ b/tests/unit/test_bootstrap.py @@ -0,0 +1,670 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for bootstrap module — enable(), auto-instrumentation, and config env var precedence.""" + +from __future__ import annotations + +import os +from unittest import mock + +from botanu.sdk.config import BotanuConfig + +# --------------------------------------------------------------------------- +# Config env-var precedence: BOTANU_* > OTEL_* > defaults +# --------------------------------------------------------------------------- + + +class TestConfigBotanuEnvPrecedence: + """BOTANU_* env vars take precedence over OTEL_* equivalents.""" + + def test_botanu_service_name_over_otel(self): + env = { + "BOTANU_SERVICE_NAME": "botanu-svc", + "OTEL_SERVICE_NAME": "otel-svc", + } + with mock.patch.dict(os.environ, env, clear=False): + cfg = BotanuConfig() + assert cfg.service_name == "botanu-svc" + + def test_otel_service_name_fallback(self): + env = {"OTEL_SERVICE_NAME": "otel-svc"} + with mock.patch.dict(os.environ, env, clear=False): + for key in ["BOTANU_SERVICE_NAME"]: + os.environ.pop(key, None) + cfg = BotanuConfig() + assert cfg.service_name == "otel-svc" + + def test_service_name_default(self): + with mock.patch.dict(os.environ, {}, clear=True): + cfg = BotanuConfig() + assert cfg.service_name == "unknown_service" + + def test_botanu_collector_endpoint_over_otel(self): + env = { + "BOTANU_COLLECTOR_ENDPOINT": "http://botanu-collector:4318", + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://otel-collector:4318", + } + with mock.patch.dict(os.environ, env, clear=False): + cfg = BotanuConfig() + assert cfg.otlp_endpoint == "http://botanu-collector:4318" + + def test_otel_exporter_endpoint_fallback(self): + env = {"OTEL_EXPORTER_OTLP_ENDPOINT": "http://otel-collector:4318"} + with mock.patch.dict(os.environ, env, clear=False): + for key in ["BOTANU_COLLECTOR_ENDPOINT", "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"]: + os.environ.pop(key, None) + cfg = BotanuConfig() + assert cfg.otlp_endpoint == "http://otel-collector:4318" + + def test_otel_traces_endpoint_over_base_endpoint(self): + env = { + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": "http://traces:4318/v1/traces", + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://base:4318", + } + with mock.patch.dict(os.environ, env, clear=False): + for key in ["BOTANU_COLLECTOR_ENDPOINT"]: + os.environ.pop(key, None) + cfg = BotanuConfig() + assert cfg.otlp_endpoint == "http://traces:4318/v1/traces" + + def test_endpoint_default_localhost(self): + with mock.patch.dict(os.environ, {}, clear=True): + cfg = BotanuConfig() + assert cfg.otlp_endpoint == 
"http://localhost:4318" + + def test_botanu_environment_over_otel(self): + env = { + "BOTANU_ENVIRONMENT": "botanu-staging", + "OTEL_DEPLOYMENT_ENVIRONMENT": "otel-prod", + } + with mock.patch.dict(os.environ, env, clear=False): + cfg = BotanuConfig() + assert cfg.deployment_environment == "botanu-staging" + + def test_otel_deployment_environment_fallback(self): + env = {"OTEL_DEPLOYMENT_ENVIRONMENT": "otel-prod"} + with mock.patch.dict(os.environ, env, clear=False): + for key in ["BOTANU_ENVIRONMENT"]: + os.environ.pop(key, None) + cfg = BotanuConfig() + assert cfg.deployment_environment == "otel-prod" + + def test_environment_default_production(self): + with mock.patch.dict(os.environ, {}, clear=True): + cfg = BotanuConfig() + assert cfg.deployment_environment == "production" + + def test_explicit_args_override_all_env(self): + env = { + "BOTANU_SERVICE_NAME": "env-name", + "BOTANU_COLLECTOR_ENDPOINT": "http://env:4318", + "BOTANU_ENVIRONMENT": "env-staging", + } + with mock.patch.dict(os.environ, env, clear=False): + cfg = BotanuConfig( + service_name="explicit-name", + otlp_endpoint="http://explicit:4318", + deployment_environment="explicit-staging", + ) + assert cfg.service_name == "explicit-name" + assert cfg.otlp_endpoint == "http://explicit:4318" + assert cfg.deployment_environment == "explicit-staging" + + +# --------------------------------------------------------------------------- +# Config: propagation mode +# --------------------------------------------------------------------------- + + +class TestConfigPropagationMode: + """Tests for propagation mode configuration.""" + + def test_default_lean(self): + with mock.patch.dict(os.environ, {}, clear=True): + cfg = BotanuConfig() + assert cfg.propagation_mode == "lean" + + def test_env_var_full(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "full"}): + cfg = BotanuConfig() + assert cfg.propagation_mode == "full" + + def test_env_var_lean(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "lean"}): + cfg = BotanuConfig() + assert cfg.propagation_mode == "lean" + + def test_invalid_propagation_mode_ignored(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "invalid"}): + cfg = BotanuConfig() + assert cfg.propagation_mode == "lean" + + +# --------------------------------------------------------------------------- +# Config: auto-detect resources +# --------------------------------------------------------------------------- + + +class TestConfigAutoDetectResources: + """Tests for auto-detect resources toggle.""" + + def test_default_true(self): + with mock.patch.dict(os.environ, {}, clear=True): + cfg = BotanuConfig() + assert cfg.auto_detect_resources is True + + def test_env_disable(self): + with mock.patch.dict(os.environ, {"BOTANU_AUTO_DETECT_RESOURCES": "false"}): + cfg = BotanuConfig() + assert cfg.auto_detect_resources is False + + def test_env_enable(self): + with mock.patch.dict(os.environ, {"BOTANU_AUTO_DETECT_RESOURCES": "true"}): + cfg = BotanuConfig() + assert cfg.auto_detect_resources is True + + def test_env_numeric(self): + with mock.patch.dict(os.environ, {"BOTANU_AUTO_DETECT_RESOURCES": "0"}): + cfg = BotanuConfig() + assert cfg.auto_detect_resources is False + + +# --------------------------------------------------------------------------- +# Bootstrap: auto-instrumentation coverage +# --------------------------------------------------------------------------- + + +class TestAutoInstrumentationCoverage: + """Verify all expected instrumentations are wired 
in _enable_auto_instrumentation.""" + + def _get_instrumentation_names(self) -> list[str]: + """Extract all instrumentation names from the bootstrap source.""" + import ast + import inspect + + from botanu.sdk.bootstrap import _enable_auto_instrumentation + + source = inspect.getsource(_enable_auto_instrumentation) + names: list[str] = [] + # Parse all _try_instrument calls and extract the 'name' argument + tree = ast.parse(source) + for node in ast.walk(tree): + if isinstance(node, ast.Call) and isinstance(node.func, ast.Name): + if node.func.id == "_try_instrument" and len(node.args) >= 3: + name_arg = node.args[2] + if isinstance(name_arg, ast.Constant): + names.append(name_arg.value) + return names + + # ── HTTP clients ────────────────────────────────────────────── + + def test_httpx_instrumented(self): + assert "httpx" in self._get_instrumentation_names() + + def test_requests_instrumented(self): + assert "requests" in self._get_instrumentation_names() + + def test_urllib3_instrumented(self): + assert "urllib3" in self._get_instrumentation_names() + + def test_urllib_instrumented(self): + assert "urllib" in self._get_instrumentation_names() + + def test_aiohttp_client_instrumented(self): + assert "aiohttp_client" in self._get_instrumentation_names() + + def test_aiohttp_server_instrumented(self): + assert "aiohttp_server" in self._get_instrumentation_names() + + # ── Web frameworks ──────────────────────────────────────────── + + def test_fastapi_instrumented(self): + assert "fastapi" in self._get_instrumentation_names() + + def test_flask_instrumented(self): + assert "flask" in self._get_instrumentation_names() + + def test_django_instrumented(self): + assert "django" in self._get_instrumentation_names() + + def test_starlette_instrumented(self): + assert "starlette" in self._get_instrumentation_names() + + def test_falcon_instrumented(self): + assert "falcon" in self._get_instrumentation_names() + + def test_pyramid_instrumented(self): + assert "pyramid" in self._get_instrumentation_names() + + def test_tornado_instrumented(self): + assert "tornado" in self._get_instrumentation_names() + + # ── Databases ───────────────────────────────────────────────── + + def test_sqlalchemy_instrumented(self): + assert "sqlalchemy" in self._get_instrumentation_names() + + def test_psycopg2_instrumented(self): + assert "psycopg2" in self._get_instrumentation_names() + + def test_psycopg_instrumented(self): + assert "psycopg" in self._get_instrumentation_names() + + def test_asyncpg_instrumented(self): + assert "asyncpg" in self._get_instrumentation_names() + + def test_aiopg_instrumented(self): + assert "aiopg" in self._get_instrumentation_names() + + def test_pymongo_instrumented(self): + assert "pymongo" in self._get_instrumentation_names() + + def test_redis_instrumented(self): + assert "redis" in self._get_instrumentation_names() + + def test_mysql_instrumented(self): + assert "mysql" in self._get_instrumentation_names() + + def test_pymysql_instrumented(self): + assert "pymysql" in self._get_instrumentation_names() + + def test_sqlite3_instrumented(self): + assert "sqlite3" in self._get_instrumentation_names() + + def test_elasticsearch_instrumented(self): + assert "elasticsearch" in self._get_instrumentation_names() + + def test_cassandra_instrumented(self): + assert "cassandra" in self._get_instrumentation_names() + + # ── Caching ─────────────────────────────────────────────────── + + def test_pymemcache_instrumented(self): + assert "pymemcache" in self._get_instrumentation_names() + + # 
── Messaging ───────────────────────────────────────────────── + + def test_celery_instrumented(self): + assert "celery" in self._get_instrumentation_names() + + def test_kafka_python_instrumented(self): + assert "kafka-python" in self._get_instrumentation_names() + + def test_confluent_kafka_instrumented(self): + assert "confluent-kafka" in self._get_instrumentation_names() + + def test_aiokafka_instrumented(self): + assert "aiokafka" in self._get_instrumentation_names() + + def test_pika_instrumented(self): + assert "pika" in self._get_instrumentation_names() + + def test_aio_pika_instrumented(self): + assert "aio-pika" in self._get_instrumentation_names() + + # ── AWS ─────────────────────────────────────────────────────── + + def test_botocore_instrumented(self): + assert "botocore" in self._get_instrumentation_names() + + def test_boto3sqs_instrumented(self): + assert "boto3sqs" in self._get_instrumentation_names() + + # ── GenAI / AI ──────────────────────────────────────────────── + + def test_openai_instrumented(self): + assert "openai" in self._get_instrumentation_names() + + def test_anthropic_instrumented(self): + assert "anthropic" in self._get_instrumentation_names() + + def test_vertexai_instrumented(self): + assert "vertexai" in self._get_instrumentation_names() + + def test_google_genai_instrumented(self): + assert "google_genai" in self._get_instrumentation_names() + + def test_langchain_instrumented(self): + assert "langchain" in self._get_instrumentation_names() + + def test_ollama_instrumented(self): + assert "ollama" in self._get_instrumentation_names() + + def test_crewai_instrumented(self): + assert "crewai" in self._get_instrumentation_names() + + # ── Runtime ─────────────────────────────────────────────────── + + def test_logging_instrumented(self): + assert "logging" in self._get_instrumentation_names() + + def test_threading_instrumented(self): + assert "threading" in self._get_instrumentation_names() + + def test_asyncio_instrumented(self): + assert "asyncio" in self._get_instrumentation_names() + + +# --------------------------------------------------------------------------- +# Bootstrap: _try_instrument resilience +# --------------------------------------------------------------------------- + + +class TestTryInstrument: + """Tests for _try_instrument helper function.""" + + def test_missing_package_silently_skipped(self): + from botanu.sdk.bootstrap import _try_instrument + + enabled: list[str] = [] + failed: list[tuple[str, str]] = [] + _try_instrument(enabled, failed, "nonexistent", "nonexistent.module", "FooInstrumentor") + assert enabled == [] + assert failed == [] + + def test_instrument_error_recorded(self): + from botanu.sdk.bootstrap import _try_instrument + + enabled: list[str] = [] + failed: list[tuple[str, str]] = [] + # os module exists but has no 'FooInstrumentor' class + _try_instrument(enabled, failed, "os_fake", "os", "FooInstrumentor") + assert enabled == [] + assert len(failed) == 1 + assert failed[0][0] == "os_fake" + + +# --------------------------------------------------------------------------- +# Bootstrap: enable() / disable() / is_enabled() +# --------------------------------------------------------------------------- + + +class TestEnableDisable: + """Tests for bootstrap enable/disable lifecycle.""" + + def test_is_enabled_initially_false(self): + from botanu.sdk import bootstrap + + # Save and reset state + original = bootstrap._initialized + bootstrap._initialized = False + try: + assert bootstrap.is_enabled() is False + finally: 
+ bootstrap._initialized = original + + def test_get_config_returns_none_when_not_initialized(self): + from botanu.sdk import bootstrap + + original_init = bootstrap._initialized + original_cfg = bootstrap._current_config + bootstrap._initialized = False + bootstrap._current_config = None + try: + assert bootstrap.get_config() is None + finally: + bootstrap._initialized = original_init + bootstrap._current_config = original_cfg + + +# --------------------------------------------------------------------------- +# Bootstrap: endpoint normalization in bootstrap +# --------------------------------------------------------------------------- + + +class TestEndpointNormalization: + """Verify bootstrap appends /v1/traces when needed.""" + + def test_base_endpoint_gets_v1_traces_appended(self): + """Config stores base URL; bootstrap should append /v1/traces.""" + with mock.patch.dict(os.environ, {"OTEL_EXPORTER_OTLP_ENDPOINT": "http://collector:4318"}, clear=True): + cfg = BotanuConfig() + assert cfg.otlp_endpoint == "http://collector:4318" + + # Simulate what bootstrap does + traces_endpoint = cfg.otlp_endpoint + if traces_endpoint and not traces_endpoint.endswith("/v1/traces"): + traces_endpoint = f"{traces_endpoint.rstrip('/')}/v1/traces" + assert traces_endpoint == "http://collector:4318/v1/traces" + + def test_traces_endpoint_not_doubled(self): + """If already ends with /v1/traces, don't append again.""" + with mock.patch.dict( + os.environ, + {"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": "http://collector:4318/v1/traces"}, + clear=True, + ): + cfg = BotanuConfig() + assert cfg.otlp_endpoint == "http://collector:4318/v1/traces" + + traces_endpoint = cfg.otlp_endpoint + if traces_endpoint and not traces_endpoint.endswith("/v1/traces"): + traces_endpoint = f"{traces_endpoint.rstrip('/')}/v1/traces" + assert traces_endpoint == "http://collector:4318/v1/traces" + + def test_botanu_endpoint_gets_v1_traces_appended(self): + """BOTANU_COLLECTOR_ENDPOINT also gets /v1/traces appended by bootstrap.""" + with mock.patch.dict( + os.environ, + {"BOTANU_COLLECTOR_ENDPOINT": "http://my-collector:4318"}, + clear=True, + ): + cfg = BotanuConfig() + assert cfg.otlp_endpoint == "http://my-collector:4318" + + traces_endpoint = cfg.otlp_endpoint + if traces_endpoint and not traces_endpoint.endswith("/v1/traces"): + traces_endpoint = f"{traces_endpoint.rstrip('/')}/v1/traces" + assert traces_endpoint == "http://my-collector:4318/v1/traces" + + def test_trailing_slash_handled(self): + """Trailing slash on base endpoint should not cause double slash.""" + with mock.patch.dict( + os.environ, + {"OTEL_EXPORTER_OTLP_ENDPOINT": "http://collector:4318/"}, + clear=True, + ): + cfg = BotanuConfig() + traces_endpoint = cfg.otlp_endpoint + if traces_endpoint and not traces_endpoint.endswith("/v1/traces"): + traces_endpoint = f"{traces_endpoint.rstrip('/')}/v1/traces" + assert traces_endpoint == "http://collector:4318/v1/traces" + + +# --------------------------------------------------------------------------- +# Bootstrap: thread safety +# --------------------------------------------------------------------------- + + +class TestBootstrapThreadSafety: + """Verify that enable() is thread-safe.""" + + def test_lock_exists(self): + """Bootstrap module must have a threading lock.""" + import threading + + from botanu.sdk import bootstrap + + assert hasattr(bootstrap, "_lock") + assert isinstance(bootstrap._lock, type(threading.RLock())) + + def test_concurrent_enable_only_initializes_once(self): + """Multiple threads calling enable() 
simultaneously should not race.""" + import threading + + from botanu.sdk import bootstrap + + # Reset state + original_init = bootstrap._initialized + original_cfg = bootstrap._current_config + bootstrap._initialized = False + bootstrap._current_config = None + + results = [] + barrier = threading.Barrier(5) + + def call_enable(): + barrier.wait() + try: + result = bootstrap.enable( + service_name="thread-test", + otlp_endpoint="http://localhost:4318", + auto_instrumentation=False, + ) + results.append(result) + except Exception: + results.append(None) + + try: + threads = [threading.Thread(target=call_enable) for _ in range(5)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=10) + + # Exactly one thread should get True (first to init), rest get False + true_count = sum(1 for r in results if r is True) + false_count = sum(1 for r in results if r is False) + assert true_count == 1, f"Expected exactly 1 True, got {true_count}" + assert false_count == 4, f"Expected 4 False, got {false_count}" + finally: + bootstrap._initialized = original_init + bootstrap._current_config = original_cfg + + +# --------------------------------------------------------------------------- +# Bootstrap: full lifecycle +# --------------------------------------------------------------------------- + + +class TestBootstrapLifecycle: + """Tests for enable/disable full lifecycle.""" + + def test_disable_when_not_initialized_is_noop(self): + from botanu.sdk import bootstrap + + original = bootstrap._initialized + bootstrap._initialized = False + try: + bootstrap.disable() # Should not raise + finally: + bootstrap._initialized = original + + def test_disable_clears_config(self): + from botanu.sdk import bootstrap + + original_init = bootstrap._initialized + original_cfg = bootstrap._current_config + + bootstrap._initialized = True + bootstrap._current_config = BotanuConfig(service_name="test") + + try: + # Mock the tracer provider to avoid shutting down the real test provider + mock_provider = mock.MagicMock() + with mock.patch("opentelemetry.trace.get_tracer_provider", return_value=mock_provider): + bootstrap.disable() + assert bootstrap._current_config is None + assert bootstrap._initialized is False + mock_provider.force_flush.assert_called_once() + mock_provider.shutdown.assert_called_once() + finally: + bootstrap._initialized = original_init + bootstrap._current_config = original_cfg + + def test_is_enabled_reflects_state(self): + from botanu.sdk import bootstrap + + original = bootstrap._initialized + + try: + bootstrap._initialized = True + assert bootstrap.is_enabled() is True + bootstrap._initialized = False + assert bootstrap.is_enabled() is False + finally: + bootstrap._initialized = original + + def test_get_config_returns_config_when_set(self): + from botanu.sdk import bootstrap + + original_init = bootstrap._initialized + original_cfg = bootstrap._current_config + + test_cfg = BotanuConfig(service_name="my-svc") + bootstrap._current_config = test_cfg + + try: + assert bootstrap.get_config() is test_cfg + finally: + bootstrap._initialized = original_init + bootstrap._current_config = original_cfg + + +# --------------------------------------------------------------------------- +# Bootstrap: no-sampling guarantee +# --------------------------------------------------------------------------- + + +class TestNoSamplingGuarantee: + """Botanu NEVER samples or drops spans.""" + + def test_always_on_sampler_in_bootstrap(self): + """Bootstrap source must use ALWAYS_ON sampler explicitly.""" + 
import inspect + + from botanu.sdk import bootstrap + + source = inspect.getsource(bootstrap.enable) + assert "ALWAYS_ON" in source, "enable() must use ALWAYS_ON sampler" + assert "sampler=ALWAYS_ON" in source, "TracerProvider must have sampler=ALWAYS_ON" + + def test_no_sampling_imports_in_codebase(self): + """SDK must never import ratio or parent-based samplers.""" + import inspect + + from botanu.sdk import bootstrap + + source = inspect.getsource(bootstrap) + # These samplers would enable span dropping + assert "TraceIdRatio" not in source + assert "ParentBased" not in source + assert "ALWAYS_OFF" not in source + + def test_otel_traces_sampler_env_var_warning(self): + """Setting OTEL_TRACES_SAMPLER should log a warning, not enable sampling.""" + import inspect + + from botanu.sdk import bootstrap + + source = inspect.getsource(bootstrap.enable) + assert "OTEL_TRACES_SAMPLER" in source, "enable() must check for OTEL_TRACES_SAMPLER env var and warn" + + def test_conftest_uses_always_on(self): + """Test provider must also use ALWAYS_ON to match production behavior.""" + from opentelemetry.sdk.trace.sampling import ALWAYS_ON + + from tests.conftest import _get_or_create_provider + + provider, _ = _get_or_create_provider() + assert provider.sampler is ALWAYS_ON + + +# --------------------------------------------------------------------------- +# Bootstrap: provider reuse (no double-spanning) +# --------------------------------------------------------------------------- + + +class TestProviderReuse: + """Botanu must not create a second TracerProvider if one already exists.""" + + def test_reuse_existing_provider_code_path(self): + """Bootstrap source must check for existing TracerProvider.""" + import inspect + + from botanu.sdk import bootstrap + + source = inspect.getsource(bootstrap.enable) + assert "get_tracer_provider" in source, "enable() must check for existing TracerProvider" + assert "isinstance" in source, "enable() must use isinstance to check provider type" diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py new file mode 100644 index 0000000..88eb9cb --- /dev/null +++ b/tests/unit/test_config.py @@ -0,0 +1,360 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for BotanuConfig.""" + +from __future__ import annotations + +import os +from unittest import mock + +import pytest + +from botanu.sdk.config import BotanuConfig, _interpolate_env_vars + + +class TestInterpolateEnvVars: + """Tests for environment variable interpolation.""" + + def test_interpolates_env_vars(self): + with mock.patch.dict(os.environ, {"MY_VAR": "my_value"}): + result = _interpolate_env_vars("endpoint: ${MY_VAR}") + assert result == "endpoint: my_value" + + def test_preserves_unset_vars(self): + result = _interpolate_env_vars("endpoint: ${UNSET_VAR}") + assert result == "endpoint: ${UNSET_VAR}" + + def test_no_interpolation_needed(self): + result = _interpolate_env_vars("endpoint: http://localhost") + assert result == "endpoint: http://localhost" + + def test_default_value_when_unset(self): + result = _interpolate_env_vars("endpoint: ${UNSET_VAR:-default_value}") + assert result == "endpoint: default_value" + + def test_default_value_ignored_when_set(self): + with mock.patch.dict(os.environ, {"MY_VAR": "actual_value"}): + result = _interpolate_env_vars("endpoint: ${MY_VAR:-default_value}") + assert result == "endpoint: actual_value" + + +class TestBotanuConfigDefaults: + """Tests for BotanuConfig defaults.""" + + def 
test_default_values(self): + with mock.patch.dict(os.environ, {}, clear=True): + # Clear relevant env vars + for key in ["OTEL_SERVICE_NAME", "BOTANU_ENVIRONMENT", "OTEL_EXPORTER_OTLP_ENDPOINT"]: + os.environ.pop(key, None) + + config = BotanuConfig() + + assert config.service_name == "unknown_service" + assert config.deployment_environment == "production" + assert config.propagation_mode == "lean" + assert config.auto_detect_resources is True + + def test_env_var_service_name(self): + with mock.patch.dict(os.environ, {"OTEL_SERVICE_NAME": "my-service"}): + config = BotanuConfig() + assert config.service_name == "my-service" + + def test_env_var_environment(self): + with mock.patch.dict(os.environ, {"BOTANU_ENVIRONMENT": "staging"}): + config = BotanuConfig() + assert config.deployment_environment == "staging" + + def test_env_var_otlp_endpoint_base(self): + """OTEL_EXPORTER_OTLP_ENDPOINT is stored as base; bootstrap appends /v1/traces.""" + with mock.patch.dict(os.environ, {"OTEL_EXPORTER_OTLP_ENDPOINT": "http://collector:4318"}): + config = BotanuConfig() + # Base endpoint stored as-is; bootstrap.py appends /v1/traces + assert config.otlp_endpoint == "http://collector:4318" + + def test_env_var_otlp_traces_endpoint_direct(self): + """OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is used directly without appending.""" + with mock.patch.dict(os.environ, {"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": "http://collector:4318/v1/traces"}): + config = BotanuConfig() + # Direct traces endpoint is used as-is + assert config.otlp_endpoint == "http://collector:4318/v1/traces" + + def test_explicit_values_override_env(self): + with mock.patch.dict(os.environ, {"OTEL_SERVICE_NAME": "env-service"}): + config = BotanuConfig(service_name="explicit-service") + assert config.service_name == "explicit-service" + + def test_env_var_propagation_mode(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "full"}): + config = BotanuConfig() + assert config.propagation_mode == "full" + + +class TestBotanuConfigFromYaml: + """Tests for loading config from YAML.""" + + def test_from_yaml_basic(self, tmp_path): + yaml_content = """ +service: + name: yaml-service + environment: production +""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text(yaml_content) + + config = BotanuConfig.from_yaml(str(yaml_file)) + assert config.service_name == "yaml-service" + assert config.deployment_environment == "production" + + def test_from_yaml_with_otlp(self, tmp_path): + yaml_content = """ +service: + name: test-service +otlp: + endpoint: http://localhost:4318 + headers: + Authorization: Bearer token123 +""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text(yaml_content) + + config = BotanuConfig.from_yaml(str(yaml_file)) + assert config.otlp_endpoint == "http://localhost:4318" + assert config.otlp_headers == {"Authorization": "Bearer token123"} + + def test_from_yaml_file_not_found(self): + with pytest.raises(FileNotFoundError): + BotanuConfig.from_yaml("/nonexistent/path/config.yaml") + + def test_from_yaml_empty_file(self, tmp_path): + yaml_file = tmp_path / "empty.yaml" + yaml_file.write_text("") + + config = BotanuConfig.from_yaml(str(yaml_file)) + # Should use defaults + assert config.service_name is not None + + def test_from_yaml_env_interpolation(self, tmp_path): + yaml_content = """ +service: + name: ${TEST_SERVICE_NAME} +""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text(yaml_content) + + with mock.patch.dict(os.environ, {"TEST_SERVICE_NAME": "interpolated-service"}): + config = 
BotanuConfig.from_yaml(str(yaml_file)) + assert config.service_name == "interpolated-service" + + +class TestBotanuConfigFromFileOrEnv: + """Tests for from_file_or_env method.""" + + def test_uses_env_when_no_file(self): + with mock.patch.dict( + os.environ, + {"OTEL_SERVICE_NAME": "env-only-service"}, + clear=False, + ): + # Ensure no config files exist in current directory + config = BotanuConfig.from_file_or_env() + # Should use env vars + assert config.service_name == "env-only-service" + + def test_uses_specified_path(self, tmp_path): + yaml_content = """ +service: + name: file-service +""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text(yaml_content) + + config = BotanuConfig.from_file_or_env(path=str(yaml_file)) + assert config.service_name == "file-service" + + +class TestBotanuConfigToDict: + """Tests for config serialization.""" + + def test_to_dict(self): + config = BotanuConfig( + service_name="test-service", + deployment_environment="staging", + otlp_endpoint="http://localhost:4318", + ) + d = config.to_dict() + + assert d["service"]["name"] == "test-service" + assert d["service"]["environment"] == "staging" + assert d["otlp"]["endpoint"] == "http://localhost:4318" + + +class TestBotanuConfigExportTuning: + """Tests for export tuning env vars (queue, batch, timeout).""" + + def test_default_export_values(self): + with mock.patch.dict(os.environ, {}, clear=True): + for key in ["BOTANU_MAX_QUEUE_SIZE", "BOTANU_MAX_EXPORT_BATCH_SIZE", "BOTANU_EXPORT_TIMEOUT_MILLIS"]: + os.environ.pop(key, None) + config = BotanuConfig() + assert config.max_queue_size == 65536 + assert config.max_export_batch_size == 512 + assert config.export_timeout_millis == 30000 + + def test_env_var_max_queue_size(self): + with mock.patch.dict(os.environ, {"BOTANU_MAX_QUEUE_SIZE": "131072"}): + config = BotanuConfig() + assert config.max_queue_size == 131072 + + def test_env_var_max_export_batch_size(self): + with mock.patch.dict(os.environ, {"BOTANU_MAX_EXPORT_BATCH_SIZE": "1024"}): + config = BotanuConfig() + assert config.max_export_batch_size == 1024 + + def test_env_var_export_timeout_millis(self): + with mock.patch.dict(os.environ, {"BOTANU_EXPORT_TIMEOUT_MILLIS": "60000"}): + config = BotanuConfig() + assert config.export_timeout_millis == 60000 + + def test_invalid_queue_size_ignored(self): + with mock.patch.dict(os.environ, {"BOTANU_MAX_QUEUE_SIZE": "not_a_number"}): + config = BotanuConfig() + assert config.max_queue_size == 65536 + + def test_invalid_batch_size_ignored(self): + with mock.patch.dict(os.environ, {"BOTANU_MAX_EXPORT_BATCH_SIZE": "bad"}): + config = BotanuConfig() + assert config.max_export_batch_size == 512 + + def test_invalid_timeout_ignored(self): + with mock.patch.dict(os.environ, {"BOTANU_EXPORT_TIMEOUT_MILLIS": "abc"}): + config = BotanuConfig() + assert config.export_timeout_millis == 30000 + + +class TestBotanuConfigFromYamlExport: + """Tests for YAML export configuration parsing.""" + + def test_from_yaml_with_export_config(self, tmp_path): + yaml_content = """ +service: + name: yaml-export-test +export: + batch_size: 256 + queue_size: 32768 + delay_ms: 2000 + export_timeout_ms: 15000 +""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text(yaml_content) + + config = BotanuConfig.from_yaml(str(yaml_file)) + assert config.max_export_batch_size == 256 + assert config.max_queue_size == 32768 + assert config.schedule_delay_millis == 2000 + assert config.export_timeout_millis == 15000 + + def test_from_yaml_export_defaults(self, tmp_path): + """YAML without 
export section uses defaults.""" + yaml_content = """ +service: + name: minimal +""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text(yaml_content) + + config = BotanuConfig.from_yaml(str(yaml_file)) + assert config.max_export_batch_size == 512 + assert config.max_queue_size == 65536 + assert config.export_timeout_millis == 30000 + + +class TestBotanuConfigToDictExport: + """Tests for to_dict roundtrip with export params.""" + + def test_to_dict_includes_export_timeout(self): + config = BotanuConfig(export_timeout_millis=45000) + d = config.to_dict() + assert d["export"]["export_timeout_ms"] == 45000 + + def test_to_dict_roundtrip(self, tmp_path): + """to_dict output should be loadable by _from_dict.""" + original = BotanuConfig( + service_name="roundtrip", + max_queue_size=4096, + max_export_batch_size=128, + export_timeout_millis=10000, + ) + d = original.to_dict() + d["auto_instrument_packages"] = original.auto_instrument_packages + restored = BotanuConfig._from_dict(d) + assert restored.max_queue_size == 4096 + assert restored.max_export_batch_size == 128 + assert restored.export_timeout_millis == 10000 + + +class TestBotanuConfigPrecedence: + """Tests for BOTANU_* > OTEL_* > default precedence.""" + + def test_botanu_service_name_over_otel(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_SERVICE_NAME": "botanu-svc", + "OTEL_SERVICE_NAME": "otel-svc", + }, + ): + config = BotanuConfig() + assert config.service_name == "botanu-svc" + + def test_botanu_endpoint_over_otel(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_COLLECTOR_ENDPOINT": "http://botanu:4318", + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://otel:4318", + }, + ): + config = BotanuConfig() + assert config.otlp_endpoint == "http://botanu:4318" + + def test_botanu_environment_over_otel(self): + with mock.patch.dict( + os.environ, + { + "BOTANU_ENVIRONMENT": "staging", + "OTEL_DEPLOYMENT_ENVIRONMENT": "production", + }, + ): + config = BotanuConfig() + assert config.deployment_environment == "staging" + + def test_propagation_mode_rejects_invalid(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "invalid"}): + config = BotanuConfig() + assert config.propagation_mode == "lean" + + def test_auto_detect_resources_env_false(self): + with mock.patch.dict(os.environ, {"BOTANU_AUTO_DETECT_RESOURCES": "false"}): + config = BotanuConfig() + assert config.auto_detect_resources is False + + def test_auto_detect_resources_truthy_values(self): + for truthy in ("true", "1", "yes"): + with mock.patch.dict(os.environ, {"BOTANU_AUTO_DETECT_RESOURCES": truthy}): + config = BotanuConfig() + assert config.auto_detect_resources is True + + +class TestBotanuConfigAutoInstrument: + """Tests for auto-instrumentation configuration.""" + + def test_default_packages(self): + config = BotanuConfig() + packages = config.auto_instrument_packages + + assert "requests" in packages + assert "httpx" in packages + assert "fastapi" in packages + assert "openai_v2" in packages + assert "anthropic" in packages diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py new file mode 100644 index 0000000..91feca4 --- /dev/null +++ b/tests/unit/test_context.py @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for context and baggage helpers.""" + +from __future__ import annotations + +from opentelemetry import trace + +from botanu.sdk.context import ( + get_baggage, + get_current_span, + get_run_id, + get_use_case, + get_workflow, + 
set_baggage, +) + + +class TestBaggageHelpers: + """Tests for baggage helper functions.""" + + def test_set_and_get_baggage(self): + token = set_baggage("test.key", "test-value") + assert token is not None + + value = get_baggage("test.key") + assert value == "test-value" + + def test_get_baggage_missing_key(self): + value = get_baggage("nonexistent.key") + assert value is None + + def test_get_run_id(self): + set_baggage("botanu.run_id", "run-12345") + assert get_run_id() == "run-12345" + + def test_get_run_id_not_set(self): + # In a fresh context, run_id might not be set + # This tests the function doesn't crash + result = get_run_id() + # Result could be None or a previously set value + assert result is None or isinstance(result, str) + + def test_get_use_case(self): + set_baggage("botanu.use_case", "Customer Support") + assert get_use_case() == "Customer Support" + + def test_get_workflow(self): + set_baggage("botanu.workflow", "ticket_handler") + assert get_workflow() == "ticket_handler" + + def test_get_workflow_not_set(self): + result = get_workflow() + assert result is None or isinstance(result, str) + + +class TestSpanHelpers: + """Tests for span helper functions.""" + + def test_get_current_span_with_active_span(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span") as expected_span: + current = get_current_span() + assert current == expected_span + + def test_get_current_span_no_active_span(self): + # When no span is active, should return a non-recording span + span = get_current_span() + assert span is not None + # Non-recording spans have is_recording() == False + assert not span.is_recording() + + +class TestSetBaggageTokenManagement: + """Tests for set_baggage token lifecycle and context management.""" + + def test_set_baggage_returns_detachable_token(self): + from opentelemetry.context import detach + + token = set_baggage("botanu.token_test", "val1") + assert token is not None + assert get_baggage("botanu.token_test") == "val1" + detach(token) + + def test_multiple_set_baggage_stacks_values(self): + token1 = set_baggage("botanu.stack_a", "a") + token2 = set_baggage("botanu.stack_b", "b") + + assert get_baggage("botanu.stack_a") == "a" + assert get_baggage("botanu.stack_b") == "b" + assert token1 is not None + assert token2 is not None + + def test_overwrite_same_key(self): + set_baggage("botanu.overwrite", "first") + set_baggage("botanu.overwrite", "second") + assert get_baggage("botanu.overwrite") == "second" + + def test_get_baggage_returns_none_in_clean_context(self): + from opentelemetry import context as otel_context + + token = otel_context.attach(otel_context.Context()) + try: + assert get_baggage("botanu.surely_missing") is None + finally: + otel_context.detach(token) diff --git a/tests/unit/test_data_tracking.py b/tests/unit/test_data_tracking.py new file mode 100644 index 0000000..6d0f003 --- /dev/null +++ b/tests/unit/test_data_tracking.py @@ -0,0 +1,473 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for data tracking (DB, storage, messaging).""" + +from __future__ import annotations + +import pytest + +from botanu.tracking.data import ( + DBOperation, + MessagingOperation, + StorageOperation, + track_db_operation, + track_messaging_operation, + track_storage_operation, +) + + +class TestTrackDBOperation: + """Tests for track_db_operation context manager.""" + + def test_creates_span_with_operation(self, memory_exporter): + with track_db_operation( + 
system="postgresql", + operation=DBOperation.SELECT, + database="mydb", + ) as tracker: + tracker.set_result(rows_returned=10) + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + assert "db" in spans[0].name.lower() or "select" in spans[0].name.lower() + + def test_records_db_attributes(self, memory_exporter): + with track_db_operation( + system="postgresql", + operation=DBOperation.INSERT, + database="users_db", + ) as tracker: + tracker.set_result(rows_affected=1) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("db.system") == "postgresql" + assert attrs.get("db.name") == "users_db" + + def test_records_error_on_exception(self, memory_exporter): + with pytest.raises(ValueError): + with track_db_operation( + system="mysql", + operation=DBOperation.SELECT, + ): + raise ValueError("Connection failed") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.data.error") == "ValueError" + + def test_set_table(self, memory_exporter): + with track_db_operation( + system="postgresql", + operation=DBOperation.SELECT, + ) as tracker: + tracker.set_table("users", schema="public") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("db.collection.name") == "users" + assert attrs.get("db.schema") == "public" + + def test_set_query_id(self, memory_exporter): + with track_db_operation( + system="snowflake", + operation=DBOperation.SELECT, + ) as tracker: + tracker.set_query_id("01abc123-def4-5678") + tracker.set_bytes_scanned(1024000) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.warehouse.query_id") == "01abc123-def4-5678" + assert attrs.get("botanu.warehouse.bytes_scanned") == 1024000 + + +class TestTrackStorageOperation: + """Tests for track_storage_operation context manager.""" + + def test_creates_span_for_read(self, memory_exporter): + with track_storage_operation( + system="s3", + operation=StorageOperation.GET, + ) as tracker: + tracker.set_result(bytes_read=1024) + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + + def test_records_storage_attributes(self, memory_exporter): + with track_storage_operation( + system="gcs", + operation=StorageOperation.PUT, + ) as tracker: + tracker.set_bucket("data-bucket") + tracker.set_result(bytes_written=2048) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.storage.system") == "gcs" + assert attrs.get("botanu.storage.bucket") == "data-bucket" + + def test_records_error(self, memory_exporter): + with pytest.raises(IOError): + with track_storage_operation( + system="s3", + operation=StorageOperation.GET, + ): + raise OSError("Access denied") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.storage.error") == "OSError" # IOError is alias for OSError + + def test_objects_count(self, memory_exporter): + with track_storage_operation( + system="s3", + operation=StorageOperation.LIST, + ) as tracker: + tracker.set_result(objects_count=50) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.data.objects_count") == 50 + + +class TestTrackMessagingOperation: + """Tests for track_messaging_operation context manager.""" + + def test_creates_span_for_publish(self, memory_exporter): + with track_messaging_operation( 
+ system="kafka", + operation=MessagingOperation.PUBLISH, + destination="orders-topic", + ) as tracker: + tracker.set_result(message_count=1) + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + + def test_records_messaging_attributes(self, memory_exporter): + with track_messaging_operation( + system="sqs", + operation=MessagingOperation.RECEIVE, + destination="my-queue", + ) as tracker: + tracker.set_result(message_count=5) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("messaging.system") == "sqs" + assert attrs.get("messaging.destination.name") == "my-queue" + + def test_records_error(self, memory_exporter): + with pytest.raises(TimeoutError): + with track_messaging_operation( + system="rabbitmq", + operation=MessagingOperation.PUBLISH, + destination="events", + ): + raise TimeoutError("Queue full") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.messaging.error") == "TimeoutError" + + def test_consume_operation(self, memory_exporter): + with track_messaging_operation( + system="kafka", + operation=MessagingOperation.CONSUME, + destination="events-topic", + ) as tracker: + tracker.set_result(message_count=10, bytes_transferred=4096) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("messaging.operation") == "consume" + assert attrs.get("botanu.messaging.message_count") == 10 + assert attrs.get("botanu.messaging.bytes_transferred") == 4096 + + +class TestOperationEnums: + """Tests for operation type enums.""" + + def test_db_operations(self): + assert DBOperation.SELECT == "SELECT" + assert DBOperation.INSERT == "INSERT" + assert DBOperation.UPDATE == "UPDATE" + assert DBOperation.DELETE == "DELETE" + + def test_storage_operations(self): + assert StorageOperation.GET == "GET" + assert StorageOperation.PUT == "PUT" + assert StorageOperation.DELETE == "DELETE" + assert StorageOperation.LIST == "LIST" + + def test_messaging_operations(self): + assert MessagingOperation.PUBLISH == "publish" + assert MessagingOperation.RECEIVE == "receive" + assert MessagingOperation.CONSUME == "consume" + + +class TestSystemNormalization: + """Tests for system name normalization maps.""" + + def test_db_system_aliases(self, memory_exporter): + from botanu.tracking.data import DB_SYSTEMS + + assert DB_SYSTEMS["postgres"] == "postgresql" + assert DB_SYSTEMS["pg"] == "postgresql" + assert DB_SYSTEMS["mongo"] == "mongodb" + assert DB_SYSTEMS["sqlserver"] == "mssql" + + def test_storage_system_aliases(self): + from botanu.tracking.data import STORAGE_SYSTEMS + + assert STORAGE_SYSTEMS["aws_s3"] == "s3" + assert STORAGE_SYSTEMS["google_cloud_storage"] == "gcs" + assert STORAGE_SYSTEMS["blob"] == "azure_blob" + + def test_messaging_system_aliases(self): + from botanu.tracking.data import MESSAGING_SYSTEMS + + assert MESSAGING_SYSTEMS["aws_sqs"] == "sqs" + assert MESSAGING_SYSTEMS["google_pubsub"] == "pubsub" + assert MESSAGING_SYSTEMS["azure_servicebus"] == "servicebus" + + def test_db_alias_used_in_span(self, memory_exporter): + """Alias 'pg' should normalize to 'postgresql' in the span.""" + with track_db_operation(system="pg", operation="SELECT") as tracker: + tracker.set_result(rows_returned=1) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["db.system"] == "postgresql" + + def test_unknown_system_passthrough(self, memory_exporter): + """Unknown systems should pass 
through as lowercase.""" + with track_db_operation(system="CockroachDB", operation="SELECT"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["db.system"] == "cockroachdb" + + +class TestDBTrackerMetadata: + """Tests for DBTracker.add_metadata and set_bytes_scanned.""" + + def test_add_metadata(self, memory_exporter): + with track_db_operation(system="postgresql", operation="SELECT") as tracker: + tracker.add_metadata(query_plan="seq_scan", cost_estimate=42.5) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.data.query_plan"] == "seq_scan" + assert attrs["botanu.data.cost_estimate"] == 42.5 + + def test_add_metadata_preserves_botanu_prefix(self, memory_exporter): + with track_db_operation(system="postgresql", operation="SELECT") as tracker: + tracker.add_metadata(**{"botanu.custom_key": "custom_val"}) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.custom_key"] == "custom_val" + + def test_set_bytes_scanned(self, memory_exporter): + with track_db_operation(system="bigquery", operation="SELECT") as tracker: + tracker.set_bytes_scanned(5_000_000) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.warehouse.bytes_scanned"] == 5_000_000 + assert tracker.bytes_read == 5_000_000 + + def test_duration_finalized(self, memory_exporter): + with track_db_operation(system="postgresql", operation="INSERT"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert "botanu.data.duration_ms" in attrs + assert attrs["botanu.data.duration_ms"] >= 0 + + +class TestStorageTrackerMetadata: + """Tests for StorageTracker.add_metadata.""" + + def test_add_metadata(self, memory_exporter): + with track_storage_operation(system="s3", operation="PUT") as tracker: + tracker.add_metadata(content_type="application/json", region="us-east-1") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.storage.content_type"] == "application/json" + assert attrs["botanu.storage.region"] == "us-east-1" + + def test_duration_finalized(self, memory_exporter): + with track_storage_operation(system="gcs", operation="GET"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert "botanu.storage.duration_ms" in attrs + + +class TestMessagingTrackerMetadata: + """Tests for MessagingTracker.add_metadata and span kind.""" + + def test_add_metadata(self, memory_exporter): + with track_messaging_operation( + system="kafka", + operation="publish", + destination="events", + ) as tracker: + tracker.add_metadata(partition=3, key="order-123") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.messaging.partition"] == 3 + assert attrs["botanu.messaging.key"] == "order-123" + + def test_publish_uses_producer_span_kind(self, memory_exporter): + from opentelemetry.trace import SpanKind + + with track_messaging_operation( + system="kafka", + operation="publish", + destination="topic", + ): + pass + + spans = memory_exporter.get_finished_spans() + assert spans[0].kind == SpanKind.PRODUCER + + def test_consume_uses_consumer_span_kind(self, memory_exporter): + from opentelemetry.trace import SpanKind + + with track_messaging_operation( + system="kafka", + operation="consume", + destination="topic", + ): + pass + + spans = 
memory_exporter.get_finished_spans() + assert spans[0].kind == SpanKind.CONSUMER + + def test_send_uses_producer_span_kind(self, memory_exporter): + from opentelemetry.trace import SpanKind + + with track_messaging_operation( + system="sqs", + operation="send", + destination="queue", + ): + pass + + spans = memory_exporter.get_finished_spans() + assert spans[0].kind == SpanKind.PRODUCER + + def test_duration_finalized(self, memory_exporter): + with track_messaging_operation( + system="sqs", + operation="receive", + destination="q", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert "botanu.messaging.duration_ms" in attrs + + +class TestStandaloneHelpers: + """Tests for set_data_metrics and set_warehouse_metrics.""" + + def test_set_data_metrics(self, memory_exporter): + from opentelemetry import trace as otl_trace + + from botanu.tracking.data import set_data_metrics + + tracer = otl_trace.get_tracer("test") + with tracer.start_as_current_span("test-data-metrics"): + set_data_metrics(rows_returned=100, bytes_read=8192) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.data.rows_returned"] == 100 + assert attrs["botanu.data.bytes_read"] == 8192 + + def test_set_data_metrics_no_active_span(self): + from botanu.tracking.data import set_data_metrics + + # Should not raise when no recording span + set_data_metrics(rows_returned=10) + + def test_set_warehouse_metrics(self, memory_exporter): + from opentelemetry import trace as otl_trace + + from botanu.tracking.data import set_warehouse_metrics + + tracer = otl_trace.get_tracer("test") + with tracer.start_as_current_span("test-warehouse"): + set_warehouse_metrics( + query_id="q-001", + bytes_scanned=10_000_000, + rows_returned=500, + partitions_scanned=12, + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.warehouse.query_id"] == "q-001" + assert attrs["botanu.warehouse.bytes_scanned"] == 10_000_000 + assert attrs["botanu.data.rows_returned"] == 500 + assert attrs["botanu.warehouse.partitions_scanned"] == 12 + + def test_set_warehouse_metrics_no_active_span(self): + from botanu.tracking.data import set_warehouse_metrics + + # Should not raise when no recording span + set_warehouse_metrics(query_id="q-002", bytes_scanned=1000) + + +class TestKwargsPassthrough: + """Tests for additional kwargs passed to context managers.""" + + def test_db_operation_kwargs(self, memory_exporter): + with track_db_operation( + system="postgresql", + operation="SELECT", + statement="SELECT 1", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.data.statement"] == "SELECT 1" + + def test_storage_operation_kwargs(self, memory_exporter): + with track_storage_operation( + system="s3", + operation="GET", + bucket="my-bucket", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.storage.bucket"] == "my-bucket" + + def test_messaging_operation_kwargs(self, memory_exporter): + with track_messaging_operation( + system="kafka", + operation="publish", + destination="topic", + partition_key="order-1", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.messaging.partition_key"] == "order-1" diff --git a/tests/unit/test_decorators.py b/tests/unit/test_decorators.py new file mode 100644 index 0000000..b63f906 
--- /dev/null +++ b/tests/unit/test_decorators.py @@ -0,0 +1,335 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for SDK decorators.""" + +from __future__ import annotations + +import pytest +from opentelemetry import baggage, trace +from opentelemetry import context as otel_context +from opentelemetry.context import get_current + +from botanu.sdk.decorators import botanu_outcome, botanu_use_case + + +@pytest.fixture(autouse=True) +def _clean_otel_context(): + """Reset OTel context before each test to avoid baggage leaking between tests.""" + token = otel_context.attach(otel_context.Context()) + yield + otel_context.detach(token) + + +class TestBotanuUseCaseDecorator: + """Tests for @botanu_use_case decorator.""" + + def test_sync_function_creates_span(self, memory_exporter): + @botanu_use_case("Test Use Case") + def my_function(): + return "result" + + result = my_function() + + assert result == "result" + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "botanu.run/Test Use Case" + + def test_span_has_run_attributes(self, memory_exporter): + @botanu_use_case("Customer Support", workflow="handle_ticket") + def my_function(): + return "done" + + my_function() + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + + assert "botanu.run_id" in attrs + assert attrs["botanu.use_case"] == "Customer Support" + assert attrs["botanu.workflow"] == "handle_ticket" + + def test_emits_started_event(self, memory_exporter): + @botanu_use_case("Test") + def my_function(): + pass + + my_function() + + spans = memory_exporter.get_finished_spans() + events = spans[0].events + + started_events = [e for e in events if e.name == "botanu.run.started"] + assert len(started_events) == 1 + + def test_emits_completed_event(self, memory_exporter): + @botanu_use_case("Test") + def my_function(): + return "done" + + my_function() + + spans = memory_exporter.get_finished_spans() + events = spans[0].events + + completed_events = [e for e in events if e.name == "botanu.run.completed"] + assert len(completed_events) == 1 + assert completed_events[0].attributes["status"] == "success" + + def test_records_exception_on_failure(self, memory_exporter): + @botanu_use_case("Test") + def failing_function(): + raise ValueError("test error") + + with pytest.raises(ValueError): + failing_function() + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + + events = spans[0].events + completed_events = [e for e in events if e.name == "botanu.run.completed"] + assert len(completed_events) == 1 + assert completed_events[0].attributes["status"] == "failure" + assert completed_events[0].attributes["error_class"] == "ValueError" + + @pytest.mark.asyncio + async def test_async_function_creates_span(self, memory_exporter): + @botanu_use_case("Async Test") + async def async_function(): + return "async result" + + result = await async_function() + + assert result == "async result" + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "botanu.run/Async Test" + + @pytest.mark.asyncio + async def test_async_exception_handling(self, memory_exporter): + @botanu_use_case("Async Test") + async def failing_async(): + raise RuntimeError("async error") + + with pytest.raises(RuntimeError): + await failing_async() + + spans = memory_exporter.get_finished_spans() + events = spans[0].events + completed_events = [e for e in events if e.name == 
"botanu.run.completed"] + assert completed_events[0].attributes["status"] == "failure" + + def test_workflow_version_computed(self, memory_exporter): + @botanu_use_case("Test") + def versioned_function(): + return "versioned" + + versioned_function() + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + + assert "botanu.workflow.version" in attrs + assert attrs["botanu.workflow.version"].startswith("v:") + + def test_return_value_preserved(self, memory_exporter): + @botanu_use_case("Test") + def returns_dict(): + return {"key": "value", "count": 42} + + result = returns_dict() + assert result == {"key": "value", "count": 42} + + @pytest.mark.asyncio + async def test_async_return_value_preserved(self, memory_exporter): + @botanu_use_case("Test") + async def returns_data(): + return [1, 2, 3] + + result = await returns_data() + assert result == [1, 2, 3] + + def test_exception_re_raised(self, memory_exporter): + @botanu_use_case("Test") + def raises(): + raise TypeError("bad type") + + with pytest.raises(TypeError, match="bad type"): + raises() + + def test_outcome_status_set_on_success(self, memory_exporter): + @botanu_use_case("Test") + def my_fn(): + return "ok" + + my_fn() + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.outcome.status"] == "success" + + def test_outcome_status_set_on_failure(self, memory_exporter): + @botanu_use_case("Test") + def failing(): + raise RuntimeError("boom") + + with pytest.raises(RuntimeError): + failing() + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.outcome.status"] == "failure" + + def test_duration_ms_recorded(self, memory_exporter): + @botanu_use_case("Test") + def quick_fn(): + return "done" + + quick_fn() + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert "botanu.run.duration_ms" in attrs + assert attrs["botanu.run.duration_ms"] >= 0 + + def test_custom_span_kind(self, memory_exporter): + from opentelemetry.trace import SpanKind + + @botanu_use_case("Test", span_kind=SpanKind.CLIENT) + def client_fn(): + return "ok" + + client_fn() + spans = memory_exporter.get_finished_spans() + assert spans[0].kind == SpanKind.CLIENT + + def test_root_run_id_equals_run_id_for_root(self, memory_exporter): + @botanu_use_case("Test") + def root_fn(): + return "root" + + root_fn() + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + # For a root run, root_run_id should equal run_id + assert attrs["botanu.root_run_id"] == attrs["botanu.run_id"] + + def test_tenant_id_propagated(self, memory_exporter): + @botanu_use_case("Test", tenant_id="tenant-abc") + def tenant_fn(): + return "ok" + + tenant_fn() + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.tenant_id"] == "tenant-abc" + + def test_baggage_cleaned_up_after_sync(self, memory_exporter): + """Verify baggage does NOT leak after the decorated function completes.""" + + @botanu_use_case("Leak Test") + def my_fn(): + # Inside the function, baggage should be set + assert baggage.get_baggage("botanu.run_id", get_current()) is not None + return "ok" + + # Before: no baggage + assert baggage.get_baggage("botanu.run_id", get_current()) is None + + my_fn() + + # After: baggage must be cleaned up (detached) + assert baggage.get_baggage("botanu.run_id", get_current()) is None + + @pytest.mark.asyncio + async def 
test_baggage_cleaned_up_after_async(self, memory_exporter): + """Verify baggage does NOT leak after an async decorated function.""" + + @botanu_use_case("Async Leak Test") + async def my_fn(): + assert baggage.get_baggage("botanu.run_id", get_current()) is not None + return "ok" + + assert baggage.get_baggage("botanu.run_id", get_current()) is None + + await my_fn() + + assert baggage.get_baggage("botanu.run_id", get_current()) is None + + def test_baggage_cleaned_up_after_exception(self, memory_exporter): + """Verify baggage is cleaned up even when the function raises.""" + + @botanu_use_case("Exception Leak Test") + def failing_fn(): + raise RuntimeError("boom") + + assert baggage.get_baggage("botanu.run_id", get_current()) is None + + with pytest.raises(RuntimeError): + failing_fn() + + # Must be cleaned up despite the exception + assert baggage.get_baggage("botanu.run_id", get_current()) is None + + +class TestBotanuOutcomeDecorator: + """Tests for @botanu_outcome decorator.""" + + def test_sync_success_emits_outcome(self, memory_exporter): + tracer_instance = trace.get_tracer("test") + + @botanu_outcome() + def my_fn(): + return "ok" + + with tracer_instance.start_as_current_span("parent"): + result = my_fn() + + assert result == "ok" + + def test_sync_failure_emits_failed(self, memory_exporter): + tracer_instance = trace.get_tracer("test") + + @botanu_outcome() + def failing_fn(): + raise ValueError("broken") + + with tracer_instance.start_as_current_span("parent"): + with pytest.raises(ValueError, match="broken"): + failing_fn() + + @pytest.mark.asyncio + async def test_async_success_emits_outcome(self, memory_exporter): + tracer_instance = trace.get_tracer("test") + + @botanu_outcome() + async def async_fn(): + return "async ok" + + with tracer_instance.start_as_current_span("parent"): + result = await async_fn() + + assert result == "async ok" + + @pytest.mark.asyncio + async def test_async_failure_emits_failed(self, memory_exporter): + tracer_instance = trace.get_tracer("test") + + @botanu_outcome() + async def async_fail(): + raise RuntimeError("async boom") + + with tracer_instance.start_as_current_span("parent"): + with pytest.raises(RuntimeError, match="async boom"): + await async_fail() + + def test_exception_re_raised(self, memory_exporter): + tracer_instance = trace.get_tracer("test") + + @botanu_outcome() + def raises(): + raise TypeError("type err") + + with tracer_instance.start_as_current_span("parent"): + with pytest.raises(TypeError, match="type err"): + raises() diff --git a/tests/unit/test_enricher.py b/tests/unit/test_enricher.py new file mode 100644 index 0000000..a08cfbb --- /dev/null +++ b/tests/unit/test_enricher.py @@ -0,0 +1,160 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for RunContextEnricher processor.""" + +from __future__ import annotations + +from unittest import mock + +from opentelemetry import baggage, context, trace +from opentelemetry.sdk.trace import ReadableSpan + +from botanu.processors.enricher import RunContextEnricher + + +class TestRunContextEnricher: + """Tests for RunContextEnricher processor.""" + + def test_init_lean_mode_default(self): + """Default should be lean mode.""" + enricher = RunContextEnricher() + assert enricher._lean_mode is True + assert enricher._baggage_keys == RunContextEnricher.BAGGAGE_KEYS_LEAN + + def test_init_lean_mode_false(self): + """Can enable full mode.""" + enricher = RunContextEnricher(lean_mode=False) + assert enricher._lean_mode is False + assert 
enricher._baggage_keys == RunContextEnricher.BAGGAGE_KEYS_FULL + + def test_on_start_reads_baggage(self, memory_exporter): + """on_start should read baggage and set span attributes.""" + enricher = RunContextEnricher(lean_mode=True) + + # Set up baggage context - start from a clean context + ctx = context.Context() + ctx = baggage.set_baggage("botanu.run_id", "test-run-123", context=ctx) + ctx = baggage.set_baggage("botanu.use_case", "Test Case", context=ctx) + + # Create a span with the baggage context + tracer = trace.get_tracer("test") + token = context.attach(ctx) + try: + with tracer.start_as_current_span("test-span") as span: + # Manually call on_start to simulate processor behavior + enricher.on_start(span, ctx) + finally: + context.detach(token) + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.run_id") == "test-run-123" + assert attrs.get("botanu.use_case") == "Test Case" + + def test_on_start_full_mode(self, memory_exporter): + """Full mode should read all baggage keys.""" + enricher = RunContextEnricher(lean_mode=False) + + # Set up baggage context with all keys - start from a clean context + ctx = context.Context() + ctx = baggage.set_baggage("botanu.run_id", "run-456", context=ctx) + ctx = baggage.set_baggage("botanu.use_case", "Full Test", context=ctx) + ctx = baggage.set_baggage("botanu.workflow", "my_workflow", context=ctx) + ctx = baggage.set_baggage("botanu.environment", "staging", context=ctx) + ctx = baggage.set_baggage("botanu.tenant_id", "tenant-789", context=ctx) + + tracer = trace.get_tracer("test") + token = context.attach(ctx) + try: + with tracer.start_as_current_span("test-span") as span: + enricher.on_start(span, ctx) + finally: + context.detach(token) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.run_id") == "run-456" + assert attrs.get("botanu.use_case") == "Full Test" + assert attrs.get("botanu.workflow") == "my_workflow" + assert attrs.get("botanu.environment") == "staging" + assert attrs.get("botanu.tenant_id") == "tenant-789" + + def test_on_start_missing_baggage(self, memory_exporter): + """Should handle missing baggage gracefully.""" + enricher = RunContextEnricher() + + # Create a clean context with no baggage + clean_ctx = context.Context() + + tracer = trace.get_tracer("test") + token = context.attach(clean_ctx) + try: + with tracer.start_as_current_span("test-span") as span: + # Pass the clean context with no baggage + enricher.on_start(span, clean_ctx) + finally: + context.detach(token) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + # No botanu attributes should be set + assert "botanu.run_id" not in attrs + + def test_on_start_does_not_override_existing(self, memory_exporter): + """Should not override existing span attributes.""" + enricher = RunContextEnricher() + + # Set up baggage context + ctx = context.Context() + ctx = baggage.set_baggage("botanu.run_id", "baggage-id", context=ctx) + ctx = baggage.set_baggage("botanu.use_case", "Baggage Case", context=ctx) + + tracer = trace.get_tracer("test") + token = context.attach(ctx) + try: + with tracer.start_as_current_span("test-span") as span: + # Set attribute before enricher runs + span.set_attribute("botanu.run_id", "existing-id") + # Now run enricher - should not override + enricher.on_start(span, ctx) + finally: + context.detach(token) + + spans = memory_exporter.get_finished_spans() + attrs = 
dict(spans[0].attributes) + # Should keep existing value + assert attrs.get("botanu.run_id") == "existing-id" + # But should set use_case since it wasn't set before + assert attrs.get("botanu.use_case") == "Baggage Case" + + def test_on_end_noop(self): + """on_end should be a no-op.""" + enricher = RunContextEnricher() + mock_span = mock.MagicMock(spec=ReadableSpan) + # Should not raise + enricher.on_end(mock_span) + + def test_shutdown_noop(self): + """shutdown should be a no-op.""" + enricher = RunContextEnricher() + # Should not raise + enricher.shutdown() + + def test_force_flush_returns_true(self): + """force_flush should return True.""" + enricher = RunContextEnricher() + assert enricher.force_flush() is True + assert enricher.force_flush(timeout_millis=1000) is True + + def test_baggage_keys_constants(self): + """Verify baggage key constants.""" + assert "botanu.run_id" in RunContextEnricher.BAGGAGE_KEYS_LEAN + assert "botanu.use_case" in RunContextEnricher.BAGGAGE_KEYS_LEAN + assert len(RunContextEnricher.BAGGAGE_KEYS_LEAN) == 2 + + assert "botanu.run_id" in RunContextEnricher.BAGGAGE_KEYS_FULL + assert "botanu.workflow" in RunContextEnricher.BAGGAGE_KEYS_FULL + assert "botanu.environment" in RunContextEnricher.BAGGAGE_KEYS_FULL + assert len(RunContextEnricher.BAGGAGE_KEYS_FULL) == 6 diff --git a/tests/unit/test_ledger.py b/tests/unit/test_ledger.py new file mode 100644 index 0000000..9c492b2 --- /dev/null +++ b/tests/unit/test_ledger.py @@ -0,0 +1,495 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for Attempt Ledger.""" + +from __future__ import annotations + +import os +from unittest import mock + +from opentelemetry import trace + +from botanu.tracking.ledger import ( + AttemptLedger, + AttemptStatus, + LedgerEventType, + get_ledger, + record_attempt_ended, + record_attempt_started, + record_llm_attempted, + record_tool_attempted, + set_ledger, +) + + +class TestLedgerEventType: + """Tests for LedgerEventType enum.""" + + def test_event_types_are_strings(self): + assert LedgerEventType.ATTEMPT_STARTED == "attempt.started" + assert LedgerEventType.ATTEMPT_ENDED == "attempt.ended" + assert LedgerEventType.LLM_ATTEMPTED == "llm.attempted" + assert LedgerEventType.TOOL_ATTEMPTED == "tool.attempted" + assert LedgerEventType.CANCEL_REQUESTED == "cancellation.requested" + assert LedgerEventType.CANCEL_ACKNOWLEDGED == "cancellation.acknowledged" + assert LedgerEventType.ZOMBIE_DETECTED == "zombie.detected" + assert LedgerEventType.REDELIVERY_DETECTED == "redelivery.detected" + + +class TestAttemptStatus: + """Tests for AttemptStatus enum.""" + + def test_status_values(self): + assert AttemptStatus.SUCCESS == "success" + assert AttemptStatus.ERROR == "error" + assert AttemptStatus.TIMEOUT == "timeout" + assert AttemptStatus.CANCELLED == "cancelled" + assert AttemptStatus.RATE_LIMITED == "rate_limited" + + +class TestAttemptLedger: + """Tests for AttemptLedger class.""" + + def test_default_service_name(self): + """Should use environment variable for default service name.""" + with mock.patch.dict(os.environ, {"OTEL_SERVICE_NAME": "test-service"}): + ledger = AttemptLedger.__new__(AttemptLedger) + ledger.service_name = os.getenv("OTEL_SERVICE_NAME", "unknown") + ledger._initialized = False + assert ledger.service_name == "test-service" + + def test_get_trace_context_no_span(self): + """Should return empty dict when no active span.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = 
None + + # No span context - should return empty + ctx = ledger._get_trace_context() + assert ctx == {} or "trace_id" in ctx # May have context from other tests + + def test_get_trace_context_with_span(self, memory_exporter): + """Should return trace context when span is active.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span") as span: + span_ctx = span.get_span_context() + ctx = ledger._get_trace_context() + + assert "trace_id" in ctx + assert "span_id" in ctx + assert ctx["trace_id"] == format(span_ctx.trace_id, "032x") + assert ctx["span_id"] == format(span_ctx.span_id, "016x") + + def test_emit_when_not_initialized(self): + """Should not raise when emitting without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + + # Should not raise + ledger._emit(LedgerEventType.ATTEMPT_STARTED, None, {"test": "value"}) + + def test_attempt_started_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.attempt_started( + run_id="run-123", + use_case="Test Case", + attempt=1, + ) + + def test_attempt_ended_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.attempt_ended( + run_id="run-123", + status="success", + duration_ms=1000.0, + ) + + def test_llm_attempted_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.llm_attempted( + run_id="run-123", + provider="openai", + model="gpt-4", + input_tokens=100, + output_tokens=50, + ) + + def test_tool_attempted_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.tool_attempted( + run_id="run-123", + tool_name="search", + ) + + def test_cancel_requested_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.cancel_requested(run_id="run-123", reason="user") + + def test_cancel_acknowledged_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.cancel_acknowledged(run_id="run-123", acknowledged_by="handler") + + def test_zombie_detected_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.zombie_detected( + run_id="run-123", + 
deadline_ts=1000.0, + actual_end_ts=2000.0, + zombie_duration_ms=1000.0, + component="handler", + ) + + def test_redelivery_detected_not_initialized(self): + """Should not raise when calling methods without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + ledger._logger = None + ledger.service_name = "test" + + # Should not raise + ledger.redelivery_detected( + run_id="run-123", + queue_name="my-queue", + delivery_count=3, + ) + + def test_flush_when_not_initialized(self): + """Should return True when flushing without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + + result = ledger.flush() + assert result is True + + def test_shutdown_when_not_initialized(self): + """Should not raise when shutting down without initialization.""" + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = False + + # Should not raise + ledger.shutdown() + + +class TestGlobalLedger: + """Tests for global ledger functions.""" + + def test_get_ledger_creates_instance(self): + """get_ledger should create a ledger if none exists.""" + # Reset global + import botanu.tracking.ledger as ledger_module + + ledger_module._global_ledger = None + + ledger = get_ledger() + assert isinstance(ledger, AttemptLedger) + + def test_set_ledger(self): + """set_ledger should update the global instance.""" + custom_ledger = AttemptLedger.__new__(AttemptLedger) + custom_ledger._initialized = False + custom_ledger.service_name = "custom-service" + + set_ledger(custom_ledger) + assert get_ledger() is custom_ledger + + def test_record_attempt_started(self): + """record_attempt_started should call the global ledger.""" + mock_ledger = mock.MagicMock(spec=AttemptLedger) + set_ledger(mock_ledger) + + record_attempt_started(run_id="run-123", use_case="Test") + + mock_ledger.attempt_started.assert_called_once_with(run_id="run-123", use_case="Test") + + def test_record_attempt_ended(self): + """record_attempt_ended should call the global ledger.""" + mock_ledger = mock.MagicMock(spec=AttemptLedger) + set_ledger(mock_ledger) + + record_attempt_ended(run_id="run-123", status="success") + + mock_ledger.attempt_ended.assert_called_once_with(run_id="run-123", status="success") + + def test_record_llm_attempted(self): + """record_llm_attempted should call the global ledger.""" + mock_ledger = mock.MagicMock(spec=AttemptLedger) + set_ledger(mock_ledger) + + record_llm_attempted(run_id="run-123", provider="openai", model="gpt-4") + + mock_ledger.llm_attempted.assert_called_once_with(run_id="run-123", provider="openai", model="gpt-4") + + def test_record_tool_attempted(self): + """record_tool_attempted should call the global ledger.""" + mock_ledger = mock.MagicMock(spec=AttemptLedger) + set_ledger(mock_ledger) + + record_tool_attempted(run_id="run-123", tool_name="search") + + mock_ledger.tool_attempted.assert_called_once_with(run_id="run-123", tool_name="search") + + +class TestAttemptLedgerEmitMocked: + """Tests for ledger methods with mocked _emit to verify event attributes.""" + + def _make_ledger(self): + ledger = AttemptLedger.__new__(AttemptLedger) + ledger._initialized = True + ledger._logger = mock.MagicMock() + ledger.service_name = "test-svc" + return ledger + + def test_attempt_started_attributes(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.attempt_started( + run_id="run-100", + use_case="billing", + attempt=2, + root_run_id="root-50", + workflow="invoice", + tenant_id="t-001", + 
deadline_ts=1700000000.0, + ) + + ledger._emit.assert_called_once() + event_type, _severity, attrs = ledger._emit.call_args[0] + assert event_type == LedgerEventType.ATTEMPT_STARTED + assert attrs["botanu.run_id"] == "run-100" + assert attrs["botanu.use_case"] == "billing" + assert attrs["botanu.attempt"] == 2 + assert attrs["botanu.root_run_id"] == "root-50" + assert attrs["botanu.workflow"] == "invoice" + assert attrs["botanu.tenant_id"] == "t-001" + assert attrs["botanu.deadline_ts"] == 1700000000.0 + + def test_attempt_ended_success(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.attempt_ended( + run_id="run-200", + status="success", + duration_ms=1500.0, + ) + + _, _severity, attrs = ledger._emit.call_args[0] + assert attrs["botanu.run_id"] == "run-200" + assert attrs["status"] == "success" + assert attrs["duration_ms"] == 1500.0 + + def test_attempt_ended_error(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.attempt_ended( + run_id="run-201", + status="error", + error_class="ValueError", + reason_code="INVALID_INPUT", + ) + + _, _severity, attrs = ledger._emit.call_args[0] + assert attrs["status"] == "error" + assert attrs["error_class"] == "ValueError" + assert attrs["reason_code"] == "INVALID_INPUT" + + def test_llm_attempted_full_attributes(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.llm_attempted( + run_id="run-300", + provider="openai", + model="gpt-4", + operation="chat", + attempt_number=1, + input_tokens=500, + output_tokens=200, + cached_tokens=100, + duration_ms=800.0, + status="success", + provider_request_id="resp-abc", + estimated_cost_usd=0.0075, + ) + + _, _, attrs = ledger._emit.call_args[0] + assert attrs["gen_ai.provider.name"] == "openai" + assert attrs["gen_ai.request.model"] == "gpt-4" + assert attrs["gen_ai.usage.input_tokens"] == 500 + assert attrs["gen_ai.usage.output_tokens"] == 200 + assert attrs["botanu.usage.cached_tokens"] == 100 + assert attrs["botanu.cost.estimated_usd"] == 0.0075 + + def test_tool_attempted_attributes(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.tool_attempted( + run_id="run-400", + tool_name="search", + tool_call_id="call-xyz", + duration_ms=250.0, + items_returned=3, + bytes_processed=4096, + ) + + _, _, attrs = ledger._emit.call_args[0] + assert attrs["gen_ai.tool.name"] == "search" + assert attrs["gen_ai.tool.call.id"] == "call-xyz" + assert attrs["items_returned"] == 3 + assert attrs["bytes_processed"] == 4096 + + def test_cancel_requested_attributes(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.cancel_requested( + run_id="run-500", + reason="timeout", + requested_at_ms=1700000001000.0, + ) + + event_type, _, attrs = ledger._emit.call_args[0] + assert event_type == LedgerEventType.CANCEL_REQUESTED + assert attrs["cancellation.reason"] == "timeout" + assert attrs["cancellation.requested_at_ms"] == 1700000001000.0 + + def test_cancel_acknowledged_attributes(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.cancel_acknowledged( + run_id="run-600", + acknowledged_by="worker-3", + latency_ms=150.0, + ) + + event_type, _, attrs = ledger._emit.call_args[0] + assert event_type == LedgerEventType.CANCEL_ACKNOWLEDGED + assert attrs["cancellation.acknowledged_by"] == "worker-3" + assert attrs["cancellation.latency_ms"] == 150.0 + + def test_zombie_detected_attributes(self): + ledger = self._make_ledger() + ledger._emit = 
mock.MagicMock() + + ledger.zombie_detected( + run_id="run-700", + deadline_ts=1000.0, + actual_end_ts=5000.0, + zombie_duration_ms=4000.0, + component="agent_loop", + ) + + event_type, _, attrs = ledger._emit.call_args[0] + assert event_type == LedgerEventType.ZOMBIE_DETECTED + assert attrs["zombie_duration_ms"] == 4000.0 + assert attrs["zombie_component"] == "agent_loop" + + def test_redelivery_detected_attributes(self): + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.redelivery_detected( + run_id="run-800", + queue_name="tasks-queue", + delivery_count=3, + original_message_id="msg-original", + ) + + event_type, _, attrs = ledger._emit.call_args[0] + assert event_type == LedgerEventType.REDELIVERY_DETECTED + assert attrs["queue.name"] == "tasks-queue" + assert attrs["delivery_count"] == 3 + assert attrs["original_message_id"] == "msg-original" + + def test_attempt_started_default_root_run_id(self): + """root_run_id defaults to run_id when not provided.""" + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.attempt_started(run_id="run-solo", use_case="test") + + _, _, attrs = ledger._emit.call_args[0] + assert attrs["botanu.root_run_id"] == "run-solo" + + def test_cancel_requested_auto_timestamp(self): + """requested_at_ms uses current time when not provided.""" + ledger = self._make_ledger() + ledger._emit = mock.MagicMock() + + ledger.cancel_requested(run_id="run-ts", reason="user") + + _, _, attrs = ledger._emit.call_args[0] + assert attrs["cancellation.requested_at_ms"] > 0 + + +class TestLedgerGlobalReset: + """Tests for global ledger cleanup.""" + + def test_set_ledger_overrides_default(self): + import botanu.tracking.ledger as ledger_module + + ledger_module._global_ledger = None + default = get_ledger() + + custom = AttemptLedger.__new__(AttemptLedger) + custom._initialized = False + custom.service_name = "override" + set_ledger(custom) + + assert get_ledger() is custom + assert get_ledger() is not default + + # Cleanup + ledger_module._global_ledger = None diff --git a/tests/unit/test_llm_tracking.py b/tests/unit/test_llm_tracking.py new file mode 100644 index 0000000..dd09cf9 --- /dev/null +++ b/tests/unit/test_llm_tracking.py @@ -0,0 +1,537 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for LLM tracking.""" + +from __future__ import annotations + +import pytest + +from botanu.tracking.llm import ( + GenAIAttributes, + ModelOperation, + track_llm_call, +) + + +class TestTrackLLMCall: + """Tests for track_llm_call context manager.""" + + def test_creates_span_with_model_name(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_tokens(input_tokens=100, output_tokens=50) + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + # Span name format: "{operation} {model}" + assert spans[0].name == "chat gpt-4" + + def test_records_token_usage(self, memory_exporter): + with track_llm_call(model="claude-3-opus", provider="anthropic") as tracker: + tracker.set_tokens(input_tokens=500, output_tokens=200) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + + assert attrs[GenAIAttributes.USAGE_INPUT_TOKENS] == 500 + assert attrs[GenAIAttributes.USAGE_OUTPUT_TOKENS] == 200 + + def test_records_error_on_exception(self, memory_exporter): + with pytest.raises(ValueError): + with track_llm_call(model="gpt-4", provider="openai") as _tracker: + raise ValueError("API error") + + spans = 
memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get(GenAIAttributes.ERROR_TYPE) == "ValueError" + + def test_operation_type_attribute(self, memory_exporter): + with track_llm_call( + model="gpt-4", + provider="openai", + operation=ModelOperation.EMBEDDINGS, + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.OPERATION_NAME] == "embeddings" + + def test_request_params(self, memory_exporter): + with track_llm_call( + model="gpt-4", + provider="openai", + ) as tracker: + tracker.set_request_params(temperature=0.7, max_tokens=1000) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.REQUEST_TEMPERATURE] == 0.7 + assert attrs[GenAIAttributes.REQUEST_MAX_TOKENS] == 1000 + + +class TestLLMTracker: + """Tests for LLMTracker helper methods.""" + + def test_set_request_id(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_request_id(provider_request_id="resp_123") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.RESPONSE_ID] == "resp_123" + + def test_set_finish_reason(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_finish_reason("stop") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + # OTel converts lists to tuples for span attributes + assert attrs[GenAIAttributes.RESPONSE_FINISH_REASONS] == ("stop",) + + +class TestProviderNormalization: + """Tests for provider name normalization.""" + + def test_openai_normalized(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="OpenAI"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "openai" + + def test_anthropic_normalized(self, memory_exporter): + with track_llm_call(model="claude-3", provider="Anthropic"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "anthropic" + + def test_bedrock_normalized(self, memory_exporter): + with track_llm_call(model="claude-v2", provider="bedrock"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "aws.bedrock" + + def test_vertex_normalized(self, memory_exporter): + with track_llm_call(model="gemini-pro", provider="vertex_ai"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "gcp.vertex_ai" + + def test_azure_openai_normalized(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="azure_openai"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "azure.openai" + + def test_unknown_provider_passthrough(self, memory_exporter): + """Unknown provider names should be normalized to lowercase.""" + with track_llm_call(model="custom-model", provider="CustomProvider"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "customprovider" + + +class TestLLMTrackerExtended: + """Extended tests for LLMTracker methods.""" + + def test_set_streaming(self, 
memory_exporter): + from botanu.tracking.llm import BotanuAttributes + + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_streaming(True) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[BotanuAttributes.STREAMING] is True + + def test_set_cache_hit(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes + + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_cache_hit(True) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[BotanuAttributes.CACHE_HIT] is True + + def test_set_attempt(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes + + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_attempt(3) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[BotanuAttributes.ATTEMPT_NUMBER] == 3 + + def test_set_response_model(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_response_model("gpt-4-0125-preview") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.RESPONSE_MODEL] == "gpt-4-0125-preview" + + def test_set_tokens_with_cache(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes + + with track_llm_call(model="claude-3", provider="anthropic") as tracker: + tracker.set_tokens( + input_tokens=100, + output_tokens=50, + cache_read_tokens=80, + cache_write_tokens=20, + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.USAGE_INPUT_TOKENS] == 100 + assert attrs[GenAIAttributes.USAGE_OUTPUT_TOKENS] == 50 + assert attrs[BotanuAttributes.TOKENS_CACHED_READ] == 80 + assert attrs[BotanuAttributes.TOKENS_CACHED_WRITE] == 20 + + def test_set_request_id_with_client_id(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes + + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_request_id( + provider_request_id="resp_123", + client_request_id="client_456", + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.RESPONSE_ID] == "resp_123" + assert attrs[BotanuAttributes.CLIENT_REQUEST_ID] == "client_456" + + def test_set_request_params_extended(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.set_request_params( + temperature=0.8, + top_p=0.95, + max_tokens=2000, + stop_sequences=["END", "STOP"], + frequency_penalty=0.5, + presence_penalty=0.3, + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.REQUEST_TEMPERATURE] == 0.8 + assert attrs[GenAIAttributes.REQUEST_TOP_P] == 0.95 + assert attrs[GenAIAttributes.REQUEST_MAX_TOKENS] == 2000 + # OTel converts lists to tuples + assert attrs[GenAIAttributes.REQUEST_STOP_SEQUENCES] == ("END", "STOP") + assert attrs[GenAIAttributes.REQUEST_FREQUENCY_PENALTY] == 0.5 + assert attrs[GenAIAttributes.REQUEST_PRESENCE_PENALTY] == 0.3 + + def test_add_metadata(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.add_metadata(custom_field="value", another_field=123) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.custom_field"] == "value" + assert 
attrs["botanu.another_field"] == 123 + + def test_add_metadata_preserves_prefix(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + tracker.add_metadata(**{"botanu.explicit": "prefixed"}) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.explicit"] == "prefixed" + + def test_set_error_manually(self, memory_exporter): + with track_llm_call(model="gpt-4", provider="openai") as tracker: + error = RuntimeError("Rate limit exceeded") + tracker.set_error(error) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.ERROR_TYPE] == "RuntimeError" + + +class TestModelOperationConstants: + """Tests for ModelOperation constants.""" + + def test_operation_types(self): + assert ModelOperation.CHAT == "chat" + assert ModelOperation.TEXT_COMPLETION == "text_completion" + assert ModelOperation.EMBEDDINGS == "embeddings" + assert ModelOperation.GENERATE_CONTENT == "generate_content" + assert ModelOperation.EXECUTE_TOOL == "execute_tool" + assert ModelOperation.IMAGE_GENERATION == "image_generation" + assert ModelOperation.SPEECH_TO_TEXT == "speech_to_text" + assert ModelOperation.TEXT_TO_SPEECH == "text_to_speech" + + def test_operation_aliases(self): + """Aliases should match their canonical forms.""" + assert ModelOperation.COMPLETION == ModelOperation.TEXT_COMPLETION + assert ModelOperation.EMBEDDING == ModelOperation.EMBEDDINGS + assert ModelOperation.FUNCTION_CALL == ModelOperation.EXECUTE_TOOL + assert ModelOperation.TOOL_USE == ModelOperation.EXECUTE_TOOL + + +class TestGenAIAttributeConstants: + """Tests for GenAIAttributes and BotanuAttributes constants.""" + + def test_genai_attributes(self): + assert GenAIAttributes.OPERATION_NAME == "gen_ai.operation.name" + assert GenAIAttributes.PROVIDER_NAME == "gen_ai.provider.name" + assert GenAIAttributes.REQUEST_MODEL == "gen_ai.request.model" + assert GenAIAttributes.RESPONSE_MODEL == "gen_ai.response.model" + assert GenAIAttributes.USAGE_INPUT_TOKENS == "gen_ai.usage.input_tokens" + assert GenAIAttributes.USAGE_OUTPUT_TOKENS == "gen_ai.usage.output_tokens" + + def test_botanu_attributes(self): + from botanu.tracking.llm import BotanuAttributes + + assert BotanuAttributes.TOKENS_CACHED == "botanu.usage.cached_tokens" + assert BotanuAttributes.STREAMING == "botanu.request.streaming" + assert BotanuAttributes.CACHE_HIT == "botanu.request.cache_hit" + assert BotanuAttributes.ATTEMPT_NUMBER == "botanu.request.attempt" + assert BotanuAttributes.VENDOR == "botanu.vendor" + + +class TestTrackToolCall: + """Tests for track_tool_call context manager.""" + + def test_creates_span(self, memory_exporter): + from botanu.tracking.llm import track_tool_call + + with track_tool_call(tool_name="search"): + pass + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + assert spans[0].name == "execute_tool search" + + def test_tool_call_attributes(self, memory_exporter): + from botanu.tracking.llm import track_tool_call + + with track_tool_call( + tool_name="web_search", + tool_call_id="call_abc123", + provider="tavily", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.TOOL_NAME] == "web_search" + assert attrs[GenAIAttributes.TOOL_CALL_ID] == "call_abc123" + assert attrs[GenAIAttributes.OPERATION_NAME] == "execute_tool" + + def test_tool_tracker_set_result(self, memory_exporter): + from botanu.tracking.llm import 
BotanuAttributes, track_tool_call + + with track_tool_call(tool_name="db_query") as tracker: + tracker.set_result(success=True, items_returned=42, bytes_processed=8192) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[BotanuAttributes.TOOL_SUCCESS] is True + assert attrs[BotanuAttributes.TOOL_ITEMS_RETURNED] == 42 + assert attrs[BotanuAttributes.TOOL_BYTES_PROCESSED] == 8192 + + def test_tool_tracker_set_error(self, memory_exporter): + from botanu.tracking.llm import track_tool_call + + with pytest.raises(ConnectionError): + with track_tool_call(tool_name="api_call"): + raise ConnectionError("Service down") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.ERROR_TYPE] == "ConnectionError" + + def test_tool_tracker_set_tool_call_id(self, memory_exporter): + from botanu.tracking.llm import track_tool_call + + with track_tool_call(tool_name="calc") as tracker: + tracker.set_tool_call_id("call_xyz789") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.TOOL_CALL_ID] == "call_xyz789" + + def test_tool_tracker_add_metadata(self, memory_exporter): + from botanu.tracking.llm import track_tool_call + + with track_tool_call(tool_name="search") as tracker: + tracker.add_metadata(query="python otel", source="web") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.tool.query"] == "python otel" + assert attrs["botanu.tool.source"] == "web" + + def test_tool_duration_recorded(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes, track_tool_call + + with track_tool_call(tool_name="slow_tool"): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert BotanuAttributes.TOOL_DURATION_MS in attrs + assert attrs[BotanuAttributes.TOOL_DURATION_MS] >= 0 + + +class TestStandaloneHelpers: + """Tests for set_llm_attributes and set_token_usage.""" + + def test_set_llm_attributes(self, memory_exporter): + from opentelemetry import trace as otl_trace + + from botanu.tracking.llm import BotanuAttributes, set_llm_attributes + + tracer = otl_trace.get_tracer("test") + with tracer.start_as_current_span("test-llm-attrs"): + set_llm_attributes( + provider="openai", + model="gpt-4", + input_tokens=150, + output_tokens=75, + streaming=True, + provider_request_id="resp_abc", + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "openai" + assert attrs[GenAIAttributes.REQUEST_MODEL] == "gpt-4" + assert attrs[GenAIAttributes.USAGE_INPUT_TOKENS] == 150 + assert attrs[GenAIAttributes.USAGE_OUTPUT_TOKENS] == 75 + assert attrs[BotanuAttributes.STREAMING] is True + assert attrs[GenAIAttributes.RESPONSE_ID] == "resp_abc" + + def test_set_llm_attributes_no_active_span(self): + from botanu.tracking.llm import set_llm_attributes + + # Should not raise when no recording span + set_llm_attributes(provider="openai", model="gpt-4") + + def test_set_token_usage(self, memory_exporter): + from opentelemetry import trace as otl_trace + + from botanu.tracking.llm import BotanuAttributes, set_token_usage + + tracer = otl_trace.get_tracer("test") + with tracer.start_as_current_span("test-token-usage"): + set_token_usage(input_tokens=200, output_tokens=100, cached_tokens=50) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert 
attrs[GenAIAttributes.USAGE_INPUT_TOKENS] == 200 + assert attrs[GenAIAttributes.USAGE_OUTPUT_TOKENS] == 100 + assert attrs[BotanuAttributes.TOKENS_CACHED] == 50 + + def test_set_token_usage_no_active_span(self): + from botanu.tracking.llm import set_token_usage + + # Should not raise when no recording span + set_token_usage(input_tokens=10, output_tokens=5) + + +class TestLLMInstrumentedDecorator: + """Tests for the llm_instrumented decorator.""" + + def test_decorator_creates_span(self, memory_exporter): + from botanu.tracking.llm import llm_instrumented + + @llm_instrumented(provider="openai") + def fake_completion(prompt, model="gpt-4"): + class _Usage: + prompt_tokens = 10 + completion_tokens = 20 + + class _Response: + usage = _Usage() + + return _Response() + + result = fake_completion("Hello", model="gpt-4") + assert result is not None + + spans = memory_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = dict(spans[0].attributes) + assert attrs[GenAIAttributes.PROVIDER_NAME] == "openai" + assert attrs[GenAIAttributes.REQUEST_MODEL] == "gpt-4" + assert attrs[GenAIAttributes.USAGE_INPUT_TOKENS] == 10 + assert attrs[GenAIAttributes.USAGE_OUTPUT_TOKENS] == 20 + + def test_decorator_with_streaming(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes, llm_instrumented + + @llm_instrumented(provider="anthropic") + def fake_stream(prompt, model="claude-3", stream=False): + return "streamed" + + fake_stream("Hi", model="claude-3", stream=True) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[BotanuAttributes.STREAMING] is True + + def test_decorator_without_usage(self, memory_exporter): + from botanu.tracking.llm import llm_instrumented + + @llm_instrumented(provider="custom", tokens_from_response=False) + def no_usage_fn(prompt, model="custom-model"): + return "done" + + no_usage_fn("test", model="custom-model") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert GenAIAttributes.USAGE_INPUT_TOKENS not in attrs + + +class TestClientRequestId: + """Tests for client_request_id passthrough.""" + + def test_client_request_id_on_track_llm_call(self, memory_exporter): + from botanu.tracking.llm import BotanuAttributes + + with track_llm_call( + model="gpt-4", + provider="openai", + client_request_id="cli-req-001", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs[BotanuAttributes.CLIENT_REQUEST_ID] == "cli-req-001" + + +class TestKwargsPassthrough: + """Tests for additional kwargs passed to track_llm_call.""" + + def test_custom_kwargs(self, memory_exporter): + with track_llm_call( + model="gpt-4", + provider="openai", + deployment_id="dep-001", + ): + pass + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs["botanu.deployment_id"] == "dep-001" diff --git a/tests/unit/test_middleware.py b/tests/unit/test_middleware.py new file mode 100644 index 0000000..b41b838 --- /dev/null +++ b/tests/unit/test_middleware.py @@ -0,0 +1,175 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for BotanuMiddleware (FastAPI/Starlette).""" + +from __future__ import annotations + +import pytest +from opentelemetry import context as otel_context +from starlette.applications import Starlette +from starlette.responses import JSONResponse +from starlette.routing import Route +from starlette.testclient import TestClient + +from 
botanu.sdk.middleware import BotanuMiddleware + + +def _make_app(*, use_case: str = "test_uc", workflow: str | None = None, auto_generate_run_id: bool = True): + """Build a minimal Starlette app with BotanuMiddleware.""" + + async def homepage(request): + return JSONResponse({"ok": True}) + + app = Starlette(routes=[Route("/", homepage)]) + app.add_middleware( + BotanuMiddleware, + use_case=use_case, + workflow=workflow, + auto_generate_run_id=auto_generate_run_id, + ) + return app + + +@pytest.fixture(autouse=True) +def _clean_otel_context(): + """Reset OTel context before each middleware test to avoid baggage leaking.""" + token = otel_context.attach(otel_context.Context()) + yield + otel_context.detach(token) + + +class TestBotanuMiddleware: + """Tests for BotanuMiddleware dispatch behaviour.""" + + def test_response_contains_use_case_header(self, memory_exporter): + client = TestClient(_make_app(use_case="billing")) + resp = client.get("/") + assert resp.status_code == 200 + assert resp.headers["x-botanu-use-case"] == "billing" + + def test_response_contains_workflow_header(self, memory_exporter): + client = TestClient(_make_app(use_case="billing", workflow="invoice_flow")) + resp = client.get("/") + assert resp.headers["x-botanu-workflow"] == "invoice_flow" + + def test_auto_generated_run_id_in_response(self, memory_exporter): + client = TestClient(_make_app()) + resp = client.get("/") + run_id = resp.headers.get("x-botanu-run-id") + assert run_id is not None + assert len(run_id) > 0 + + def test_run_id_propagated_from_header(self, memory_exporter): + client = TestClient(_make_app()) + resp = client.get("/", headers={"x-botanu-run-id": "my-custom-run-123"}) + assert resp.headers["x-botanu-run-id"] == "my-custom-run-123" + + def test_use_case_propagated_from_header(self, memory_exporter): + client = TestClient(_make_app(use_case="default_uc")) + resp = client.get("/", headers={"x-botanu-use-case": "overridden_uc"}) + assert resp.headers["x-botanu-use-case"] == "overridden_uc" + + def test_workflow_propagated_from_header(self, memory_exporter): + client = TestClient(_make_app(use_case="uc", workflow="default_wf")) + resp = client.get("/", headers={"x-botanu-workflow": "overridden_wf"}) + assert resp.headers["x-botanu-workflow"] == "overridden_wf" + + def test_no_auto_run_id_when_disabled(self, memory_exporter): + client = TestClient(_make_app(auto_generate_run_id=False)) + resp = client.get("/") + # Should not have a run_id header since none was provided and auto-gen is off + assert "x-botanu-run-id" not in resp.headers + + def test_workflow_defaults_to_use_case(self, memory_exporter): + client = TestClient(_make_app(use_case="my_uc")) + resp = client.get("/") + assert resp.headers["x-botanu-workflow"] == "my_uc" + + def test_customer_id_propagated_from_header(self, memory_exporter): + client = TestClient(_make_app()) + resp = client.get("/", headers={"x-botanu-customer-id": "cust-456"}) + assert resp.status_code == 200 + + def test_each_request_gets_unique_run_id(self, memory_exporter): + client = TestClient(_make_app()) + resp1 = client.get("/") + resp2 = client.get("/") + run_id1 = resp1.headers.get("x-botanu-run-id") + run_id2 = resp2.headers.get("x-botanu-run-id") + assert run_id1 != run_id2 + + +class TestMiddlewareBaggageIsolation: + """Tests for baggage context isolation between requests.""" + + def test_baggage_does_not_leak_between_requests(self, memory_exporter): + """Baggage set in request 1 must not appear in request 2.""" + + app_with_baggage_check = 
_make_baggage_check_app() + client = TestClient(app_with_baggage_check) + + # Request 1: sends a custom run_id + resp1 = client.get("/check", headers={"x-botanu-run-id": "leak-test-001"}) + resp1.json() + + # Request 2: no custom run_id + resp2 = client.get("/check") + data2 = resp2.json() + + # Request 2 should NOT see request 1's run_id in baggage + assert data2.get("run_id") != "leak-test-001" + + def test_header_priority_over_constructor_defaults(self, memory_exporter): + """x-botanu-use-case header should override constructor default.""" + client = TestClient(_make_app(use_case="default_uc")) + resp = client.get("/", headers={"x-botanu-use-case": "header_uc"}) + assert resp.headers["x-botanu-use-case"] == "header_uc" + + def test_multiple_headers_propagated(self, memory_exporter): + """All x-botanu-* headers should be propagated together.""" + client = TestClient(_make_app(use_case="uc")) + resp = client.get( + "/", + headers={ + "x-botanu-run-id": "multi-001", + "x-botanu-use-case": "multi-uc", + "x-botanu-workflow": "multi-wf", + "x-botanu-customer-id": "cust-multi", + }, + ) + assert resp.headers["x-botanu-run-id"] == "multi-001" + assert resp.headers["x-botanu-use-case"] == "multi-uc" + assert resp.headers["x-botanu-workflow"] == "multi-wf" + + def test_exception_in_handler_still_detaches_context(self, memory_exporter): + """Context token should be detached even when handler raises.""" + app = _make_error_app() + client = TestClient(app, raise_server_exceptions=False) + resp = client.get("/error") + assert resp.status_code == 500 + + +def _make_baggage_check_app(): + """Build app that returns current baggage values.""" + from opentelemetry import baggage as otel_baggage + from opentelemetry.context import get_current + + async def check_baggage(request): + run_id = otel_baggage.get_baggage("botanu.run_id", context=get_current()) + return JSONResponse({"run_id": run_id}) + + app = Starlette(routes=[Route("/check", check_baggage)]) + app.add_middleware(BotanuMiddleware, use_case="test") + return app + + +def _make_error_app(): + """Build app that raises an exception in the handler.""" + + async def error_handler(request): + raise RuntimeError("Intentional test error") + + app = Starlette(routes=[Route("/error", error_handler)]) + app.add_middleware(BotanuMiddleware, use_case="error_test") + return app diff --git a/tests/unit/test_resource_detector.py b/tests/unit/test_resource_detector.py new file mode 100644 index 0000000..dad4d3d --- /dev/null +++ b/tests/unit/test_resource_detector.py @@ -0,0 +1,455 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for resource detection.""" + +from __future__ import annotations + +import os +import sys +from unittest import mock + +from botanu.resources.detector import ( + detect_all_resources, + detect_cloud_provider, + detect_container, + detect_host, + detect_kubernetes, + detect_process, + detect_serverless, + get_resource_attributes, +) + + +class TestDetectHost: + """Tests for host detection.""" + + def test_detects_hostname(self): + attrs = detect_host() + assert "host.name" in attrs + assert isinstance(attrs["host.name"], str) + + def test_detects_os_type(self): + attrs = detect_host() + assert attrs["os.type"] == sys.platform + + def test_detects_host_arch(self): + attrs = detect_host() + assert "host.arch" in attrs + + +class TestDetectProcess: + """Tests for process detection.""" + + def test_detects_pid(self): + attrs = detect_process() + assert attrs["process.pid"] == 
os.getpid() + + def test_detects_runtime(self): + attrs = detect_process() + assert attrs["process.runtime.name"] == "python" + assert "process.runtime.version" in attrs + + +class TestDetectKubernetes: + """Tests for Kubernetes detection.""" + + def test_no_k8s_when_not_in_cluster(self): + with mock.patch.dict(os.environ, {}, clear=True): + os.environ.pop("KUBERNETES_SERVICE_HOST", None) + attrs = detect_kubernetes() + assert attrs == {} + + def test_detects_k8s_pod_name(self): + with mock.patch.dict( + os.environ, + { + "KUBERNETES_SERVICE_HOST": "10.0.0.1", + "HOSTNAME": "my-pod-abc123", + "K8S_NAMESPACE": "default", + }, + ): + attrs = detect_kubernetes() + assert attrs.get("k8s.pod.name") == "my-pod-abc123" + assert attrs.get("k8s.namespace.name") == "default" + + def test_detects_k8s_from_env_vars(self): + with mock.patch.dict( + os.environ, + { + "KUBERNETES_SERVICE_HOST": "10.0.0.1", + "K8S_POD_NAME": "explicit-pod", + "K8S_POD_UID": "uid-12345", + "K8S_CLUSTER_NAME": "prod-cluster", + }, + ): + attrs = detect_kubernetes() + assert attrs.get("k8s.pod.name") == "explicit-pod" + assert attrs.get("k8s.pod.uid") == "uid-12345" + assert attrs.get("k8s.cluster.name") == "prod-cluster" + + +class TestDetectCloudProvider: + """Tests for cloud provider detection.""" + + def test_no_cloud_when_not_in_cloud(self): + with mock.patch.dict(os.environ, {}, clear=True): + # Clear all cloud env vars + for key in list(os.environ.keys()): + if any( + prefix in key + for prefix in ["AWS_", "GOOGLE_", "GCLOUD_", "GCP_", "AZURE_", "K_", "FUNCTION_", "WEBSITE_"] + ): + os.environ.pop(key, None) + attrs = detect_cloud_provider() + assert "cloud.provider" not in attrs + + def test_detects_aws(self): + with mock.patch.dict( + os.environ, + { + "AWS_REGION": "us-east-1", + "AWS_ACCOUNT_ID": "123456789012", + }, + clear=False, + ): + attrs = detect_cloud_provider() + assert attrs.get("cloud.provider") == "aws" + assert attrs.get("cloud.region") == "us-east-1" + + def test_detects_aws_lambda(self): + with mock.patch.dict( + os.environ, + { + "AWS_LAMBDA_FUNCTION_NAME": "my-function", + "AWS_LAMBDA_FUNCTION_VERSION": "$LATEST", + "AWS_REGION": "us-west-2", + }, + clear=False, + ): + attrs = detect_cloud_provider() + assert attrs.get("cloud.provider") == "aws" + assert attrs.get("faas.name") == "my-function" + + def test_detects_gcp(self): + with mock.patch.dict( + os.environ, + {"GOOGLE_CLOUD_PROJECT": "my-project", "GOOGLE_CLOUD_REGION": "us-central1"}, + clear=False, + ): + # Clear AWS vars + os.environ.pop("AWS_REGION", None) + os.environ.pop("AWS_DEFAULT_REGION", None) + attrs = detect_cloud_provider() + assert attrs.get("cloud.provider") == "gcp" + assert attrs.get("cloud.account.id") == "my-project" + + def test_detects_gcp_cloud_run(self): + with mock.patch.dict( + os.environ, + { + "K_SERVICE": "my-service", + "K_REVISION": "my-service-00001", + "GOOGLE_CLOUD_PROJECT": "my-project", + }, + clear=False, + ): + os.environ.pop("AWS_REGION", None) + attrs = detect_cloud_provider() + assert attrs.get("cloud.provider") == "gcp" + assert attrs.get("faas.name") == "my-service" + + def test_detects_azure(self): + with mock.patch.dict( + os.environ, + { + "WEBSITE_SITE_NAME": "my-app", + "AZURE_SUBSCRIPTION_ID": "sub-12345", + "REGION_NAME": "eastus", + }, + clear=False, + ): + # Clear other cloud vars + os.environ.pop("AWS_REGION", None) + os.environ.pop("GOOGLE_CLOUD_PROJECT", None) + attrs = detect_cloud_provider() + assert attrs.get("cloud.provider") == "azure" + assert attrs.get("faas.name") == "my-app" + + 
+class TestDetectContainer: + """Tests for container detection.""" + + def test_detects_container_id_from_env(self): + with mock.patch.dict(os.environ, {"CONTAINER_ID": "abc123def456"}): + attrs = detect_container() + # Container ID detection depends on cgroup files + # In test environment, may or may not detect + assert isinstance(attrs, dict) + + +class TestDetectServerless: + """Tests for serverless/FaaS detection.""" + + def test_detects_lambda(self): + with mock.patch.dict( + os.environ, + { + "AWS_LAMBDA_FUNCTION_NAME": "my-lambda", + "AWS_LAMBDA_FUNCTION_VERSION": "1", + "AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "512", + }, + ): + attrs = detect_serverless() + assert attrs.get("faas.name") == "my-lambda" + assert attrs.get("faas.version") == "1" + assert attrs.get("faas.max_memory") == 512 * 1024 * 1024 + + def test_detects_cloud_run(self): + with mock.patch.dict( + os.environ, + { + "K_SERVICE": "cloud-run-service", + "K_REVISION": "rev-001", + }, + ): + # Clear Lambda vars + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + attrs = detect_serverless() + assert attrs.get("faas.name") == "cloud-run-service" + assert attrs.get("faas.version") == "rev-001" + + +class TestDetectAllResources: + """Tests for combined resource detection.""" + + def test_returns_dict(self): + attrs = detect_all_resources() + assert isinstance(attrs, dict) + + def test_includes_host_info(self): + # Clear cache to ensure fresh detection + detect_all_resources.cache_clear() + attrs = detect_all_resources() + assert "host.name" in attrs + assert "process.pid" in attrs + + def test_caches_results(self): + detect_all_resources.cache_clear() + result1 = detect_all_resources() + result2 = detect_all_resources() + assert result1 is result2 # Same object due to caching + + +class TestGetResourceAttributes: + """Tests for selective resource detection.""" + + def test_include_host_only(self): + attrs = get_resource_attributes( + include_host=True, + include_process=False, + include_container=False, + include_cloud=False, + include_k8s=False, + include_faas=False, + ) + assert "host.name" in attrs + assert "process.pid" not in attrs + + def test_include_process_only(self): + attrs = get_resource_attributes( + include_host=False, + include_process=True, + include_container=False, + include_cloud=False, + include_k8s=False, + include_faas=False, + ) + assert "process.pid" in attrs + assert "host.name" not in attrs + + +class TestAWSAvailabilityZone: + """Tests for _get_aws_availability_zone.""" + + def test_returns_none_for_lambda(self): + from botanu.resources.detector import _get_aws_availability_zone + + with mock.patch.dict(os.environ, {"AWS_LAMBDA_FUNCTION_NAME": "fn"}): + assert _get_aws_availability_zone() is None + + def test_returns_none_when_metadata_disabled(self): + from botanu.resources.detector import _get_aws_availability_zone + + with mock.patch.dict(os.environ, {"AWS_EC2_METADATA_DISABLED": "true"}, clear=True): + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + assert _get_aws_availability_zone() is None + + def test_returns_none_when_invalid_endpoint(self): + from botanu.resources.detector import _get_aws_availability_zone + + with mock.patch.dict( + os.environ, + { + "AWS_EC2_METADATA_SERVICE_ENDPOINT": "not-a-url", + }, + clear=True, + ): + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + assert _get_aws_availability_zone() is None + + def test_returns_none_on_network_error(self): + from botanu.resources.detector import _get_aws_availability_zone + + with mock.patch.dict(os.environ, {}, clear=True): + 
os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + os.environ.pop("AWS_EC2_METADATA_DISABLED", None) + # Default endpoint (169.254.169.254) will fail in test env + result = _get_aws_availability_zone() + assert result is None + + +class TestCloudRegionFromAZ: + """Tests for cloud region derivation from availability zone.""" + + def test_region_derived_from_az(self): + """When AZ is 'us-east-1a', region should be 'us-east-1'.""" + + with mock.patch.dict( + os.environ, + { + "AWS_REGION": "", + "AWS_DEFAULT_REGION": "", + "AWS_ACCOUNT_ID": "123456789012", + }, + clear=True, + ): + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + + # Mock the IMDS call to return an AZ + with mock.patch( + "botanu.resources.detector._get_aws_availability_zone", + return_value="us-west-2c", + ): + attrs = detect_cloud_provider() + if "cloud.availability_zone" in attrs: + assert attrs["cloud.region"] == "us-west-2" + + +class TestContainerId: + """Tests for container ID extraction.""" + + def test_container_id_from_env(self): + from botanu.resources.detector import _get_container_id + + # Short container IDs (< 12 chars) are ignored + with mock.patch.dict(os.environ, {"CONTAINER_ID": "short"}, clear=True): + os.environ.pop("HOSTNAME", None) + result = _get_container_id() + assert result is None + + # Long enough IDs are returned + with mock.patch.dict(os.environ, {"CONTAINER_ID": "abcdef123456"}, clear=True): + os.environ.pop("HOSTNAME", None) + result = _get_container_id() + # May be overridden by cgroup parsing, but at minimum not None + assert result is None or len(result) >= 12 + + +class TestDetectHostExtended: + """Extended host detection tests.""" + + def test_host_id_from_env(self): + with mock.patch.dict(os.environ, {"HOST_ID": "i-0123456789"}): + attrs = detect_host() + assert attrs["host.id"] == "i-0123456789" + + def test_host_id_from_instance_id(self): + with mock.patch.dict(os.environ, {"INSTANCE_ID": "vm-abc"}, clear=True): + os.environ.pop("HOST_ID", None) + attrs = detect_host() + assert attrs["host.id"] == "vm-abc" + + def test_host_id_falls_back_to_hostname(self): + with mock.patch.dict(os.environ, {}, clear=True): + os.environ.pop("HOST_ID", None) + os.environ.pop("INSTANCE_ID", None) + attrs = detect_host() + assert attrs.get("host.id") == attrs.get("host.name") + + +class TestDetectServerlessExtended: + """Extended serverless detection tests.""" + + def test_gcp_cloud_function(self): + with mock.patch.dict( + os.environ, + { + "FUNCTION_NAME": "my-function", + "FUNCTION_TARGET": "handle_event", + }, + clear=True, + ): + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + os.environ.pop("K_SERVICE", None) + attrs = detect_serverless() + assert attrs["faas.name"] == "my-function" + assert attrs["faas.trigger"] == "handle_event" + + def test_azure_functions(self): + with mock.patch.dict( + os.environ, + { + "WEBSITE_SITE_NAME": "my-azure-fn", + "WEBSITE_INSTANCE_ID": "inst-123", + }, + clear=True, + ): + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + os.environ.pop("K_SERVICE", None) + os.environ.pop("FUNCTION_NAME", None) + attrs = detect_serverless() + assert attrs["faas.name"] == "my-azure-fn" + assert attrs["faas.instance"] == "inst-123" + + def test_no_serverless_detected(self): + with mock.patch.dict(os.environ, {}, clear=True): + os.environ.pop("AWS_LAMBDA_FUNCTION_NAME", None) + os.environ.pop("K_SERVICE", None) + os.environ.pop("FUNCTION_NAME", None) + os.environ.pop("WEBSITE_SITE_NAME", None) + attrs = detect_serverless() + assert attrs == {} + + +class 
TestDetectProcessExtended: + """Extended process detection tests.""" + + def test_process_command(self): + attrs = detect_process() + assert "process.command" in attrs + assert isinstance(attrs["process.command"], str) + + def test_process_runtime_version_format(self): + attrs = detect_process() + version = attrs["process.runtime.version"] + parts = version.split(".") + assert len(parts) >= 2 # major.minor at minimum + + +class TestServiceInstanceId: + """Tests for service.instance.id derivation in detect_all_resources.""" + + def test_instance_id_from_hostname_in_k8s(self): + detect_all_resources.cache_clear() + with mock.patch.dict( + os.environ, + { + "KUBERNETES_SERVICE_HOST": "10.0.0.1", + "HOSTNAME": "my-pod-abc123xyz", + }, + ): + attrs = detect_all_resources() + # Should have service.instance.id + assert "service.instance.id" in attrs + detect_all_resources.cache_clear() diff --git a/tests/unit/test_run_context.py b/tests/unit/test_run_context.py new file mode 100644 index 0000000..0869676 --- /dev/null +++ b/tests/unit/test_run_context.py @@ -0,0 +1,204 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for RunContext model.""" + +from __future__ import annotations + +import os +import re +import time +from unittest import mock + +from botanu.models.run_context import ( + RunContext, + RunStatus, + generate_run_id, +) + + +class TestGenerateRunId: + """Tests for UUIDv7 generation.""" + + def test_format_is_uuid(self): + """run_id should be valid UUID format.""" + run_id = generate_run_id() + uuid_pattern = r"^[0-9a-f]{8}-[0-9a-f]{4}-7[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + assert re.match(uuid_pattern, run_id), f"Invalid UUID format: {run_id}" + + def test_uniqueness(self): + """Generated IDs should be unique.""" + ids = [generate_run_id() for _ in range(1000)] + assert len(set(ids)) == 1000 + + def test_sortable_by_time(self): + """IDs generated later should sort after earlier ones.""" + id1 = generate_run_id() + time.sleep(0.002) + id2 = generate_run_id() + assert id1 < id2 + + +class TestRunContextCreate: + """Tests for RunContext.create factory.""" + + def test_creates_with_required_fields(self): + ctx = RunContext.create(use_case="Customer Support") + assert ctx.run_id is not None + assert ctx.use_case == "Customer Support" + assert ctx.environment == "production" # default + assert ctx.attempt == 1 + + def test_root_run_id_defaults_to_run_id(self): + ctx = RunContext.create(use_case="test") + assert ctx.root_run_id == ctx.run_id + + def test_accepts_custom_root_run_id(self): + ctx = RunContext.create(use_case="test", root_run_id="custom-root") + assert ctx.root_run_id == "custom-root" + + def test_environment_from_env_var(self): + with mock.patch.dict(os.environ, {"BOTANU_ENVIRONMENT": "staging"}): + ctx = RunContext.create(use_case="test") + assert ctx.environment == "staging" + + def test_explicit_environment_overrides_env_var(self): + with mock.patch.dict(os.environ, {"BOTANU_ENVIRONMENT": "staging"}): + ctx = RunContext.create(use_case="test", environment="production") + assert ctx.environment == "production" + + +class TestRunContextRetry: + """Tests for retry handling.""" + + def test_create_retry_increments_attempt(self): + original = RunContext.create(use_case="test") + retry = RunContext.create_retry(original) + + assert retry.attempt == 2 + assert retry.retry_of_run_id == original.run_id + assert retry.root_run_id == original.root_run_id + assert retry.run_id != original.run_id + + def 
test_multiple_retries_preserve_root(self): + original = RunContext.create(use_case="test") + retry1 = RunContext.create_retry(original) + retry2 = RunContext.create_retry(retry1) + + assert retry2.attempt == 3 + assert retry2.root_run_id == original.run_id + + +class TestRunContextDeadline: + """Tests for deadline handling.""" + + def test_deadline_seconds(self): + ctx = RunContext.create(use_case="test", deadline_seconds=10.0) + assert ctx.deadline is not None + assert ctx.deadline > time.time() + + def test_is_past_deadline(self): + ctx = RunContext.create(use_case="test", deadline_seconds=0.001) + time.sleep(0.01) + assert ctx.is_past_deadline() is True + + def test_remaining_time_seconds(self): + ctx = RunContext.create(use_case="test", deadline_seconds=10.0) + remaining = ctx.remaining_time_seconds() + assert remaining is not None + assert 9.0 < remaining <= 10.0 + + +class TestRunContextCancellation: + """Tests for cancellation handling.""" + + def test_request_cancellation(self): + ctx = RunContext.create(use_case="test") + assert ctx.is_cancelled() is False + + ctx.request_cancellation("user") + assert ctx.is_cancelled() is True + assert ctx.cancelled_at is not None + + +class TestRunContextOutcome: + """Tests for outcome recording.""" + + def test_complete_sets_outcome(self): + ctx = RunContext.create(use_case="test") + ctx.complete( + status=RunStatus.SUCCESS, + value_type="tickets_resolved", + value_amount=1.0, + ) + + assert ctx.outcome is not None + assert ctx.outcome.status == RunStatus.SUCCESS + assert ctx.outcome.value_type == "tickets_resolved" + assert ctx.outcome.value_amount == 1.0 + + +class TestRunContextSerialization: + """Tests for baggage and span attribute serialization.""" + + def test_to_baggage_dict_lean_mode(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "lean"}): + ctx = RunContext.create( + use_case="Customer Support", + workflow="handle_ticket", + tenant_id="tenant-123", + ) + baggage = ctx.to_baggage_dict() + + # Lean mode only includes run_id and use_case + assert "botanu.run_id" in baggage + assert "botanu.use_case" in baggage + assert "botanu.workflow" not in baggage + assert "botanu.tenant_id" not in baggage + + def test_to_baggage_dict_full_mode(self): + with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "full"}): + ctx = RunContext.create( + use_case="Customer Support", + workflow="handle_ticket", + tenant_id="tenant-123", + ) + baggage = ctx.to_baggage_dict() + + assert baggage["botanu.workflow"] == "handle_ticket" + assert baggage["botanu.tenant_id"] == "tenant-123" + + def test_to_span_attributes(self): + ctx = RunContext.create( + use_case="Customer Support", + workflow="handle_ticket", + tenant_id="tenant-123", + ) + attrs = ctx.to_span_attributes() + + assert attrs["botanu.run_id"] == ctx.run_id + assert attrs["botanu.use_case"] == "Customer Support" + assert attrs["botanu.workflow"] == "handle_ticket" + assert attrs["botanu.tenant_id"] == "tenant-123" + + def test_from_baggage_roundtrip(self): + original = RunContext.create( + use_case="test", + workflow="my_workflow", + tenant_id="tenant-abc", + ) + baggage = original.to_baggage_dict(lean_mode=False) + restored = RunContext.from_baggage(baggage) + + assert restored is not None + assert restored.run_id == original.run_id + assert restored.use_case == original.use_case + assert restored.workflow == original.workflow + assert restored.tenant_id == original.tenant_id + + def test_from_baggage_returns_none_for_missing_fields(self): + result = 
RunContext.from_baggage({}) + assert result is None + + result = RunContext.from_baggage({"botanu.run_id": "some-id"}) + assert result is None diff --git a/tests/unit/test_span_helpers.py b/tests/unit/test_span_helpers.py new file mode 100644 index 0000000..799bcf4 --- /dev/null +++ b/tests/unit/test_span_helpers.py @@ -0,0 +1,124 @@ +# SPDX-FileCopyrightText: 2026 The Botanu Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for span helper functions.""" + +from __future__ import annotations + +from opentelemetry import trace + +from botanu.sdk.span_helpers import emit_outcome, set_business_context + + +class TestEmitOutcome: + """Tests for emit_outcome function.""" + + def test_emit_success_outcome(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + emit_outcome("success") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.outcome") == "success" + + def test_emit_failure_outcome(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + emit_outcome("failed", reason="timeout") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.outcome") == "failed" + assert attrs.get("botanu.outcome.reason") == "timeout" + + def test_emit_outcome_with_value(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + emit_outcome( + "success", + value_type="tickets_resolved", + value_amount=5.0, + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.outcome") == "success" + assert attrs.get("botanu.outcome.value_type") == "tickets_resolved" + assert attrs.get("botanu.outcome.value_amount") == 5.0 + + def test_emit_outcome_with_confidence(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + emit_outcome("success", confidence=0.95) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.outcome.confidence") == 0.95 + + def test_emit_outcome_adds_event(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + emit_outcome("success", value_type="orders", value_amount=1) + + spans = memory_exporter.get_finished_spans() + events = [e for e in spans[0].events if e.name == "botanu.outcome_emitted"] + assert len(events) == 1 + assert events[0].attributes["status"] == "success" + + +class TestSetBusinessContext: + """Tests for set_business_context function.""" + + def test_set_customer_id(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_business_context(customer_id="cust-123") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.customer_id") == "cust-123" + + def test_set_team(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_business_context(team="platform-team") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.team") == "platform-team" + + def test_set_cost_center(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_business_context(cost_center="CC-456") + + spans = 
memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.cost_center") == "CC-456" + + def test_set_region(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_business_context(region="us-west-2") + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.region") == "us-west-2" + + def test_set_multiple_contexts(self, memory_exporter): + tracer = trace.get_tracer("test") + with tracer.start_as_current_span("test-span"): + set_business_context( + customer_id="cust-123", + team="support", + cost_center="CC-456", + region="eu-central-1", + ) + + spans = memory_exporter.get_finished_spans() + attrs = dict(spans[0].attributes) + assert attrs.get("botanu.customer_id") == "cust-123" + assert attrs.get("botanu.team") == "support" + assert attrs.get("botanu.cost_center") == "CC-456" + assert attrs.get("botanu.region") == "eu-central-1"