Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
task_integrations,
integrations,
other,
health,
developer,
updates,
calendar_meetings,
Expand Down Expand Up @@ -87,6 +88,7 @@
app.include_router(trends.router)

app.include_router(other.router)
app.include_router(health.router)

app.include_router(firmware.router)
app.include_router(updates.router)
Expand Down
188 changes: 188 additions & 0 deletions backend/routers/health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import asyncio
import logging
import os
import time

import httpx
from fastapi import APIRouter
from fastapi.responses import JSONResponse

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Router that carries the /v1/health/* routes declared in this module.
router = APIRouter()

# Passed as the httpx client timeout by every HTTP-based check in this module.
TIMEOUT = 5.0 # seconds per check


async def _check_anthropic() -> dict:
"""Check Anthropic API connectivity."""
try:
api_key = os.getenv('ANTHROPIC_API_KEY', '')
if not api_key:
return {"status": "down", "error": "ANTHROPIC_API_KEY not set"}
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
r = await client.get(
"https://api.anthropic.com/v1/models",
headers={
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
},
)
if r.status_code == 200:
return {"status": "ok"}
elif r.status_code == 401:
return {"status": "down", "error": "invalid API key or out of credits"}
else:
return {"status": "down", "error": f"HTTP {r.status_code}"}
except Exception as e:
return {"status": "down", "error": str(e)[:200]}
Comment on lines +17 to +38
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Unauthenticated endpoints disclose API key configuration and status

These endpoints are intentionally public (for monitoring), but they return whether specific API keys are missing ("ANTHROPIC_API_KEY not set", "DEEPGRAM_API_KEY not set", etc.). An attacker probing the status page could enumerate which third-party integrations are configured or not on this backend.

Consider replacing the "key not set" messages with a generic "service not configured" or simply returning {"status": "down"} without specifying the reason, so the error details are not publicly exposed.



async def _check_deepgram() -> dict:
"""Check Deepgram API connectivity."""
try:
api_key = os.getenv('DEEPGRAM_API_KEY', '')
if not api_key:
return {"status": "down", "error": "DEEPGRAM_API_KEY not set"}
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
r = await client.get(
"https://api.deepgram.com/v1/projects",
headers={"Authorization": f"Token {api_key}"},
)
if r.status_code == 200:
return {"status": "ok"}
else:
return {"status": "down", "error": f"HTTP {r.status_code}"}
except Exception as e:
return {"status": "down", "error": str(e)[:200]}


async def _check_openai() -> dict:
"""Check OpenAI API connectivity."""
try:
api_key = os.getenv('OPENAI_API_KEY', '')
if not api_key:
return {"status": "down", "error": "OPENAI_API_KEY not set"}
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
r = await client.get(
"https://api.openai.com/v1/models",
headers={"Authorization": f"Bearer {api_key}"},
)
if r.status_code == 200:
return {"status": "ok"}
else:
return {"status": "down", "error": f"HTTP {r.status_code}"}
except Exception as e:
return {"status": "down", "error": str(e)[:200]}


async def _check_firestore() -> dict:
"""Check Firestore connectivity with a minimal read."""
try:
from database._client import db
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 In-function import violates backend import rules

from database._client import db is placed inside the function body. The backend import rules require all imports to be at the module's top level. Move this to the top of the file alongside the other imports:

Suggested change
from database._client import db
from database._client import db

(Add this line at the top of health.py with the other imports, then remove it from inside _check_firestore.)

Context Used: Backend Python import rules - no in-function impor... (source)

# Read a nonexistent doc — fast, just checks connectivity
doc = db.collection('_health_check').document('ping').get()
return {"status": "ok"}
except Exception as e:
return {"status": "down", "error": str(e)[:200]}
Comment on lines +79 to +87
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Blocking Firestore call in async function blocks event loop

_check_firestore is declared async but calls db.collection(...).document(...).get() — which uses the synchronous google.cloud.firestore.Client and performs a blocking network I/O call. Awaiting this inside asyncio.gather in health_services does not give you true concurrency: the synchronous .get() will hold the event loop for its entire duration, stalling all other concurrent async tasks (including the other health checks and any in-flight requests to the FastAPI app).

The fix is to offload the blocking call to a thread pool with the event loop's run_in_executor:

async def _check_firestore() -> dict:
    """Check Firestore connectivity with a minimal read."""
    try:
        from database._client import db  # move to top of file per import rules
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(
            None,
            lambda: db.collection('_health_check').document('ping').get()
        )
        return {"status": "ok"}
    except Exception as e:
        return {"status": "down", "error": str(e)[:200]}

Alternatively, use the async Firestore client (google.cloud.firestore.AsyncClient).



async def _check_typesense() -> dict:
"""Check Typesense connectivity."""
try:
host = os.getenv('TYPESENSE_HOST', '')
port = os.getenv('TYPESENSE_HOST_PORT', '443')
api_key = os.getenv('TYPESENSE_API_KEY', '')
if not host or not api_key:
return {"status": "down", "error": "TYPESENSE config not set"}
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
r = await client.get(
f"https://{host}:{port}/health",
headers={"X-TYPESENSE-API-KEY": api_key},
)
if r.status_code == 200:
return {"status": "ok"}
else:
return {"status": "down", "error": f"HTTP {r.status_code}"}
except Exception as e:
return {"status": "down", "error": str(e)[:200]}
Comment on lines +90 to +108
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Typesense /health endpoint is public and doesn't require an API key

The Typesense /health endpoint (documented at https://typesense.org/docs/) does not require authentication — it is intentionally public. Sending the API key in the X-TYPESENSE-API-KEY header on this call is therefore unnecessary: it does not break the request, but omitting it avoids any accidental key exposure in logs or network traces. Consider calling /health without the key at all.

Suggested change
async def _check_typesense() -> dict:
"""Check Typesense connectivity."""
try:
host = os.getenv('TYPESENSE_HOST', '')
port = os.getenv('TYPESENSE_HOST_PORT', '443')
api_key = os.getenv('TYPESENSE_API_KEY', '')
if not host or not api_key:
return {"status": "down", "error": "TYPESENSE config not set"}
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
r = await client.get(
f"https://{host}:{port}/health",
headers={"X-TYPESENSE-API-KEY": api_key},
)
if r.status_code == 200:
return {"status": "ok"}
else:
return {"status": "down", "error": f"HTTP {r.status_code}"}
except Exception as e:
return {"status": "down", "error": str(e)[:200]}
async with httpx.AsyncClient(timeout=TIMEOUT) as client:
r = await client.get(
f"https://{host}:{port}/health",
)



def _make_response(service: str, result: dict) -> JSONResponse:
    """Wrap a single check result in a non-cacheable JSON response.

    Args:
        service: Public name of the service, placed under the "service" key.
        result: Check result; its keys are merged into the body after
            "service".

    Returns:
        JSONResponse with HTTP 200 when ``result["status"]`` is "ok" and
        HTTP 503 for anything else.
    """
    payload = {"service": service}
    payload.update(result)
    is_healthy = result.get("status") == "ok"
    return JSONResponse(
        content=payload,
        status_code=200 if is_healthy else 503,
        headers={"Cache-Control": "no-cache, no-store"},
    )


@router.get("/v1/health/chat")
async def health_chat():
    """Report Anthropic (chat) API health as the "chat" service."""
    return _make_response("chat", await _check_anthropic())


@router.get("/v1/health/transcription")
async def health_transcription():
    """Report Deepgram (transcription) API health as the "transcription" service."""
    return _make_response("transcription", await _check_deepgram())


@router.get("/v1/health/ai")
async def health_ai():
    """Report OpenAI (AI processing) API health as the "ai" service."""
    return _make_response("ai", await _check_openai())


@router.get("/v1/health/storage")
async def health_storage():
    """Report Firestore (database) health as the "storage" service."""
    return _make_response("storage", await _check_firestore())


@router.get("/v1/health/search")
async def health_search():
    """Report Typesense (search) health as the "search" service."""
    return _make_response("search", await _check_typesense())


@router.get("/v1/health/services")
async def health_services():
    """Aggregate health check for all services.

    Runs the five per-service checks concurrently and reports each result
    under its public name (chat, transcription, ai, storage, search).

    Returns:
        JSONResponse whose body is
        ``{"status": ..., "services": {...}, "response_time_s": ...}``.
        ``status`` is "ok" when every check passed, "down" when none did and
        "degraded" otherwise. Only "down" maps to HTTP 503; the response is
        marked non-cacheable.
    """
    # Monotonic clock: the measured duration must not be skewed by wall-clock
    # adjustments that happen while the checks are running.
    start = time.monotonic()
    results = await asyncio.gather(
        _check_anthropic(),
        _check_deepgram(),
        _check_openai(),
        _check_firestore(),
        _check_typesense(),
        return_exceptions=True,
    )

    # Same order as the coroutines passed to gather() above.
    service_names = ["chat", "transcription", "ai", "storage", "search"]
    services = {}
    for name, result in zip(service_names, results):
        # BaseException rather than Exception: with return_exceptions=True a
        # CancelledError can be handed back as a result, and it is not an
        # Exception subclass on Python 3.8+.
        if isinstance(result, BaseException):
            # Some exceptions have an empty message; fall back to the type name.
            detail = str(result)[:200] or type(result).__name__
            services[name] = {"status": "down", "error": detail}
        else:
            services[name] = result

    up_count = sum(1 for s in services.values() if s.get("status") == "ok")
    total = len(services)

    if up_count == total:
        overall = "ok"
        status_code = 200
    elif up_count == 0:
        overall = "down"
        status_code = 503
    else:
        overall = "degraded"
        status_code = 200  # Still return 200 for degraded so the page shows partial

    elapsed = round(time.monotonic() - start, 2)
    body = {"status": overall, "services": services, "response_time_s": elapsed}
    return JSONResponse(content=body, status_code=status_code, headers={"Cache-Control": "no-cache, no-store"})
Loading