diff --git a/backend/sample.env b/backend/sample.env index 909f02e1f9..7b92796369 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -1,3 +1,15 @@ +# ----------------------------------------------------------------------------- +# WARNING: This file is NOT runnable by itself. +# It must be combined with docker/sample.env (copied as docker/.env), +# which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*, +# PLATFORM_SERVICE_HOST/PORT, PROMPT_HOST/PORT, X2TEXT_HOST/PORT, +# UNSTRACT_RUNNER_HOST/PORT, MinIO storage credentials, and timeouts. +# Missing these will cause startup failures or misrouted traffic. +# +# For local dev outside Docker, copy shared vars here and replace +# container hostnames with localhost (see docker/sample.env header). +# ----------------------------------------------------------------------------- + DJANGO_SETTINGS_MODULE='backend.settings.dev' # NOTE: Change below to True if you are running in HTTPS mode. @@ -14,22 +26,9 @@ PATH_PREFIX="api/v1" DJANGO_APP_BACKEND_URL=http://frontend.unstract.localhost DJANGO_SECRET_KEY="1(xf&nc6!y7!l&!5xe&i_rx7e^m@fcut9fduv86ft=-b@2g6" -# Postgres DB envs -DB_HOST='unstract-db' -DB_USER='unstract_dev' -DB_PASSWORD='unstract_pass' -DB_NAME='unstract_db' -DB_PORT=5432 -DB_SCHEMA="unstract" - # Celery Backend Database (optional - defaults to DB_NAME if unset) # Example: # CELERY_BACKEND_DB_NAME=unstract_celery_db -# Redis -REDIS_HOST="unstract-redis" -REDIS_PORT=6379 -REDIS_PASSWORD="" -REDIS_USER=default # Redis Retry Configuration # Controls automatic retry behavior for transient Redis connection failures @@ -74,20 +73,6 @@ GOOGLE_STORAGE_ACCESS_KEY_ID= GOOGLE_STORAGE_SECRET_ACCESS_KEY= GOOGLE_STORAGE_BASE_URL=https://storage.googleapis.com -# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# Tool Runner -UNSTRACT_RUNNER_HOST=http://unstract-runner -UNSTRACT_RUNNER_PORT=5002 -UNSTRACT_RUNNER_API_TIMEOUT=240 # (in seconds) 2 mins 
-UNSTRACT_RUNNER_API_RETRY_COUNT=5 # Number of retries for failed requests -UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 # Exponential backoff factor for retries - -# Prompt Service -PROMPT_HOST=http://unstract-prompt-service -PROMPT_PORT=3003 #Prompt Studio PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data @@ -98,15 +83,6 @@ STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.97" STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure" STRUCTURE_TOOL_IMAGE_TAG="0.0.97" -# Feature Flags -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python - - -#X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 # Encryption Key # Key must be 32 url-safe base64-encoded bytes. Check the README.md for details @@ -142,14 +118,6 @@ SYSTEM_ADMIN_EMAIL="admin@abc.com" # Set Django Session Expiry Time (in seconds) SESSION_COOKIE_AGE=86400 -# Control async extraction of LLMWhisperer -# Time in seconds to wait before polling LLMWhisperer's status API -ADAPTER_LLMW_POLL_INTERVAL=30 -# Total number of times to poll the status API. -# 500 mins to allow 1500 (max pages limit) * 20 (approx time in sec to process a page) -ADAPTER_LLMW_MAX_POLLS=1000 -# Number of times to retry the /whisper-status API before failing the extraction -ADAPTER_LLMW_STATUS_RETRIES=5 # Enable logging of workflow history. ENABLE_LOG_HISTORY=True @@ -157,14 +125,6 @@ ENABLE_LOG_HISTORY=True LOG_HISTORY_CONSUMER_INTERVAL=30 # Maximum number of logs to insert in a single batch. 
LOGS_BATCH_LIMIT=30 -# Logs Expiry of 24 hours -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - -# Celery Configuration -# Used by celery and to connect to queue to push logs -CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//" -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password # Indexing flag to prevent re-index INDEXING_FLAG_TTL=1800 @@ -176,25 +136,6 @@ NOTIFICATION_TIMEOUT=5 # with a YAML and JSONs TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config" -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False - -# File System Configuration for Workflow and API Execution - -# Directory Prefixes for storing execution files -WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" -API_EXECUTION_DIR_PREFIX="unstract/api" - -# Storage Provider for Workflow Execution -# Valid options: MINIO, S3, etc.. -WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -# Storage Provider for API Execution -API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -#Remote storage related envs -PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" # Storage Provider for Tool registry TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}' @@ -249,3 +190,7 @@ HITL_FILES_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endp # File active cache redis db FILE_ACTIVE_CACHE_REDIS_DB=0 + +# LLMWhisperer async extraction settings +ADAPTER_LLMW_POLL_INTERVAL=30 +ADAPTER_LLMW_MAX_POLLS=1000 diff --git a/docker/README.md b/docker/README.md index 4990ffe314..b935c7f900 100644 --- a/docker/README.md +++ b/docker/README.md @@ -15,7 +15,23 @@ VERSION=dev docker compose -f docker-compose.build.yaml --profile optional build ## 
Docker Run -**NOTE**: First copy `sample.*.env` files to `*.env` and update as required. +**NOTE**: Before running, set up your env files: + +```bash +# Run from the docker/ directory: + +# 1. Common env vars shared across all services and infrastructure +# (DB, Redis, RabbitMQ, Flipt, MinIO, service URLs, worker config) +cp sample.env .env + +# 2. Per-service env files (service-specific settings only) +cp ../backend/sample.env ../backend/.env +cp ../platform-service/sample.env ../platform-service/.env +cp ../prompt-service/sample.env ../prompt-service/.env +cp ../x2text-service/sample.env ../x2text-service/.env +cp ../runner/sample.env ../runner/.env +cp ../workers/sample.env ../workers/.env +``` ```bash # Up all services diff --git a/docker/docker-compose-dev-essentials.yaml b/docker/docker-compose-dev-essentials.yaml index d0ec7b4ba2..9638ae652a 100644 --- a/docker/docker-compose-dev-essentials.yaml +++ b/docker/docker-compose-dev-essentials.yaml @@ -11,7 +11,13 @@ services: - postgres_data:/var/lib/postgresql/data/ - ./scripts/db-setup/db_setup.sh:/docker-entrypoint-initdb.d/db_setup.sh env_file: - - ./essentials.env + - ./.env + environment: + # Map app DB_* vars to Postgres container's expected POSTGRES_* vars + POSTGRES_USER: ${DB_USER} + POSTGRES_PASSWORD: ${DB_PASSWORD} + POSTGRES_DB: ${DB_NAME} + POSTGRES_SCHEMA: ${DB_SCHEMA} labels: - traefik.enable=false @@ -39,7 +45,7 @@ services: volumes: - minio_data:/data env_file: - - ./essentials.env + - ./.env command: server /data --console-address ":9001" labels: - traefik.enable=true @@ -100,6 +106,14 @@ services: - "9005:9000" # gRPC port volumes: - flipt_data:/var/opt/flipt + # https://www.flipt.io/docs/configuration/overview#environment-variables + # https://www.flipt.io/docs/configuration/overview#configuration-parameters + env_file: + - ./.env + environment: + FLIPT_CACHE_ENABLED: true + # Flipt DB connection derived from app DB_* vars + FLIPT_DB_URL: 
"postgres://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME}?sslmode=disable" labels: - traefik.enable=true - traefik.http.routers.feature-flag.rule=Host(`feature-flag.unstract.localhost`) @@ -128,7 +142,7 @@ services: labels: - traefik.enable=false env_file: - - ./essentials.env + - ./.env rabbitmq: image: rabbitmq:4.1.0-management @@ -136,7 +150,11 @@ services: hostname: unstract-rabbit restart: unless-stopped env_file: - - ./essentials.env + - ./.env + environment: + # Map app CELERY_BROKER_* vars to RabbitMQ container's expected vars + RABBITMQ_DEFAULT_USER: ${CELERY_BROKER_USER} + RABBITMQ_DEFAULT_PASS: ${CELERY_BROKER_PASS} ports: - "5672:5672" # AMQP port - "15672:15672" # Management UI port diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 6f1996818a..92b0eb2134 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -12,6 +12,7 @@ services: ports: - "8000:8000" env_file: + - ./.env - ../backend/.env depends_on: - db @@ -26,7 +27,7 @@ services: volumes: - prompt_studio_data:/app/prompt-studio-data - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config environment: - ENVIRONMENT=development - APPLICATION_NAME=unstract-backend @@ -46,6 +47,7 @@ services: entrypoint: .venv/bin/celery command: "-A backend worker --loglevel=info -Q dashboard_metric_events --autoscale=${WORKER_METRICS_AUTOSCALE:-4,1}" env_file: + - ./.env - ../backend/.env depends_on: - rabbitmq @@ -65,6 +67,7 @@ services: entrypoint: .venv/bin/celery command: "-A backend flower --port=5555 --purge_offline_workers=5" env_file: + - ./.env - ../backend/.env depends_on: - rabbitmq @@ -88,8 +91,8 @@ services: entrypoint: .venv/bin/celery command: "-A backend beat --scheduler django_celery_beat.schedulers:DatabaseScheduler -l INFO" env_file: + - ./.env - ../backend/.env - - ./essentials.env depends_on: - db - rabbitmq 
@@ -121,6 +124,7 @@ services: ports: - "3001:3001" env_file: + - ./.env - ../platform-service/.env depends_on: - redis @@ -140,6 +144,7 @@ services: ports: - "3003:3003" env_file: + - ./.env - ../prompt-service/.env labels: - traefik.enable=false @@ -154,6 +159,7 @@ services: ports: - "3004:3004" env_file: + - ./.env - ../x2text-service/.env depends_on: - db @@ -167,6 +173,7 @@ services: ports: - 5002:5002 env_file: + - ./.env - ../runner/.env volumes: - ./workflow_data:/data @@ -190,8 +197,8 @@ services: ports: - "8085:8090" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -212,7 +219,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-callback-v2: image: unstract/worker-unified:${VERSION} @@ -222,8 +229,8 @@ services: ports: - "8086:8083" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -238,7 +245,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-file-processing-v2: image: unstract/worker-unified:${VERSION} @@ -263,8 +270,8 @@ services: ports: - "8087:8082" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -290,7 +297,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-general-v2: image: unstract/worker-unified:${VERSION} @@ -300,8 +307,8 @@ services: ports: - "8088:8082" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - 
redis @@ -317,7 +324,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-notification-v2: image: unstract/worker-unified:${VERSION} @@ -327,8 +334,8 @@ services: ports: - "8089:8085" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -365,7 +372,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-log-consumer-v2: image: unstract/worker-unified:${VERSION} @@ -375,8 +382,8 @@ services: ports: - "8090:8084" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -414,7 +421,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config worker-log-history-scheduler-v2: image: unstract/worker-unified:${VERSION} @@ -423,8 +430,8 @@ services: entrypoint: ["/bin/bash"] command: ["/app/log_consumer/scheduler.sh"] env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -447,8 +454,8 @@ services: ports: - "8091:8087" env_file: + - ./.env - ../workers/.env - - ./essentials.env depends_on: - db - redis @@ -481,7 +488,7 @@ services: - traefik.enable=false volumes: - ./workflow_data:/data - - ${TOOL_REGISTRY_CONFIG_SRC_PATH}:/data/tool_registry_config + - ${TOOL_REGISTRY_CONFIG_SRC_PATH:-../unstract/tool-registry/tool_registry_config}:/data/tool_registry_config volumes: prompt_studio_data: diff --git a/docker/sample.env b/docker/sample.env index 2feb36cf2b..da37bc01d7 100644 --- a/docker/sample.env +++ 
b/docker/sample.env @@ -1,3 +1,121 @@ +# ============================================================================= +# Unstract Docker Environment Configuration +# Copy this file to .env and update the values for your environment. +# +# This file is used by: +# - Infrastructure containers (Postgres, MinIO, RabbitMQ, Flipt) +# - Application services (backend, workers, platform-service, etc.) +# - Docker Compose variable substitution (worker scaling, celery config) +# +# For local development outside Docker, replace container hostnames: +# unstract-db -> localhost +# unstract-redis -> localhost +# unstract-rabbitmq -> localhost (CELERY_BROKER_BASE_URL=amqp://localhost:5672//) +# unstract-flipt -> localhost (EVALUATION_SERVER_PORT=9005 for host-mapped port) +# unstract-minio -> localhost (in storage credential JSON blocks) +# unstract-platform-service -> localhost +# unstract-prompt-service -> localhost +# unstract-x2text-service -> localhost +# unstract-runner -> localhost +# ============================================================================= + +# ============================================================================= +# PostgreSQL +# These DB_* vars are used by app services AND automatically mapped to +# POSTGRES_* for the Postgres container via docker-compose environment blocks. +# You only need to set them once here. +# +# DEPRECATED ALIASES (kept for rolling-deploy compatibility, remove after full rollout): +# PG_BE_HOST, PG_BE_PORT, PG_BE_USERNAME, PG_BE_PASSWORD, PG_BE_DATABASE +# DB_USERNAME (x2text-service legacy) +# New code falls back to these if DB_* is not set. 
+# ============================================================================= +DB_HOST=unstract-db +DB_PORT=5432 +DB_USER=unstract_dev +DB_PASSWORD=unstract_pass +DB_NAME=unstract_db +DB_SCHEMA=unstract + +# ============================================================================= +# Redis +# ============================================================================= +REDIS_HOST=unstract-redis +REDIS_PORT=6379 +REDIS_USER=default +REDIS_PASSWORD= + +# ============================================================================= +# RabbitMQ / Celery Broker +# These CELERY_BROKER_* vars are used by app services AND automatically mapped +# to RABBITMQ_* for the RabbitMQ container via docker-compose environment blocks. +# You only need to set them once here. +# ============================================================================= +CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672// +CELERY_BROKER_USER=admin +CELERY_BROKER_PASS=password + +# ============================================================================= +# MinIO (Object Storage) +# MINIO_ROOT_USER/PASSWORD are used by the MinIO container on init. +# IMPORTANT: If you change these, update ALL FOUR JSON blocks below too. +# The "key" and "secret" values must match MINIO_ROOT_USER and MINIO_ROOT_PASSWORD. 
+# ============================================================================= +MINIO_ROOT_USER=minio +MINIO_ROOT_PASSWORD=minio123 + +WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' +REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data" + +# File execution directory prefixes +WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" +API_EXECUTION_DIR_PREFIX="unstract/api" + +# ============================================================================= +# Flipt (Feature Flags) +# ============================================================================= +FLIPT_SERVICE_AVAILABLE=False +EVALUATION_SERVER_IP=unstract-flipt +EVALUATION_SERVER_PORT=9000 +PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +# ============================================================================= +# Inter-Service Communication +# ============================================================================= + +# Platform Service +PLATFORM_SERVICE_HOST=http://unstract-platform-service +PLATFORM_SERVICE_PORT=3001 + +# Prompt Service +PROMPT_HOST=http://unstract-prompt-service +PROMPT_PORT=3003 + +# X2Text Service +X2TEXT_HOST=http://unstract-x2text-service +X2TEXT_PORT=3004 + +# Tool Runner +UNSTRACT_RUNNER_HOST=http://unstract-runner +UNSTRACT_RUNNER_PORT=5002 +UNSTRACT_RUNNER_API_TIMEOUT=240 +UNSTRACT_RUNNER_API_RETRY_COUNT=5 +UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 + +# 
============================================================================= +# Shared Timeouts & Settings +# ============================================================================= + +# Logs expiry (24 hours) +LOGS_EXPIRATION_TIME_IN_SECOND=86400 + +# ============================================================================= +# Docker Compose / Worker Configuration +# ============================================================================= + # Path where public and private tools are registered # with a YAML and JSONs TOOL_REGISTRY_CONFIG_SRC_PATH="${PWD}/../unstract/tool-registry/tool_registry_config" diff --git a/docker/sample.essentials.env b/docker/sample.essentials.env deleted file mode 100644 index 51876fb8f9..0000000000 --- a/docker/sample.essentials.env +++ /dev/null @@ -1,19 +0,0 @@ -# Refer https://hub.docker.com/_/postgres#:~:text=How%20to%20extend%20this%20image -POSTGRES_USER=unstract_dev -POSTGRES_PASSWORD=unstract_pass -POSTGRES_DB=unstract_db -# Used by db setup script -POSTGRES_SCHEMA=unstract - -MINIO_ROOT_USER=minio -MINIO_ROOT_PASSWORD=minio123 -MINIO_ACCESS_KEY=minio -MINIO_SECRET_KEY=minio123 - -QDRANT_USER=unstract_vector_dev -QDRANT_PASS=unstract_vector_pass -QDRANT_DB=unstract_vector_db - -# RabbitMQ related envs -RABBITMQ_DEFAULT_USER=admin -RABBITMQ_DEFAULT_PASS=password diff --git a/docker/scripts/db-setup/README.md b/docker/scripts/db-setup/README.md index e24b9c875b..6415e9949e 100644 --- a/docker/scripts/db-setup/README.md +++ b/docker/scripts/db-setup/README.md @@ -1,9 +1,10 @@ # Unstract DB Setup Script -[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps setup the postgres database by making use of environment variables defined in the `.essentials.env` (user copy of the [sample.essentials.env](/docker/sample.essentials.env)) +[The db_setup.sh](/docker/scripts/db-setup/db_setup.sh) script helps set up the postgres database by making use of environment variables derived from the `.env` (user copy of 
[sample.env](/docker/sample.env)). The Postgres container receives these via docker-compose environment mappings: -- POSTGRES_USER -- POSTGRES_DB -- POSTGRES_SCHEMA +- POSTGRES_USER (mapped from DB_USER) +- POSTGRES_PASSWORD (mapped from DB_PASSWORD) +- POSTGRES_DB (mapped from DB_NAME) +- POSTGRES_SCHEMA (mapped from DB_SCHEMA) -This script helps setup the DB user and creates a new schema as well. +This script helps set up the DB user and creates a new schema as well. diff --git a/platform-service/sample.env b/platform-service/sample.env index 54eec703c3..473b157643 100644 --- a/platform-service/sample.env +++ b/platform-service/sample.env @@ -1,33 +1,18 @@ +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). +# ----------------------------------------------------------------------------- + # Flask FLASK_APP=src/unstract/platform_service/run.py FLASK_RUN_PORT=3001 -# Redis -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_USERNAME=default -REDIS_PASSWORD= - -# Backend DB -PG_BE_HOST=unstract-db -PG_BE_PORT=5432 -PG_BE_USERNAME=unstract_dev -PG_BE_PASSWORD=unstract_pass -PG_BE_DATABASE=unstract_db -DB_SCHEMA="unstract" - - # Encryption Key # key must be 32 url-safe base64-encoded bytes. 
ENCRYPTION_KEY="Sample-Key" -# Feature Flags -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False - # Cost calculation related ENVs MODEL_PRICES_URL="https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" MODEL_PRICES_TTL_IN_DAYS=7 @@ -35,6 +20,5 @@ MODEL_PRICES_FILE_PATH="/cost/model_prices.json" #Remote storage config FILE_STORAGE_CREDENTIALS='{"provider":"local"}' -REMOTE_MODEL_PRICES_FILE_PATH="unstract/cost/model_prices.json" LOG_LEVEL=INFO diff --git a/platform-service/src/unstract/platform_service/config.py b/platform-service/src/unstract/platform_service/config.py index 08fff41307..40df343a47 100644 --- a/platform-service/src/unstract/platform_service/config.py +++ b/platform-service/src/unstract/platform_service/config.py @@ -38,11 +38,11 @@ def create_app() -> Flask: # Initialize and connect to the database db.init( - database=Env.PG_BE_DATABASE, - user=Env.PG_BE_USERNAME, - password=Env.PG_BE_PASSWORD, - host=Env.PG_BE_HOST, - port=Env.PG_BE_PORT, + database=Env.DB_NAME, + user=Env.DB_USER, + password=Env.DB_PASSWORD, + host=Env.DB_HOST, + port=Env.DB_PORT, options=f"-c application_name={Env.APPLICATION_NAME}", ) diff --git a/platform-service/src/unstract/platform_service/env.py b/platform-service/src/unstract/platform_service/env.py index 2bcf2da382..2d37fa90be 100644 --- a/platform-service/src/unstract/platform_service/env.py +++ b/platform-service/src/unstract/platform_service/env.py @@ -10,13 +10,19 @@ class Env: BAD_REQUEST = "Bad Request" REDIS_HOST = EnvManager.get_required_setting("REDIS_HOST") REDIS_PORT = int(EnvManager.get_required_setting("REDIS_PORT", 6379)) - REDIS_USERNAME = os.environ.get("REDIS_USERNAME") + # REDIS_USER/PASSWORD are optional (local Redis often has no auth) + REDIS_USER = os.environ.get("REDIS_USER") or os.environ.get("REDIS_USERNAME") REDIS_PASSWORD = 
os.environ.get("REDIS_PASSWORD") - PG_BE_HOST = os.environ.get("PG_BE_HOST") - PG_BE_PORT = int(os.environ.get("PG_BE_PORT", 5432)) - PG_BE_USERNAME = os.environ.get("PG_BE_USERNAME") - PG_BE_PASSWORD = os.environ.get("PG_BE_PASSWORD") - PG_BE_DATABASE = os.environ.get("PG_BE_DATABASE") + # DB vars: new names with fallback to legacy PG_BE_* names for rolling deploys + DB_HOST = EnvManager.get_required_setting("DB_HOST", os.environ.get("PG_BE_HOST")) + DB_PORT = int( + EnvManager.get_required_setting("DB_PORT", os.environ.get("PG_BE_PORT", "5432")) + ) + DB_USER = EnvManager.get_required_setting("DB_USER", os.environ.get("PG_BE_USERNAME")) + DB_PASSWORD = EnvManager.get_required_setting( + "DB_PASSWORD", os.environ.get("PG_BE_PASSWORD") + ) + DB_NAME = EnvManager.get_required_setting("DB_NAME", os.environ.get("PG_BE_DATABASE")) ENCRYPTION_KEY = EnvManager.get_required_setting("ENCRYPTION_KEY") MODEL_PRICES_URL = EnvManager.get_required_setting("MODEL_PRICES_URL") MODEL_PRICES_TTL_IN_DAYS = int( diff --git a/platform-service/src/unstract/platform_service/extensions.py b/platform-service/src/unstract/platform_service/extensions.py index d847405791..0798e6b15f 100644 --- a/platform-service/src/unstract/platform_service/extensions.py +++ b/platform-service/src/unstract/platform_service/extensions.py @@ -25,7 +25,7 @@ def get_redis_pool() -> redis.ConnectionPool: _redis_pool = redis.ConnectionPool( host=Env.REDIS_HOST, port=Env.REDIS_PORT, - username=Env.REDIS_USERNAME, + username=Env.REDIS_USER, password=Env.REDIS_PASSWORD, max_connections=10, decode_responses=False, diff --git a/prompt-service/sample.env b/prompt-service/sample.env index e26e6cbcd2..9f804c127f 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -1,65 +1,16 @@ -# Backend DB -PG_BE_HOST=unstract-db -PG_BE_PORT=5432 -PG_BE_USERNAME=unstract_dev -PG_BE_PASSWORD=unstract_pass -PG_BE_DATABASE=unstract_db -DB_SCHEMA="unstract" - -# Redis -REDIS_HOST="unstract-redis" -REDIS_PORT=6379 
-REDIS_PASSWORD="" -REDIS_USER=default +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). +# ----------------------------------------------------------------------------- # Logging LOG_LEVEL=INFO - -### Env from `unstract-core` ### -# Celery for PublishLogs -CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//" -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password -# Logs Expiry of 24 hours -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - - -### Env from `unstract-flags` ### -# Feature Flags -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9000 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False - - -### Env from `unstract-sdk` ### -# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 - -# Remote storage related envs -PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -TEMPORARY_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data/" - # Timeout for LLMW (v2) extraction ADAPTER_LLMW_WAIT_TIMEOUT=900 # 15 mins -# Control async extraction of LLMWhisperer (v1) -# Time in seconds to wait before polling LLMWhisperer's status API -ADAPTER_LLMW_POLL_INTERVAL=30 -# Total number of times to poll the status API. 
-# 500 mins to allow 1500 (max pages limit) * 20 (approx time in sec to process a page) -ADAPTER_LLMW_MAX_POLLS=1000 -# Number of times to retry the /whisper-status API before failing the extraction -ADAPTER_LLMW_STATUS_RETRIES=5 - ### Env for Rentroll Service ### # Rentroll Service RENTROLL_SERVICE_HOST=http://unstract-rentroll-service diff --git a/prompt-service/src/unstract/prompt_service/extensions.py b/prompt-service/src/unstract/prompt_service/extensions.py index 1626591dc0..28019ac890 100644 --- a/prompt-service/src/unstract/prompt_service/extensions.py +++ b/prompt-service/src/unstract/prompt_service/extensions.py @@ -8,11 +8,12 @@ from unstract.prompt_service.utils.env_loader import get_env_or_die # Load required environment variables -db_host = get_env_or_die("PG_BE_HOST") -db_port = get_env_or_die("PG_BE_PORT") -db_user = get_env_or_die("PG_BE_USERNAME") -db_pass = get_env_or_die("PG_BE_PASSWORD") -db_name = get_env_or_die("PG_BE_DATABASE") +# New names with fallback to legacy PG_BE_* names for rolling deploys +db_host = get_env_or_die("DB_HOST", env.get("PG_BE_HOST")) +db_port = get_env_or_die("DB_PORT", env.get("PG_BE_PORT")) +db_user = get_env_or_die("DB_USER", env.get("PG_BE_USERNAME")) +db_pass = get_env_or_die("DB_PASSWORD", env.get("PG_BE_PASSWORD")) +db_name = get_env_or_die("DB_NAME", env.get("PG_BE_DATABASE")) application_name = env.get("APPLICATION_NAME", "unstract-prompt-service") # Initialize and connect to the database diff --git a/run-platform.sh b/run-platform.sh index bcacfa82e8..40dfae7064 100755 --- a/run-platform.sh +++ b/run-platform.sh @@ -175,7 +175,7 @@ copy_or_merge_envs() { if [ ! -e "$dest_file" ]; then cp "$src_file" "$dest_file" echo -e "Created env for ""$blue_text""$displayed_reason""$default_text"" at ""$blue_text""$dest_file""$default_text""." - elif [ "$opt_only_env" = true ] || [ "$opt_update" = true ]; then + else python3 "$script_dir/docker/scripts/merge_env.py" "$src_file" "$dest_file" if [ $? 
-ne 0 ]; then exit 1 @@ -238,8 +238,7 @@ setup_env() { fi done - copy_or_merge_envs "$script_dir/docker/sample.essentials.env" "$script_dir/docker/essentials.env" "essential services" - copy_or_merge_envs "$script_dir/docker/sample.env" "$script_dir/docker/.env" "docker compose" + copy_or_merge_envs "$script_dir/docker/sample.env" "$script_dir/docker/.env" "docker services" if [ "$opt_only_env" = true ]; then diff --git a/runner/sample.env b/runner/sample.env index 723bc89612..bcd892192c 100644 --- a/runner/sample.env +++ b/runner/sample.env @@ -1,7 +1,9 @@ -# To pass to tool-sidecar for Kombu's connection -CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//" -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). +# ----------------------------------------------------------------------------- TOOL_CONTAINER_NETWORK="unstract-network" TOOL_CONTAINER_LABELS="[]" @@ -20,27 +22,6 @@ REMOVE_CONTAINER_ON_EXIT=True # Client module path of the container engine to be used. CONTAINER_CLIENT_PATH=unstract.runner.clients.docker_client -# Logs Expiry of 24 hours -LOGS_EXPIRATION_TIME_IN_SECOND=86400 - -# Feature Flags -FLIPT_SERVICE_AVAILABLE=False -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9005 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python - -# File System Configuration for Workflow and API Execution -# Directory Prefixes for storing execution files -WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution" -# Storage Provider for Workflow Execution -# Valid options: MINIO, S3, etc.. 
-WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -# For unified notification -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_USER=default -REDIS_PASSWORD= # Flask related envs # Can be 'production' or 'development' diff --git a/workers/sample.env b/workers/sample.env index 516fc242e1..74cda9e92b 100644 --- a/workers/sample.env +++ b/workers/sample.env @@ -1,7 +1,15 @@ # ============================================================================= # Unstract Workers Environment Configuration # ============================================================================= -# Copy this file to .env and update the values for your environment +# Copy this file to .env and update the values for your environment. +# +# WARNING: This file is NOT runnable by itself. +# It must be combined with docker/sample.env (copied as docker/.env), +# which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*, +# and other infrastructure settings. Missing these will cause startup failures. +# +# For local dev outside Docker, copy shared vars here and replace +# container hostnames with localhost (see docker/sample.env header). 
# ============================================================================= # Core Configuration (REQUIRED) @@ -43,36 +51,15 @@ INTERNAL_API_ORGANIZATION_PREFIX=v1/organization/ # Celery Configuration # ============================================================================= -# Celery Broker (RabbitMQ) - REQUIRED -# These credentials must match your RabbitMQ configuration -CELERY_BROKER_BASE_URL=amqp://unstract-rabbitmq:5672// -CELERY_BROKER_USER=admin -CELERY_BROKER_PASS=password - -# ============================================================================= -# Database Configuration (REQUIRED) -# ============================================================================= - -# PostgreSQL (for Celery result backend) - REQUIRED -# These credentials must match your PostgreSQL configuration -DB_HOST=unstract-db -DB_USER=unstract_dev -DB_PASSWORD=unstract_pass -DB_NAME=unstract_db -DB_PORT=5432 -DB_SCHEMA=unstract - # Celery Backend Database Schema CELERY_BACKEND_DB_SCHEMA=public -# Redis (for caching and queues) - REQUIRED -REDIS_HOST=unstract-redis -REDIS_PORT=6379 -REDIS_PASSWORD= -REDIS_USER=default +# Redis DB index (general Redis settings come from docker/sample.env) REDIS_DB=0 -# Cache-Specific Redis Configuration +# Cache-Specific Redis Configuration (worker-specific overrides) +# These override the base REDIS_* vars from docker/sample.env for worker cache operations. +# For Docker: use unstract-redis. For local dev: use localhost. 
CACHE_REDIS_ENABLED=true CACHE_REDIS_HOST=unstract-redis CACHE_REDIS_PORT=6379 @@ -82,9 +69,6 @@ CACHE_REDIS_USERNAME= CACHE_REDIS_SSL=false CACHE_REDIS_SSL_CERT_REQS=required -# Database URL (for fallback usage) -DATABASE_URL=postgresql://unstract_dev:unstract_pass@unstract-db:5432/unstract_db - # ============================================================================= # Worker Infrastructure Settings # ============================================================================= @@ -208,7 +192,6 @@ WORKER_INSTANCE_ID=dev-01 ENABLE_LOG_HISTORY=true LOG_HISTORY_CONSUMER_INTERVAL=30 LOGS_BATCH_LIMIT=30 -LOGS_EXPIRATION_TIME_IN_SECOND=86400 LOG_HISTORY_QUEUE_NAME=log_history_queue # Log Queue Size Protection @@ -226,36 +209,11 @@ NOTIFICATION_QUEUE_NAME=notifications # Backend Services # ============================================================================= -# Platform Service -PLATFORM_SERVICE_HOST=http://unstract-platform-service -PLATFORM_SERVICE_PORT=3001 - -# Prompt Service -PROMPT_HOST=http://unstract-prompt-service -PROMPT_PORT=3003 - -# X2Text Service -X2TEXT_HOST=http://unstract-x2text-service -X2TEXT_PORT=3004 - -# Tool Runner -UNSTRACT_RUNNER_HOST=http://unstract-runner -UNSTRACT_RUNNER_PORT=5002 -UNSTRACT_RUNNER_API_TIMEOUT=300 -UNSTRACT_RUNNER_API_RETRY_COUNT=5 -UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 # ============================================================================= # File Storage Configuration # ============================================================================= -# File Storage Credentials (MinIO) -WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' -API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}' - -# File Execution Configuration -WORKFLOW_EXECUTION_DIR_PREFIX=unstract/execution 
-API_EXECUTION_DIR_PREFIX=unstract/api MAX_PARALLEL_FILE_BATCHES=1 # File Execution TTL Configuration @@ -360,25 +318,14 @@ GOOGLE_OAUTH2_SECRET= +# LLMWhisperer async extraction settings +ADAPTER_LLMW_POLL_INTERVAL=30 +ADAPTER_LLMW_MAX_POLLS=1000 + # ============================================================================= # Local Development Overrides # ============================================================================= -# For local development (all services on host), change Docker service names to localhost: +# For local development (all services on host), override vars from docker/sample.env: # DJANGO_APP_BACKEND_URL=http://localhost:8000 # INTERNAL_API_BASE_URL=http://localhost:8000/internal -# CELERY_BROKER_BASE_URL=amqp://localhost:5672// -# DB_HOST=localhost -# REDIS_HOST=localhost # CACHE_REDIS_HOST=localhost -# PLATFORM_SERVICE_HOST=http://localhost -# PROMPT_HOST=http://localhost -# X2TEXT_HOST=http://localhost -# UNSTRACT_RUNNER_HOST=http://localhost -# WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS={"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}} -# API_FILE_STORAGE_CREDENTIALS={"provider": "minio", "credentials": {"endpoint_url": "http://localhost:9000", "key": "minio", "secret": "minio123"}} - -# Flipt Service -FLIPT_SERVICE_AVAILABLE=False -EVALUATION_SERVER_IP=unstract-flipt -EVALUATION_SERVER_PORT=9005 -PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python diff --git a/workers/shared/infrastructure/config/worker_config.py b/workers/shared/infrastructure/config/worker_config.py index eb1c0b8e79..4a2cab2f41 100644 --- a/workers/shared/infrastructure/config/worker_config.py +++ b/workers/shared/infrastructure/config/worker_config.py @@ -372,7 +372,8 @@ def __post_init__(self): f"Worker configuration validation failed (worker will continue with defaults): {e}" ) logging.info( - "To fix this, ensure all required environment variables are set. 
See workers/sample.env" + "To fix this, ensure all required environment variables are set. " + "See docker/sample.env for shared vars and workers/sample.env for worker-specific vars" ) def _build_cache_redis_url(self): @@ -427,7 +428,7 @@ def validate(self): "CELERY_BROKER_URL could not be built. Please set the following environment variables: " "CELERY_BROKER_BASE_URL (e.g., 'amqp://unstract-rabbitmq:5672//'), " "CELERY_BROKER_USER, and CELERY_BROKER_PASS. " - "See workers/sample.env for examples." + "See docker/sample.env for these shared vars." ) if not self.celery_result_backend: @@ -435,7 +436,7 @@ def validate(self): "CELERY_RESULT_BACKEND could not be built. Please set the following environment variables: " "DB_HOST, DB_USER, DB_PASSWORD, DB_NAME, and DB_PORT. " "These are required for Celery to store task results. " - "See workers/sample.env for examples." + "See docker/sample.env for these shared vars." ) # Cache Redis validation diff --git a/x2text-service/app/env.py b/x2text-service/app/env.py index d2c213fbb7..c379217e98 100644 --- a/x2text-service/app/env.py +++ b/x2text-service/app/env.py @@ -48,7 +48,8 @@ class Env: DB_SCHEMA = EnvManager.get_required_setting("DB_SCHEMA") DB_HOST = EnvManager.get_required_setting("DB_HOST") DB_PORT = int(EnvManager.get_required_setting("DB_PORT", 5432)) - DB_USERNAME = EnvManager.get_required_setting("DB_USERNAME") + # New name with fallback to legacy DB_USERNAME for rolling deploys + DB_USER = EnvManager.get_required_setting("DB_USER", os.environ.get("DB_USERNAME")) DB_PASSWORD = EnvManager.get_required_setting("DB_PASSWORD") DB_NAME = EnvManager.get_required_setting("DB_NAME") diff --git a/x2text-service/app/models.py b/x2text-service/app/models.py index b3fb102544..5528e7f7f8 100644 --- a/x2text-service/app/models.py +++ b/x2text-service/app/models.py @@ -7,7 +7,7 @@ be_db = peewee.PostgresqlDatabase( Env.DB_NAME, - user=Env.DB_USERNAME, + user=Env.DB_USER, password=Env.DB_PASSWORD, host=Env.DB_HOST, 
port=Env.DB_PORT, diff --git a/x2text-service/sample.env b/x2text-service/sample.env index df25e0b09b..8550ec652f 100644 --- a/x2text-service/sample.env +++ b/x2text-service/sample.env @@ -1,13 +1,11 @@ +# ----------------------------------------------------------------------------- +# LOCAL DEVELOPMENT NOTE: +# Shared env vars (DB, Redis, RabbitMQ, Flipt, service URLs, MinIO, timeouts) +# live in docker/sample.env. When running outside Docker, copy those vars +# here and replace container hostnames with localhost (see docker/sample.env header). +# ----------------------------------------------------------------------------- + FLASK_ENV=production FLASK_RUN_HOST=0.0.0.0 FLASK_RUN_PORT=3004 API_URL_PREFIX=/api/v1 - - -# Postgres -DB_HOST=unstract-db -DB_PORT=5432 -DB_USERNAME=unstract_dev -DB_PASSWORD=unstract_pass -DB_NAME=unstract_db -DB_SCHEMA="unstract"