Skip to content
Open
87 changes: 16 additions & 71 deletions backend/sample.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# -----------------------------------------------------------------------------
# WARNING: This file is NOT runnable by itself.
# It must be combined with docker/sample.env (copied as docker/.env),
# which provides required shared vars: DB_*, REDIS_*, CELERY_BROKER_*,
# PLATFORM_SERVICE_HOST/PORT, PROMPT_HOST/PORT, X2TEXT_HOST/PORT,
# UNSTRACT_RUNNER_HOST/PORT, MinIO storage credentials, and timeouts.
# Missing these will cause startup failures or misrouted traffic.
#
# For local dev outside Docker, copy shared vars here and replace
# container hostnames with localhost (see docker/sample.env header).
# -----------------------------------------------------------------------------

DJANGO_SETTINGS_MODULE='backend.settings.dev'

# NOTE: Change below to True if you are running in HTTPS mode.
Expand All @@ -14,22 +26,9 @@ PATH_PREFIX="api/v1"
DJANGO_APP_BACKEND_URL=http://frontend.unstract.localhost
DJANGO_SECRET_KEY="1(xf&nc6!y7!l&!5xe&i_rx7e^m@fcut9fduv86ft=-b@2g6"

# Postgres DB envs
DB_HOST='unstract-db'
DB_USER='unstract_dev'
DB_PASSWORD='unstract_pass'
DB_NAME='unstract_db'
DB_PORT=5432
DB_SCHEMA="unstract"

# Celery Backend Database (optional - defaults to DB_NAME if unset)
# Example:
# CELERY_BACKEND_DB_NAME=unstract_celery_db
# Redis
REDIS_HOST="unstract-redis"
REDIS_PORT=6379
REDIS_PASSWORD=""
REDIS_USER=default

# Redis Retry Configuration
# Controls automatic retry behavior for transient Redis connection failures
Expand Down Expand Up @@ -74,20 +73,6 @@ GOOGLE_STORAGE_ACCESS_KEY_ID=
GOOGLE_STORAGE_SECRET_ACCESS_KEY=
GOOGLE_STORAGE_BASE_URL=https://storage.googleapis.com

# Platform Service
PLATFORM_SERVICE_HOST=http://unstract-platform-service
PLATFORM_SERVICE_PORT=3001

# Tool Runner
UNSTRACT_RUNNER_HOST=http://unstract-runner
UNSTRACT_RUNNER_PORT=5002
UNSTRACT_RUNNER_API_TIMEOUT=240 # (in seconds) 4 mins
UNSTRACT_RUNNER_API_RETRY_COUNT=5 # Number of retries for failed requests
UNSTRACT_RUNNER_API_BACKOFF_FACTOR=3 # Exponential backoff factor for retries

# Prompt Service
PROMPT_HOST=http://unstract-prompt-service
PROMPT_PORT=3003

# Prompt Studio
PROMPT_STUDIO_FILE_PATH=/app/prompt-studio-data
Expand All @@ -98,15 +83,6 @@ STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.97"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.97"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
EVALUATION_SERVER_PORT=9000
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python


# X2Text Service
X2TEXT_HOST=http://unstract-x2text-service
X2TEXT_PORT=3004

# Encryption Key
# Key must be 32 url-safe base64-encoded bytes. Check the README.md for details
Expand Down Expand Up @@ -142,29 +118,13 @@ SYSTEM_ADMIN_EMAIL="admin@abc.com"
# Set Django Session Expiry Time (in seconds)
SESSION_COOKIE_AGE=86400

# Control async extraction of LLMWhisperer
# Time in seconds to wait before polling LLMWhisperer's status API
ADAPTER_LLMW_POLL_INTERVAL=30
# Total number of times to poll the status API.
# 1000 polls * 30 s interval = 500 mins, which covers 1500 pages (max page limit)
# at approx. 20 s of processing time per page.
ADAPTER_LLMW_MAX_POLLS=1000
# Number of times to retry the /whisper-status API before failing the extraction
ADAPTER_LLMW_STATUS_RETRIES=5

# Enable logging of workflow history.
ENABLE_LOG_HISTORY=True
# Interval in seconds for periodic consumer operations.
LOG_HISTORY_CONSUMER_INTERVAL=30
# Maximum number of logs to insert in a single batch.
LOGS_BATCH_LIMIT=30
# Logs Expiry of 24 hours
LOGS_EXPIRATION_TIME_IN_SECOND=86400

# Celery Configuration
# Used by celery and to connect to queue to push logs
CELERY_BROKER_BASE_URL="amqp://unstract-rabbitmq:5672//"
CELERY_BROKER_USER=admin
CELERY_BROKER_PASS=password

# Indexing flag to prevent re-index
INDEXING_FLAG_TTL=1800
Expand All @@ -176,25 +136,6 @@ NOTIFICATION_TIMEOUT=5
# with a YAML and JSONs
TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config"

# Flipt Service
FLIPT_SERVICE_AVAILABLE=False

# File System Configuration for Workflow and API Execution

# Directory Prefixes for storing execution files
WORKFLOW_EXECUTION_DIR_PREFIX="unstract/execution"
API_EXECUTION_DIR_PREFIX="unstract/api"

# Storage Provider for Workflow Execution
# Valid options: MINIO, S3, etc.
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'

# Storage Provider for API Execution
API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'

# Remote storage related envs
PERMANENT_REMOTE_STORAGE='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "minio", "secret": "minio123"}}'
REMOTE_PROMPT_STUDIO_FILE_PATH="unstract/prompt-studio-data"

# Storage Provider for Tool registry
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'
Expand Down Expand Up @@ -249,3 +190,7 @@ HITL_FILES_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endp

# File active cache redis db
FILE_ACTIVE_CACHE_REDIS_DB=0

# LLMWhisperer async extraction settings:
# seconds to wait before polling the status API, and the total number of polls
ADAPTER_LLMW_POLL_INTERVAL=30
ADAPTER_LLMW_MAX_POLLS=1000
18 changes: 17 additions & 1 deletion docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,23 @@ VERSION=dev docker compose -f docker-compose.build.yaml --profile optional build

## Docker Run

**NOTE**: First copy `sample.*.env` files to `*.env` and update as required.
**NOTE**: Before running, set up your env files:

```bash
# Run from the docker/ directory:

# 1. Common env vars shared across all services and infrastructure
# (DB, Redis, RabbitMQ, Flipt, MinIO, service URLs, worker config)
cp sample.env .env

# 2. Per-service env files (service-specific settings only)
cp ../backend/sample.env ../backend/.env
cp ../platform-service/sample.env ../platform-service/.env
cp ../prompt-service/sample.env ../prompt-service/.env
cp ../x2text-service/sample.env ../x2text-service/.env
cp ../runner/sample.env ../runner/.env
cp ../workers/sample.env ../workers/.env
```

```bash
# Up all services
Expand Down
26 changes: 22 additions & 4 deletions docker/docker-compose-dev-essentials.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ services:
- postgres_data:/var/lib/postgresql/data/
- ./scripts/db-setup/db_setup.sh:/docker-entrypoint-initdb.d/db_setup.sh
env_file:
- ./essentials.env
- ./.env
environment:
# Map app DB_* vars to Postgres container's expected POSTGRES_* vars
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASSWORD}
POSTGRES_DB: ${DB_NAME}
POSTGRES_SCHEMA: ${DB_SCHEMA}
labels:
- traefik.enable=false

Expand Down Expand Up @@ -39,7 +45,7 @@ services:
volumes:
- minio_data:/data
env_file:
- ./essentials.env
- ./.env
command: server /data --console-address ":9001"
labels:
- traefik.enable=true
Expand Down Expand Up @@ -100,6 +106,14 @@ services:
- "9005:9000" # gRPC port
volumes:
- flipt_data:/var/opt/flipt
# https://www.flipt.io/docs/configuration/overview#environment-variables
# https://www.flipt.io/docs/configuration/overview#configuration-parameters
env_file:
- ./.env
environment:
FLIPT_CACHE_ENABLED: true
# Flipt DB connection derived from app DB_* vars
FLIPT_DB_URL: "postgres://${DB_USER}:${DB_PASSWORD}@db:5432/${DB_NAME}?sslmode=disable"
labels:
- traefik.enable=true
- traefik.http.routers.feature-flag.rule=Host(`feature-flag.unstract.localhost`)
Expand Down Expand Up @@ -128,15 +142,19 @@ services:
labels:
- traefik.enable=false
env_file:
- ./essentials.env
- ./.env

rabbitmq:
image: rabbitmq:4.1.0-management
container_name: unstract-rabbitmq
hostname: unstract-rabbit
restart: unless-stopped
env_file:
- ./essentials.env
- ./.env
environment:
# Map app CELERY_BROKER_* vars to RabbitMQ container's expected vars
RABBITMQ_DEFAULT_USER: ${CELERY_BROKER_USER}
RABBITMQ_DEFAULT_PASS: ${CELERY_BROKER_PASS}
ports:
- "5672:5672" # AMQP port
- "15672:15672" # Management UI port
Expand Down
Loading
Loading